Sunday, November 8, 2015

[SD] Bloom filter [DONE]

import java.util.BitSet;
public class  simplebloomfilter
 {
     private static final  int  default_size  = 2 << 24 ;
     private static final  int [] seeds =new  int []{5,7, 11 , 13 , 41 , 43 , 61};
     private  BitSet bits= new  BitSet(default_size);
     private  simplehash[]  func=new  simplehash[seeds.length];
     public static void  main(String[] args) {
        String value  = "simplebloomfilter@gmail.cn" ;
        simplebloomfilter
 filter=new  simplebloomfilter
();
        System.out.println(filter.contains(value));
        filter.add(value);
        System.out.println(filter.contains(value));
    }
     public  simplebloomfilter
() {
         for( int  i= 0 ; i< seeds.length; i ++ ) {
            func[i]=new  simplehash(default_size, seeds[i]);
        }
    }
     public void  add(String value) {
         for(simplehash f : func) {
            bits.set(f.hash(value),  true );
        }
    }
     public boolean  contains(String value) {
         if(value ==null ) {
             return false ;
        }
         boolean  ret  = true ;
         for(simplehash f : func) {
            ret=ret&& bits.get(f.hash(value));
        }
         return  ret;
    }
     public static class simplehash {
         private int  cap;
         private int  seed;
         public  simplehash( int cap, int seed) {
             this.cap= cap;
             this.seed =seed;
        }
         public int hash(String value) {
             int  result=0 ;
             int  len= value.length();
             for  (int i= 0 ; i< len; i ++ ) {
                result =seed* result + value.charAt(i);// Rolling Hash
                System.out.println(result);
            }
             return (cap - 1 ) & result;// 2 << 24 - 1
        }
    }
} 


// http://www.programgo.com/article/42342753493/#tfbml-data%7B%22iframe_height%22%3A162%2C%22location_url%22%3A%22http%3A%2F%2Fwww.programgo.com%2Farticle%2F42342753493%2F%22%7D
//-----------------------------------------------------------------------------  
// MurmurHash2, by Austin Appleby  
  
// Note - This code makes a few assumptions about how your machine behaves -  
  
// 1. We can read a 4-byte value from any address without crashing  
// 2. sizeof(int) == 4  
  
// And it has a few limitations -  
  
// 1. It will not work incrementally.  
// 2. It will not produce the same results on little-endian and big-endian  
//    machines.  
  
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )  
{  
    // 'm' and 'r' are mixing constants generated offline.  
    // They're not really 'magic', they just happen to work well.  
  
    const unsigned int m = 0x5bd1e995;  
    const int r = 24;  
  
    // Initialize the hash to a 'random' value  
  
    unsigned int h = seed ^ len;  
  
    // Mix 4 bytes at a time into the hash  
  
    const unsigned char * data = (const unsigned char *)key;  
  
    while(len >= 4)  
    {  
        unsigned int k = *(unsigned int *)data;  
  
        k *= m;  
        k ^= k >> r;  
        k *= m;  
  
        h *= m;  
        h ^= k;  
  
        data += 4;  
        len -= 4;  
    }  
  
    // Handle the last few bytes of the input array  
  
    switch(len)  
    {  
    case 3: h ^= data[2] << 16;  
    case 2: h ^= data[1] << 8;  
    case 1: h ^= data[0];  
            h *= m;  
    };  
  
    // Do a few final mixes of the hash to ensure the last few  
    // bytes are well-incorporated.  
  
    h ^= h >> 13;  
    h *= m;  
    h ^= h >> 15;  
  
    return h;  
}  


#include   
  
using namespace std;  
  
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed );  
  
int main() {  
    unsigned int result = MurmurHash2("abcd",4,1);  
    cout<

Monday, November 2, 2015

[Quick select algorithm]find the Kth element in a list in linear time

Source:http://cse-wiki.unl.edu/wiki/index.php/Sorting_Algorithms


Random select.jpg

Source: http://blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html
Arr = [5 1 4 3 2]
Pivot = [4]

Steps:

swap [5] and [2] as 5>=4 and 2<
[2 1 4 3 5]

swap [4] and [3] as 4>=4 and 3<4
[2 1 3 4 5]


Arr = [2 1 3 …]
Pivot = [1]

Steps:
swap [2] and [1] as 2>=2 and 1<2
[1 2 3 …]



Arr = […2 3…]
Pivot= [2]

// Quick Select O(n) instead of O(nlogn) for large dataset

// Source :http://blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html
public static int selectKth(int[] arr, int k) {
 if (arr == null || arr.length <= k)
  throw new Error();
 
 int from = 0, to = arr.length - 1;
 
 // if from == to we reached the kth element
 while (from < to) {
  int r = from, w = to;
  int mid = arr[(r + w) / 2];
 
  // stop if the reader and writer meets
  while (r < w) {
 
   if (arr[r] >= mid) { // put the large values at the end
    int tmp = arr[w];
    arr[w] = arr[r];
    arr[r] = tmp;
    w--;
   } else { // the value is smaller than the pivot, skip
    r++;
   }
  }
 
  // if we stepped up (r++) we need to step one down
  if (arr[r] > mid)
   r--;
 
  // the r pointer is on the end of the first k elements
  if (k <= r) {
   to = r;
  } else {
   from = r + 1;
  }
 }
 
 return arr[k];
}

Thursday, October 29, 2015

[SD]TinyURL


What is a tinyurl? we can represent a long url with a 6-letter url by using alphabets [0-9, a-z, A-Z].
e.g., http://goo.gl/0wV1lU


1. Convert a long url to shorten url

f(long url ) = shorten url
E.g., f(www.weihungliu.com/myminelink) = http://goo.gl/cb
Normally, we store long url in our database, and it will have an corresponded id, which is auto_incremented
Hence, we can translate our problem into convert id to a shortened url

2. Convert id to shorten url
Suppose id is 125 = 1*10^2 + 2*10^1+ 5*10^0, it is 10-base
since we have 62 alphabets = 10(0-9)+26(a-z)+26(A-Z) = 62, so we can convert 10-base to 62-base
then 125 = 2*62^2+1*62^0
Why we want to do this ? 10-base to 62 base?
Since in this case we can represent 125 = 2*62^2+1*62^0 as a simple "cb", where 2 denotes c and 1 denotes b. This is called Rolling hash.

[table
0 → a 
1 → b ... 
25 → z ... 
52 → 0 
...
61 → 9]
this is where "cb" of http://goo.gl/cb come from.


 public String shorturl(int id, int base, HashMap map) {
  StringBuilder ret = new StringBuilder();
  while (id > 0) {
    int digit = id % base;
    ret.append( map.get(digit) );
    id /= base;
  }
  while ( res.length() < 6 )  ret.append('0');
  return ret.reverse().toString(); 
}
3. Convert shorten url to id

It's easy though.
http://goo.gl/cb
we convert c to 2 and b to 1 and use 62-base numeric then id become 2*62^1+1*62^0 = 125

4. Single machine VS Multiple machine 
The above approach is well suited for single machine, However, on Multiple machine we need to think the other way.

SOURCE FROM http://n00tc0d3r.blogspot.com/2013/09/big-data-tinyurl.html
On Multiple Machine
Suppose the service gets more and more traffic and thus we need to distributed data onto multiple servers.

We can use Distributed Database. But maintenance for such a db would be much more complicated (replicate data across servers, sync among servers to get a unique id, etc.).

Alternatively, we can use Distributed Key-Value Datastore.
Some distributed datastore (e.g. Amazon's Dynamo) uses Consistent Hashing to hash servers and inputs into integers and locate the corresponding server using the hash value of the input. We can apply base conversion algorithm on the hash value of the input.

The basic process can be:
Insert
  1. Hash an input long url into a single integer;
  2. Locate a server on the ring and store the key--longUrl on the server;
  3. Compute the shorten url using base conversion (from 10-base to 62-base) and return it to the user.
Retrieve
  1. Convert the shorten url back to the key using base conversion (from 62-base to 10-base);
  2. Locate the server containing that key and return the longUrl.

5. References
http://stackoverflow.com/questions/742013/how-to-code-a-url-shortener
http://n00tc0d3r.blogspot.com/2013/09/big-data-tinyurl.html

Saturday, October 24, 2015

[Multithreading] Classic Problems - Reader and Writer, Producer and Consumer, Dining Philosopher

Concurrent and Distributed Computing in Java

Cracking code philosopher


       public void pickUp(){
   
                  void lock.trylock();
      }
      public void putDown(){
                 void lock.unlock();
     }
            if ( !left.pickUp() )
                     return fasle;
               if ( !right.pickUp() ){
                     left.putDown();
                     return fasle;
               }
// release the left chopstick if we can't pick up  the right one
               return true;


Chopstick call lock.lock() when it is picked up
and lock.unlockQ when it is put down
public class Chopstick {
        private Lock lock;
       publci Chopstick {
            lock = new ReentrantLock();
       }
       public void pickUp(){
                  # Aprroach 1 : deadlock
                  void lock.lock();
                  # Aprroach 2 : GOOD
                  void lock.trylock();
      }
      public void putDown(){
                 void lock.unlock();
     }
}

To prevent deadlocks, we can implement a strategy where a philosopher will put down
his left Chopstick if he is unable to obtain the right one
public class Philosopher extends Thread{
      private int bites = 10;
      private Chopstick left1;
      private Chopstick right2;
      public Philosopher (Chopstick left1, Chopstick right2)
      {
               this.left1= left1; this.right2= right2;
      }

      public void run ()
      {
                 for (int i =0; i  < bites ; i++)
                         eat();
      }

      public void eat()
      {           
                 # Approach 1 :deadlock
                 pickUp();
                  chew();
                  putDown();
                  # Approach 2 :GOOD
                  if ( pickUp() )
                  {
                          chew();
                          putDown();
                    }
      }
      public boolean pickUp()
      {
                 # Approach 1 :deadlock
              left1.pickUp();
               right2.pickUp();
                  # Approach 2 :GOOD
               if ( !left.pickUp() )
                     return fasle;
               if ( !right.pickUp() ){
                     left.putDown();
                     return fasle;
               }
// release the left chopstick if we can't pick up  the right one
               return true;
      }
      public void chew(){}
       publci void putDown(){
              left.putDown();
              right.putDown();
      }
}

Reference: cracking the coding interview




                synchonized (  forks[fork1]  ) {
                                synchonized ( forks[fork2] ){}
                             if ( i%2 == 0)
                                  pholosophers[i] = new Philosopher(i, fork2, fork1);
                             else
                                  pholosophers[i] = new Philosopher(i, fork1, fork2);
public class DiningPhilosophers
{
       // create shared object
       private Object[] forks;
       private Philosopher[] philosophers;

      public DiningPhilosophers ( int num )
      {
                 forks = new Obejct[num];
                 philosophers = new Philosophers[num];
                 for ( int i = 0 ; i < num ; i ++) {
                        forks[i ] = new Object();;
                              # Approach 1: deadlock
                              philosophers[i ] = new Philosopher( i,  i, (i+1)% num, );
                              # Approach 2: livelock, not fair
                              int fork1= i;       
                              int frok2 = (i+1)%num;
                             if ( i == 0)
                                 pholosophers[i] = new Philosopher(i, fork2, fork1);
                             else
                                 pholosophers[i] = nwe Philosopher(i, fork1, fork2);
                             #Approach 3 : fair
                             int fork1 = i;
                             int fork2 =( i+1)% num;
                             if ( i%2 == 0)
                                  pholosophers[i] = new Philosopher(i, fork2, fork1);
                             else
                                  pholosophers[i] = new Philosopher(i, fork1, fork2);
                 }
      }

      public void startEating () throws InterrupedException
      {
                  for (int i = 0; i < philosophers.length;i++)
                  pholosophers[0].start();
                  philosophers[0].join(); // startEating execute after thread[0] finished the task
                  // Suspend the main thread until the first philosopher stops eating, which will never happen  -- this keep simulation running indefinitely
      }

      public static void main (String[] args)
      {
               try {
                          DiningPholosophers d = new DiningPholosophers(5);
                          d.startEating();
               } catch (  ItnerruptedExcepiton e) {
               }
     }
}
INSIDE philosophers
public class Philosopher extends Thread {
      
     private int id;
     private int fork1;
     private int fork2;
private Object[] forks;
     public pholosopher( int id, int fork1, int fork2, Object[] forks )
     {
          this.id = id; this.fork1 = fork1; this.fork2 = fork2; this. forks = forks;
     }
     public void run ()
     {
            System.out.println(“ Ready to eat using forks” + fork1 + “and”+fork2);
            while ( true )
            {
                        synchonized (  forks[fork1]  ) {
                                synchonized ( forks[fork2] ){}

                        }
            }
     }
    
    
}
Reference : programming interview exposed

producer and consumer

With introduction of BlockingQueue Data Structure in Java 5 Its now much simpler because BlockingQueue provides this control implicitly by introducing blocking methods put() and take()

BlockingQueue put() method will block if Queue is full in case of Bounded Queue and take() will block if Queue is empty

BlockingQuue is an interface and Java 5 provides different implantation like ArrayBlockingQueue and LinkedBlockingQueue , both implement FIFO order or elements, while ArrayLinkedQueue is bounded in nature LinkedBlockingQueue is optionally bounded.

import java.util.concurrent.BlockingQueue;
import.java.util.concurrent.LinkedBlockingQueue;
import.java.util.Level;
impot.java.util.Logger;

public class ProducerConsumerPattern
{
         public static void main (String[] args)
         {
                    // createing shared object
                    BlockingQueue sharedQueue = new LinkedBlockingQueue();

                   // creating producer and consumer Thread
                    Thread prodThread = new Thread ( new Producer(sharedQueue));
                    Thread consThread = new Thread(  new Consumer(sharedQueue));

                   // starting producer and consumer thread
                   prodThread().start();
                   consThread().start();
         }
}
class Producer implements Runnable
{
         private final BlockingQueue sharedQueue;
         public Producer(BlockingQueue sharedQueue)
              this.sharedQueue = sharedQueue;
    
        @Override
        public void run()
        {
                   for (int i = 0 ; i < 10 ; i++ )
                {
                         try{
                                  sharedQueue.put(i);
                         } catch ( ItnerrruptedException e ) {
                                   Logger.getLogger(  Producer.class.getName() ).log( Level.SERVERE, null, e );
                         }                
                }
        
        }
}
class Consumer implements Runnable
{
         private final BlockingQueue sharedQueue;
         public Consuemr(BlockingQueue sharedQueue)
                this.sharedQueue = sharedQueue;
     
         @Override
         public void run()
         {
                    while ( true )
                    {
                           try{
                                   sharedQueue.take();
                           } catch (InterruptedException e) {
                                     Logger.getLogger(  Consuemr.class.getname() ).log(  Level.SERVERE, null ,e  );
                           }
                    }      
         }
          
}

classical way is using wait and notify method to communicate between Producer and Consumer thread and blocking each of them on individual condition like full queue and empty queue

public class ProducerConsumerWaitAdnNotify{

         public static void main (String[] args)
         {
                    // creating shared object
                   IntBuffer b  = new IntBuffer();
                   
                   // creating Prodcuer and Consumer Thread
                  Producer p  = new Producer(b);
                  Consumer c = new Consumer(b);

                  // starting producer and consumer thread
                 p.start();
                 c.start();

         }
}
public class Producer extends Thread
{
          private IntBuffer buffer;
          public Producer ( IntBuffer buffer)
              this.buffer = buffer;

         public void run()
         {
                 Random r = new Random();
                 while( true )
                {
                        int num  = r.nextInt();
                        buffer.add( num );
                   
                 }
         }
}
publci class Consumer extends Thread
{
          private IntBuffer buffer;
          public Consumer (IntBuffer buffer)
             this.buffer = buffer;
          
         public void run()
         {
                    while( true )
                    {
                            int num = buffer.remove();
                    }
         }
}
pbulic class IntBuffer
{
        private int index;
        private int[] buffer = new int[8];
    
        // Called from Producer
        public synchronized void add()
        {
                 while( index == buffer.length -1 )
                 {
                             try
                             {
                                       wait();
                              }
                               catch (InterrupedException e)
                              {
                              }
                  }
                  buffer[index+1] = num;
                  notifyAll();
        }

        // Called from Consumer
        public synchronized int remove()
        {

                    while( index == 0 )
                    {
                              try
                              {
                                      wait();
                              }
                             catch( InterrrupedException e)
                              {
           
                              }
                    }
                    int ret = buffer[--index];
                    notifyAll();
                    return ret;
        }
}
Reference: programming interview exposed

readers and writers Problem

semaphore as counter which can be incremented or decremented
binary semaphore protect the access to a SINGLE shared resource, so the internal counter of the semaphore can only take the values 1 or 0
     semaphore = new Semaphore(1);

  1. First, you acquire the semaphore, with the acquire() method.
  2. Then, you do the necessary operations with the shared resource.
  3. Finally, release the semaphore with the release() method.
It decreases the value to 0 with the P(S) operation when the resource is in use, and then releases it with the V(S) operation by increasing it back to 1 when the resource is freed
# Approach 2 : using semaphore
class ReaderWrtier {
     // shared object/data
     int numReaders =0;
     BinarySemaphore mutex = new BinarySemaohre( true);
     BinarySemaphore wlock = new BinsraySemaphore(true);

    public void startRead (){
           mutex.P(); // 1→ 0
           numReaders++;
           if ( numReaders == 1 ) wlock.P(); // CANNOT WRITE, since someone id reading
           nutex.V(); // 0→ 1
    }
    public void endRead(){
          mutex.P();
          numReaders--;
          if ( numReaders == 0 ) wlock.V(); // CAN WRITE
          mutex.V();
    }
    public void startWrite(){
          wlock.P();
    }
    public void endWrite(){
          wlock.V();
    }
}

public class BinarySemaphore {
   boolean value;
   BinarySemaphore(boolean initValue) {
       value = initValue;
   }
   public synchronized void P() {
       while (value == false)
           Util.myWait(this);// in queue of blocked processes
       value = false;
   }
   public synchronized void V() {
       value = true;
       notify();
   }
}

One Wrtier but multiple Readers

# Approach 1 : using monitor

class ReaderWrtier{

       // shared object/ data
        private int numReaders = 0;
        private numWriters = 0;
       
        private synchonized void prepareToRead() // many readers can call this but  only one can read
        {
                  while ( numWrtiers > 0)  {
                        wait();
                  }
                  numberReaders++;
        }
        public synchronized void doneReading() //many readers can call this but  only one can end reading
        {
                  numberReaders--;
                  if ( numReaders == 0) {
                          notify();
              }
         }
         public void someReadMethod (){
                  // reads not synchonized, mutliple readers
                  prepareToRead();
                  // do the reading…
                 doneReading();
        }

       private void prepareToWrtie()
       {
              numberQriters++;
              while ( numReaders!= 0) {
                   wait();
              }
       }
       public void doneReading(){
             numWriters--;
             notify();
       }
       public synchronized void someWriteMethod()
       {
               // synchronized => only one writer
               prepateToWrite();
               // do the wirting
                doneWriting();
       }
}