import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import java.util.HashMap; import java.util.TreeMap; import java.util.Map.Entry; import java.util.Collections; class QueueEntry { TreeNode node; int horizontalDistance; QueueEntry(TreeNode node, int horizontalDistance) { this.node = node; this.horizontalDistance = horizontalDistance; } } class TreeNode { TreeNode left; TreeNode right; int val; public TreeNode(int x) { this.val = x; } } public class PrintVerticalLevelNodesSum { /* Output 1. Expected Output - 4 2 1 5 6 3 8 7 2. HashMap Output - 1,5,6, 8,3, 7, 4, 2, 3. TreeMap (key:level=>in order) Output 4, 2, 1,5,6, 8,3, 7, 4. Collections.sort() Output 4, 2, 1,5,6, 3,8, 7, Output for sum 4, 2, 12, 11, 7 */ public static void main(String[] arg) { PrintVerticalLevelNodesSum tree = new PrintVerticalLevelNodesSum(); /* 1 2 3 4 5 6 7 8 */ TreeNode root = tree.createTree(); tree.printVerticalOrder(root); } public TreeNode createTree() { TreeNode root = new TreeNode(1); TreeNode n1 = new TreeNode(2); TreeNode n2 = new TreeNode(3); TreeNode n3 = new TreeNode(4); TreeNode n4 = new TreeNode(5); TreeNode n5 = new TreeNode(6); TreeNode n6 = new TreeNode(7); TreeNode n7 = new TreeNode(8); root.left = n1; root.right = n2; n1.left = n3; n1.right = n4; n2.left = n5; n2.right = n6; n4.right = n7; return root; } // Main Function public static void printVerticalOrder(TreeNode root){ if(root == null) return; //HashMap> map = new HashMap >(); TreeMap > map = new TreeMap >(); printVerticalOrderUtilRecursive(root, 0, map); //printVerticalOrderUtilInterative(root,0,map); TreeMap mapSum = new TreeMap (); printVerticalOrderSumUtilRecursive(root, 0, mapSum); // Print System.out.println("vertical nodes :" ); for(Integer level: map.keySet()){ ArrayList tmp = map.get(level); Collections.sort(tmp); for(Integer value: tmp) System.out.print(value + ","); System.out.println(); } System.out.println(); System.out.println("vertical sum :" ); for(Integer level: mapSum.keySet()){ int tmp = mapSum.get(level); System.out.print(tmp + ","); System.out.println(); } } //private static void printVerticalOrderUtil(TreeNode root, int level, // HashMap > map) { private static void printVerticalOrderUtilRecursive(TreeNode root, int level, TreeMap > map) { // Base Case, root == null if(root == null) return; // Base Case, root != null ArrayList list = (map.get(level) != null)? map.get(level): new ArrayList (); list.add(root.val); map.put(level, list); // Recursive printVerticalOrderUtilRecursive(root.left, level-1, map); printVerticalOrderUtilRecursive(root.right, level+1, map); } private static void printVerticalOrderUtilInterative(TreeNode root, int level, TreeMap > map) { if (root == null) return; ArrayList mapEntry; LinkedList queue = new LinkedList (); queue.add(new QueueEntry(root, level)); while (!queue.isEmpty()) { QueueEntry entry = queue.remove(); // Base Case, root != null mapEntry = (map.get(entry.horizontalDistance) != null)? map.get(entry.horizontalDistance):new ArrayList (); mapEntry.add(entry.node.val); map.put(entry.horizontalDistance, mapEntry); if (entry.node.left != null) { queue.add(new QueueEntry(entry.node.left, entry.horizontalDistance - 1)); } if (entry.node.right != null) { queue.add(new QueueEntry(entry.node.right, entry.horizontalDistance + 1)); } } } private static void printVerticalOrderSumUtilRecursive(TreeNode root, int level, TreeMap map) { // Base Case, root == null if(root == null) return; // Base Case, root != null int sum = (map.get(level) != null)? map.get(level)+root.val: root.val; map.put(level, sum); // Recursive printVerticalOrderSumUtilRecursive(root.left, level-1, map); printVerticalOrderSumUtilRecursive(root.right, level+1, map); } private static void printVerticalOrderSumUtilInterative(TreeNode root, int level,TreeMap map) { if (root == null) return; ArrayList mapEntry; LinkedList queue = new LinkedList (); queue.add(new QueueEntry(root, level)); while (!queue.isEmpty()) { QueueEntry entry = queue.remove(); // Base Case, root != null int sum = (map.get(entry.horizontalDistance) != null)? map.get(entry.horizontalDistance)+ root.val: root.val; map.put(entry.horizontalDistance, sum); if (entry.node.left != null) { queue.add(new QueueEntry(entry.node.left, entry.horizontalDistance - 1)); } if (entry.node.right != null) { queue.add(new QueueEntry(entry.node.right, entry.horizontalDistance + 1)); } } } } //list can not set -1, sum //sh-4.3$ java -Xmx128M -Xms16M PrintVerticalLevelNodesSum //Exception in thread "main" java.lang.IndexOutOfBoundsException: Index: 0, Size: 0 // at java.util.ArrayList.rangeCheck(ArrayList.java:635) // at java.util.ArrayList.get(ArrayList.java:411) // at PrintVerticalLevelNodesSum.printVerticalOrderSumUtilRecursive(PrintVerticalLevelNodesSum.java:185) // at PrintVerticalLevelNodesSum.printVerticalOrder(PrintVerticalLevelNodesSum.java:120) // at PrintVerticalLevelNodesSum.main(PrintVerticalLevelNodesSum.java:84)
Tuesday, November 24, 2015
Facebook: Print a Binary Tree Sum in Vertical Order
Friday, November 20, 2015
Binary Indexed Tree In 2D , 2D Binary Indexed Tree
vector<vector<int>> matrix;
int M = matrix.size(), N = matrix[0].size();
// update > > sum
// update O(1); sum O(n^2)
int M = matrix.size(), N = matrix[0].size();
// update > > sum
// update O(1); sum O(n^2)
[size=13.3333330154419px]set O(1)
[size=13.3333330154419px]sum O(n^2)
void update(int r, int c, int val) {
matrix[r][c] = val;
}
// (r1,c1) upper left, (r2,c2) bottom right
int sum(int r1, int c1, int r2, int c2) {
int res = 0;
for (int i = r1; i <= r2; ++i) {
for (int j = c1; j <= j2; ++j) {
res += matrix[i][j];
}
}
return res;
}
// sum > > update
// dp[i][j] computer sum of rectangle [0,0] --- [0,j] --- [i,0] --- [i,j]
// update O(n^2), sum O(1).
void update(int r, int c, int val) {
matrix[r][c] = val;
}
// (r1,c1) upper left, (r2,c2) bottom right
int sum(int r1, int c1, int r2, int c2) {
int res = 0;
for (int i = r1; i <= r2; ++i) {
for (int j = c1; j <= j2; ++j) {
res += matrix[i][j];
}
}
return res;
}
// sum > > update
// dp[i][j] computer sum of rectangle [0,0] --- [0,j] --- [i,0] --- [i,j]
// update O(n^2), sum O(1).
[size=13.3333330154419px]set O(n^2)
[size=13.3333330154419px]sum O(1)
vector<vector<int>> dp(M,vector(N,0));
void compute() {
dp[0][0] = matrix[0][0];
for (int j = 1; j < N; ++j) {
dp[0][j] = dp[0][j-1] + matrix[0][j];
}
for (int i = 1; i < M; ++i) {
dp[i][0] = dp[i-1][0] + matrix[i][0];
}
for (int i = 1; i < M; ++i) {
for (int j = 1; j < N; ++j) {
dp[i][j] = dp[i-1][j] + dp[i][j-1] - dp[i-1][j-1] + matrix[i][j];
}
}
}
void update(int r, int c, int val) {
int dif = val - matrix[i][j];
for (int i = r; i < M; ++i) {
for (int j = c; j < N; ++j) {
dp[i][j] += dif;
}
}
}
int sum(int r1, int c1, int r2, int c2) {
if (r1 == 0 && c1 == 0) return dp[r2][c2];
else if (r1 == 0) return dp[r2][c2] - dp[r2][c1-1];
else if (c1 == 0) return dp[r2][c2] - dp[r1-1][c2];
else return dp[r2][c2] - dp[r1-1][c2] - dp[r2][c1-1] + dp[r1-1][c1-1];
}
// sum ~= update
// update
vector<vector<int>> dp(M,vector(N,0));
void compute() {
dp[0][0] = matrix[0][0];
for (int j = 1; j < N; ++j) {
dp[0][j] = dp[0][j-1] + matrix[0][j];
}
for (int i = 1; i < M; ++i) {
dp[i][0] = dp[i-1][0] + matrix[i][0];
}
for (int i = 1; i < M; ++i) {
for (int j = 1; j < N; ++j) {
dp[i][j] = dp[i-1][j] + dp[i][j-1] - dp[i-1][j-1] + matrix[i][j];
}
}
}
void update(int r, int c, int val) {
int dif = val - matrix[i][j];
for (int i = r; i < M; ++i) {
for (int j = c; j < N; ++j) {
dp[i][j] += dif;
}
}
}
int sum(int r1, int c1, int r2, int c2) {
if (r1 == 0 && c1 == 0) return dp[r2][c2];
else if (r1 == 0) return dp[r2][c2] - dp[r2][c1-1];
else if (c1 == 0) return dp[r2][c2] - dp[r1-1][c2];
else return dp[r2][c2] - dp[r1-1][c2] - dp[r2][c1-1] + dp[r1-1][c1-1];
}
// sum ~= update
// update
[size=13.3333330154419px]set O(n)
[size=13.3333330154419px]sum O(n)
vector<vector<int>> dp(M,vector(N,0));
// row dp; dp[i][j] = sum_i_{k = 0}^{k = j};
void compute() {
for (int i = 0; i < M; ++i) {
for (int j = 0; j < N; ++j) {
if (j == 0) dp[i][j] = matrix[i][j];
else dp[i][j] = dp[i][j-1] + matrix[i][j];
}
}
}
void update(int r, int c, int val) {
int dif = val - matrix[i][j];
for (int j = c; j < N; ++j) {
dp[r][j] += dif;
}
}
int sum(int r1, int c1, int r2, int c2) {
int res = 0;
for (int i = r1; i <= r2; ++i) {
res += c1 == 0 ? dp[i][c2] : dp[i][c2] - dp[i][c2-1];
}
return res;
}
vector<vector<int>> dp(M,vector(N,0));
// row dp; dp[i][j] = sum_i_{k = 0}^{k = j};
void compute() {
for (int i = 0; i < M; ++i) {
for (int j = 0; j < N; ++j) {
if (j == 0) dp[i][j] = matrix[i][j];
else dp[i][j] = dp[i][j-1] + matrix[i][j];
}
}
}
void update(int r, int c, int val) {
int dif = val - matrix[i][j];
for (int j = c; j < N; ++j) {
dp[r][j] += dif;
}
}
int sum(int r1, int c1, int r2, int c2) {
int res = 0;
for (int i = r1; i <= r2; ++i) {
res += c1 == 0 ? dp[i][c2] : dp[i][c2] - dp[i][c2-1];
}
return res;
}
[size=13.3333330154419px]set O(log n)
[size=13.3333330154419px]sum O(log n)
Solution:
Lets take an example:
Given a matrix M[R][C], initially set to 0.
We have 2 operations:
1. set r,c,Value element to value.
2.range sum r1,c1,r2,c2 ; return the sum of all the elements r,c such that r1<=r<=r2, c1<=c<=c2;
This can be done using Fenwick 2D Point updation Tree.
1. Use update(r,c,Value) for 1st operation;
void update(int x , int y , int val){
int y1;
while (x <= max_x){
y1 = y;
while (y1 <= max_y){
tree[x][y1] += val;
y1 += (y1 & -y1);
}
x += (x & -x);
}
}
2. read(r1,c1,r2,c2) = read(r2,c2) - read(r2,c1-1) - read(r1-1,c2) + read(r1-1,c1-1);
int read(int x,int y){ // return sum from 1,1 to x,y.
int sum= 0;
while( x){
int y1 = y;
while(y1){
sum += tree[x][y1];
y1 -= y1 & -y1;
}
x -= x & -x;
}
return sum;
}
import java.lang.*; public class BIT2DSumAndUpdate { // Given an NxN matrix of positive and negative integers, write code to find the // range sum and update matrix // Approach#1 sum: O(n^2) time, update:O(1) time. static int sum(int[][] matrix, int r1, int c1, int r2, int c2) { int sum = 0; for (int r = r1; r <= r2; ++r) { for (int c = c1; c <= c2; ++c) { sum += matrix[r][c]; } } return sum; } static void update (int[][] matrix, int r1, int c1, int val){ matrix[r1][c1] = val; } // Approach#2 sum: O(1) time, update:O(n^2) time. static int[][] processMatrix(int[][] m) { if (m == null) return null; int[][] sumMatrix = new int[m.length][m[0].length]; sumMatrix[0][0] = m[0][0]; for (int j = 1; j < m[0].length;j++){ sumMatrix[0][j]=sumMatrix[0][j-1]+m[0][j]; //System.out.println(0+","+j+" - "+sumMatrix[0][j]+","); } for (int i = 1; i < m.length;i++){ sumMatrix[i][0]=sumMatrix[i-1][0]+m[i][0]; //System.out.println(i+","+0+" - "+sumMatrix[i][0]+","); } for (int i =1; i< m.length;i++) { for (int j=1; j < m[0].length;j++){ sumMatrix[i][j] = sumMatrix[i-1][j]+sumMatrix[i][j-1]-sumMatrix[i-1][j-1]+m[i][j]; //System.out.println(i+","+j+" - "+sumMatrix[i][j]+","); } } return sumMatrix; } static int sum2(int[][] sumMatrix, int r1, int c1, int r2, int c2) { if (r1 == 0 && c1 == 0){ return sumMatrix[r2][c2]; } else if (r1 == 0){ return sumMatrix[r2][c2]-sumMatrix[r2][c2-1]; } else if (c1 == 0 ){ return sumMatrix[r2][c2]-sumMatrix[r1-1][c2]; } else { return sumMatrix[r2][c2]-sumMatrix[r2][c1-1]-sumMatrix[r1-1][c2]+sumMatrix[r1-1][c1-1]; } } static void update2(int[][] matrix, int[][] sumMatrix,int r1, int c1, int val){ int diff= val - matrix[r1][c1]; for (int i = r1; i < sumMatrix.length;i++){ for (int j = c1; j< sumMatrix[0].length; j++){ sumMatrix[i][j]+=diff; } } } // Approach#3 sum: O(log n) time, update:O(log n) time. static int[][] processMatrixTree(int[][] m) { if (m == null) return null; int[][] tree = new int[m.length+1][m[0].length+1]; for( int i = 1; i <= m.length;i++){ for(int j = 1; j <=m[0].length;j++){ //System.out.println(i+","+j+". sumFirst="+tree[i][j]); updateBuilde3(tree, i, j, m[i-1][j-1]); } } return tree; } static int sum3(int[][] tree, int r1, int c1, int r2, int c2) { if (r1 == 0 && c1 == 0){ return getSum(tree,r2,c2); } else if (r1 == 0){ return getSum(tree,r2,c2)-getSum(tree,r2,c2-1); } else if (c1 == 0 ){ return getSum(tree,r2,c2)-getSum(tree,r1-1, c2); } else { return getSum(tree,r2,c2)-getSum(tree,r2,c1-1)-getSum(tree,r1-1, c2)+getSum(tree,r1-1,c1-1); } } static int getSum(int[][] tree, int i, int j){ int sum = 0; //System.out.println(i+","+j+". sumAdd="+sum); i = i+1; j = j+1; int j1; while(i > 0) { j1 = j; while(j1 > 0){ sum += tree[i][j1]; ///System.out.println(i+","+j1+". sumAdd="+sum); j1 -= j1 & -j1; } i -= i & -i; } return sum; } static void updateBuilde3(int[][] tree, int i, int j, int val){ //int diff= val - matrix[r1][c1]; int j1; while(i < tree.length){ j1 = j; while(j1 < tree[0].length){ tree[i][j1] += val; //System.out.println(i+","+j1+". sumAfter="+tree[i][j1]); j1 += (j1 & -j1); } i += (i & -i); } } static void update3(int[][] matrix, int[][] tree, int i, int j, int val){ int diff= val - matrix[i][j]; int j1; while(i < tree.length){ j1 = j; while(j1 < tree[0].length){ tree[i][j1] += diff; //System.out.println(i+","+j1+". sumAfter="+tree[i][j1]); j1 += (j1 & -j1); } i += (i & -i); } } //-------------------------------------- public static void main(String[]args) { int[][] m = { {1,-2,3,1}, {1,5,-4,1}, {1,1,0,2}, {-1,1,1,-8}}; int[][] m2 = { {1,-2,3,1}, {1,5,-4,1}, {1,1,0,2}, {-1,1,1,-8}}; int[][] m3 = { {1,-2,3,1}, {1,5,-4,1}, {1,1,0,2}, {-1,1,1,-8}}; long start = System.nanoTime(); int sum1_1 = sum(m,0,0,2,3); long end = System.nanoTime(); update(m, 2,3,3);// 2->3 int sum1_2 = sum(m,0,0,2,3); System.out.println("Time:"+(long)(end-start)+",sum=" +sum1_1+",sum2=" +sum1_2); int[][] sumMatrix = processMatrix(m2); long start2 = System.nanoTime(); int sum2_1 = sum2(sumMatrix,0,0,2,3); long end2 = System.nanoTime(); update2(m2, sumMatrix, 2,3,3);// 2->3 int sum2_2 = sum2(sumMatrix,0,0,2,3); System.out.println("Time:"+(long)(end2-start2)+",sum=" +sum2_1+",sum2=" +sum2_2); int[][] tree = processMatrixTree(m3); long start3 = System.nanoTime(); int sum3_1 = sum3(tree,0,0,2,3); long end3 = System.nanoTime(); update3(m3, tree, 2,3,3);// 2->3 int sum3_2 = sum3(tree,0,0,2,3); System.out.println("Time:"+(long)(end3-start3)+",sum=" +sum3_1+",sum2=" +sum3_2); } }
Tuesday, November 17, 2015
Facebook: Print a Binary Tree in Vertical Order
import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import java.util.HashMap; import java.util.TreeMap; import java.util.Map.Entry; import java.util.Collections; class QueueEntry { TreeNode node; int horizontalDistance; QueueEntry(TreeNode node, int horizontalDistance) { this.node = node; this.horizontalDistance = horizontalDistance; } } class TreeNode { TreeNode left; TreeNode right; int val; public TreeNode(int x) { this.val = x; } } public class PrintVerticalLevelNodes { /* 1. Expected Output - 4 2 1 5 6 3 8 7 2. HashMap Output - 1,5,6, 8,3, 7, 4, 2, 3. TreeMap (key:level=>in order) Output 4, 2, 1,5,6, 8,3, 7, 4. Collections.sort() Output 4, 2, 1,5,6, 3,8, 7, */ public static void main(String[] arg) { PrintVerticalLevelNodes tree = new PrintVerticalLevelNodes(); /* 1 2 3 4 5 6 7 8 */ TreeNode root = tree.createTree(); tree.printVerticalOrder(root); } public TreeNode createTree() { TreeNode root = new TreeNode(1); TreeNode n1 = new TreeNode(2); TreeNode n2 = new TreeNode(3); TreeNode n3 = new TreeNode(4); TreeNode n4 = new TreeNode(5); TreeNode n5 = new TreeNode(6); TreeNode n6 = new TreeNode(7); TreeNode n7 = new TreeNode(8); root.left = n1; root.right = n2; n1.left = n3; n1.right = n4; n2.left = n5; n2.right = n6; n4.right = n7; return root; } // Main Function public static void printVerticalOrder(TreeNode root){ if(root == null) return; //HashMap> map = new HashMap >(); TreeMap > map = new TreeMap >(); printVerticalOrderUtilRecursive(root, 0, map); //printVerticalOrderUtilInterative(root,0,map); // Print for(Integer level: map.keySet()){ ArrayList tmp = map.get(level); Collections.sort(tmp); for(Integer value: tmp) System.out.print(value + ","); System.out.println(); } System.out.println(); } //private static void printVerticalOrderUtil(TreeNode root, int level, // HashMap > map) { private static void printVerticalOrderUtilRecursive(TreeNode root, int level, TreeMap > map) { // Base Case, root == null if(root == null) return; // Base Case, root != null ArrayList list = (map.get(level) != null)? map.get(level): new ArrayList (); list.add(root.val); map.put(level, list); // Recursive printVerticalOrderUtilRecursive(root.left, level-1, map); printVerticalOrderUtilRecursive(root.right, level+1, map); } private static void printVerticalOrderUtilInterative(TreeNode root, int level, TreeMap > map) { if (root == null) return; ArrayList mapEntry; LinkedList queue = new LinkedList (); queue.add(new QueueEntry(root, level)); while (!queue.isEmpty()) { QueueEntry entry = queue.remove(); // Base Case, root != null mapEntry = (map.get(entry.horizontalDistance) != null)? map.get(entry.horizontalDistance): new ArrayList (); mapEntry.add(entry.node.val); map.put(entry.horizontalDistance, mapEntry); if (entry.node.left != null) { queue.add(new QueueEntry(entry.node.left, entry.horizontalDistance - 1)); } if (entry.node.right != null) { queue.add(new QueueEntry(entry.node.right, entry.horizontalDistance + 1)); } } } }
if sum, no need for HashMap> ====> need int[] OR ArrayList => index is level and value is sum list.set(level, list.get(level)+root.val);
Sunday, November 8, 2015
[SD] External sort
1. Internal sort :
all sorting algorithm for memory is enough to process
2. External sort:
for memory is NOT enough to process
need to architect
https://karticks.wordpress.com/tag/map-reduce/
http://www.geeksforgeeks.org/sort-numbers-stored-on-different-machines/#tfbml-data%7B%22close_iframe%22%3Atrue%2C%22location_url%22%3A%22http%3A%2F%2Fwww.geeksforgeeks.org%2Fsort-numbers-stored-on-different-machines%2F%22%7D
1. Store the head pointers of the linked lists in a minHeap of size N where N is the number of machines.
2. Extract the minimum item from the minHeap. Update the minHeap by replacing the head of the minHeap with the next number from the linked list or by replacing the head of the minHeap with the last number of the minHeap followed by decreasing the size of heap by 1.
3. Repeat the above step2 until heap is not empty.
Data:short.MaxValue。
Memory size:1200 number( array of size)。
在这种场景下,我们决定每个文件放1000条,也就有33个小文件,也就有33个内存队列,每个队列取Top100条,Batch=500时刷新
硬盘,中转站存放33*2个数字(因为入中转站时打上了队列标记),最后内存活动最大总数为:sum=33(priority queue)*100(100 number / priority queue)+500(intermediate priority queue)+66(2 integer for each small file)=896<1200。
3. N way merge sort
1. divide and conquer
2. merge sort LogN layer and every two way merge take N so run time would be NLogN
3. Two way merge to N way merge
http://www.cnblogs.com/huangxincheng/archive/2012/12/19/2824943.html
all sorting algorithm for memory is enough to process
2. External sort:
for memory is NOT enough to process
need to architect
https://karticks.wordpress.com/tag/map-reduce/
http://www.geeksforgeeks.org/sort-numbers-stored-on-different-machines/#tfbml-data%7B%22close_iframe%22%3Atrue%2C%22location_url%22%3A%22http%3A%2F%2Fwww.geeksforgeeks.org%2Fsort-numbers-stored-on-different-machines%2F%22%7D
1. Store the head pointers of the linked lists in a minHeap of size N where N is the number of machines.
2. Extract the minimum item from the minHeap. Update the minHeap by replacing the head of the minHeap with the next number from the linked list or by replacing the head of the minHeap with the last number of the minHeap followed by decreasing the size of heap by 1.
3. Repeat the above step2 until heap is not empty.
Data:short.MaxValue。
Memory size:1200 number( array of size)。
在这种场景下,我们决定每个文件放1000条,也就有33个小文件,也就有33个内存队列,每个队列取Top100条,Batch=500时刷新
硬盘,中转站存放33*2个数字(因为入中转站时打上了队列标记),最后内存活动最大总数为:sum=33(priority queue)*100(100 number / priority queue)+500(intermediate priority queue)+66(2 integer for each small file)=896<1200。
3. N way merge sort
1. divide and conquer
2. merge sort LogN layer and every two way merge take N so run time would be NLogN
3. Two way merge to N way merge
http://www.cnblogs.com/huangxincheng/archive/2012/12/19/2824943.html
// demo.txt // input: max, create a file with number of max of lines of string public static void createData(int max){ var sw = new StreamWriter(Environment.CurrentDirectory + "//demo.txt"); for (int i = 0 ;i < max; i++){ Thread.sleep(2); var rand = new Random( (int)DateTime.Now.Ticks.Next(0, int.MaxValue >> 3) ; sw.WriteLine(rand); } sw.close(); }
// 1.txt, 2.txt......33.txt // Input : size (estimate a memory can handle size, so we chunk every small file with that size) // And the file is sorted, small.OrderBy(i=>i).Select(i=>i).ToList(); public static int split(int size){ int totalCount = 0; Listsmall = new List (); var sr = new StramReader(Environment.CurrentDirectory +"//demo.txt") ); var pageSize = size; int pageCount = 0; int pageIndex = 0; while (true) { var line = sr.readLine(); // Not yet done if (! string.IsNullOrEmpty(line)){ // Count to size for small totalCount++; small.Add(Convert.ToInt32(line)); if (totalCount % pageSize == 0){ pageIndex = totalCount/pageSize; small = small.OrderBy(i=>i).Select(i=>i).ToList(); File.WriteAllLines(Environment.CurrentDirectory+"//"+pageIndex+".txt", small.Select(i = > i.toString())); small.clear(); } } // Done else { pageCount = (int) Math.Ceiling((double)totalCount/pageSize); small = small.OrderBy(i=>i).Select(i=>i).ToList(); File.WriteAllLines(Environment.CurrentDirectory+"//"+pageCount+".txt", small.Select(i = > i.toString())); small.clear(); } } return pageCount; // how many small files }
// result.txt // add to Top N of small file to its corresponding priority queue and when empty(being processed to result), add another TopN into priority queue public static void AddQueue(int i, List> list, ref int[] skip, int top =100){ var result = File.ReadAllLines((Environment.CurrentDirectory+"//"+(i+1)+".txt")).Skip(skip[i]).Take(top).Select(j=> Convert.ToInt32(j)); // put into PQ foreach (var item in result){ list.get(i).Enqueue(null, item); } // next time skip number skip[i] += result.Count(); }
// Test // size = 1200 // pageSize = 1000 lines (1000lines per file) // pageCount = 33 (33 files) // 33 files => 33 Priority Queues // Build Priority Queue with Top100 from each file // DISK sum=33*100+500+66=896<1200 data-blogger-escaped-logn="" data-blogger-escaped-pre="" data-blogger-escaped-time:o=""> public void main (){ // 1. Data // Generate 2^15 data createData(short.MaxValue); // Number of lines in each small file var pageSize = 1000; // reset when achieve batchCount var batchCount = 500; // Number of small files needed var pageCount = split(pageSize); // 2. Chunk // memory limit 1500 lines List> list = new List >(); // Intermediate Converter PriorityQueue intermediateQueueControl = new PriorityQueue (); // Status of each priority queue boolean[] complete = new boolean[pageCount]; // All complete ? int allComplete = 0; // Define priority queues for (int i = 0 ; i < pageCount;i++){ list.add(new PriorityQueue ()); addQueue(i, list, ref skip); } // 3. Merge for (int i = 0; i < list.size();i++){ var temp = list.get(i).Dequeue(); intermediateQueueControl.Enqueue(i, temp.level); } List batch = new List (); int nextIndex = 0; while ( intermediateQueueControl.size() > 0 ) { // fetch data out var single = intermediateQueueControl.Dequeue(); // next fetched data nextIndex = signle.t.vlaue; var nextData = list.get(nextIndex).Dequeue(); // Empty, small file's priority queue empty if ( nextData == null ){ // Fetch data from file AddQueue(nextIndex, list, ref skip); // Fetch non data, meaning File empty if (list.get(nextIndex).size() == 0){ complete[nextIndex] = true; allComplete++; } else { nextData = list.get(i).Dequeue(); } } // Not empty, data go to intermediateQueueControl if ( nextData != null ){ intermediateQueueControl.Enqueue(nextIndex,nextData.level); } batch.add(single.level); if (batch.count == batchCount || allComplete == pagecount) { var sw = new StreamWriter(Environment.CurrentDirectory+"//result.txt", true); foreach(var item in batch){ sw.WriteLine(item); } sw.close(); batch.Clear(); } Console.WriteLine("Done"); Console.Read(); } // 4. clean }
class ListNode { int data; ListNode next; } class MinHeapNode { ListNode head; } class MinHeap { int count; int capacity; MinHeapNode[] array; } public MinHeap createMinHeap (int capacity){ MinHeap minHeap = new MinHeap; minHeap.capacity = capacity; minHeap.count = 0;// Initialize as ZERO minHeap.array = new MinHeapNode [minHeap.capacity]; return minHeap; } // Insert a new node at the beginning of the linked list public ListNode push(ListNode head, int new_data){ ListNode newNode = new ListNode(); newNode.data= new_data; newNode.next = head; return newNode; } public minHeapify(MinHeap minHeap, int idx){ int left, right, smallest; left = 2*idx+1; right = 2*idx+2; smallest = idx; if ( left < minHeap.count && minHeap.array[left].head.data < minHeap.array[smallest].head.data ){ smallest = left; } if ( right < minHeap.count && minHeap.array[right].head.data < minHeap.array[smallest].head.data ) { smallest = right; } if (smallest != idx){ MinHeapNode tmp = minHeap.array[smallest]; minHeap.array[smallest] = minHeap.array[idx]; minHeap.array[idx] = tmp; minHeapify(minHeap, smallest);//********** } } public boolean isEmpty(MinHeap minHeap){ return (minHeap.count == 0); } public void buildMinHeap(MinHeap minHEap){ int n = minHEap.count; for ( int i = (n-2)/2; i >=0; i-- ){ minHeapify(minHeap, i) } } public void populateMinHeap(MinHeap minHeap, ListNode[] array, int n){ for (int i = 0 ; i < n; i ++){ // count initialize as ZERO minHeap.array[minHeap.count++].head = array[i]; } buildMinHeap(minHeap) } public ListNode extractMin(MinHeap minHeap){ // validate the input if (isEmpty(minHeap)){ return null; } // relocate all nodes since idx 0 gone MinHeapNode tmp = minHeap.array[0]; if (tmp.head.next){ minHeap.array[0].head = tmp.head.next; // Empty, reduce the size } else { minHeap.array[0] = minHeap.array[minHEap.count-1]; minHEap.count--; } minHeapify(minHeap, 0); return tmp.head; } public void externalSort(LsitNode[] array, int N){ MinHeap minHeap = createMinHeap(N); populateMinHeap(minHeap, array, N); while ( !isEmpty(minHeap) ){ ListNode tmp = extractMin( minHEap ); System.out.println(tmp.data); } } public static void main(String[] args){ int N =3; // Number of machines ListNode[] array = new ListNode[3]; array[0]= null; push(array[0],50); push(array[0], 40); push(array[0], 30); array[1] = null; push(array[1], 45); push(array[1], 35); // insert at the beginning array[2] = null; push(array[0],100); push(array[0], 80); push(array[0], 70); push(array[0],60); push(array[0], 10); externalSort(array, N); return 0; } Output: 10 30 35 40 45 50 60 70 80 1004. Reference: http://www.geeksforgeeks.org/sort-numbers-stored-on-different-machines/#tfbml-data%7B%22close_iframe%22%3Atrue%2C%22location_url%22%3A%22http%3A%2F%2Fwww.geeksforgeeks.org%2Fsort-numbers-stored-on-different-machines%2F%22%7D http://www.cnblogs.com/huangxincheng/archive/2012/12/19/2824943.html
Subscribe to:
Posts (Atom)