B+ tree Javascript implementation
When all is just brought down to a brute force dataprocessing some issues come to mind:
sometimes plain hash tables are just not fast enough;
sometimes physical data storage is not fast enough either – too many seeks etc;
Well, there are some structures which allow us to bypass or relieve some of the bottlenecks, like trees for instance.
Last week, while working on our dataset structure for cnxWhirlwind, I came to the idea of using a B+ tree implementation for indexing the storage structure.
While, it is probably one of the most efficient solutions for storing and retrieving chunks based on a key lookup from large amounts of data, it surprisingly was not the fastest one while implemented in Javascript AND used for a single node extraction search. Well, the range search is totally another beer
If you’re ok with the time penalty needed for building the B+ tree structure (ofc there’s a lot room for optimization), my bet is always on the B+ tree.
This is the current implementation and benchmark results :
Test box: Intel Core2Duo@2.67/4Gigs of RAM
Test browser: Google Chome v.3.0.195.21 and Mozilla Firefox v 3.5.3. (IE was excluded for obvious reasons )
Test with 500 000 rows of data (~5.5MB) against:
- plain hash ( Javascript object)
- array containing key/value object
- B+tree implementation
var LeafNode = new Class({ initialize: function(order){ this.order = order; this.isLeafNode = true; this.isInternalNode = false; this.parentNode = null; this.nextNode = null; this.prevNode = null; this.data = []; }, split: function(){ var tmp = new LeafNode(this.order); var m = Math.ceil(this.data.length / 2); var k = this.data[m-1].key; // Copy & shift data for(var i=0; i<m; i++){ tmp.data[i] = this.data.shift(); } tmp.parentNode = this.parentNode; tmp.nextNode = this; tmp.prevNode = this.prevNode; if(tmp.prevNode) tmp.prevNode.nextNode = tmp; this.prevNode = tmp; if(!this.parentNode){ var p = new InternalNode(this.order); this.parentNode = tmp.parentNode = p; } return this.parentNode.insert(k, tmp, this); }, insert: function(key, value){ var pos = 0; for(; pos<this.data.length; pos++){ if(this.data[pos].key == key) { this.data[pos].value = value; return null; } if(this.data[pos].key > key) break; } if(this.data[pos]) this.data.splice(pos, 0, {"key": key, "value": value}); else this.data.push({"key": key, "value": value}); // Split if(this.data.length > this.order) return this.split(); return null; } }); var InternalNode = new Class({ initialize: function(order){ this.order = order; this.isLeafNode = false; this.isInternalNode = true; this.parentNode = null; this.data = []; }, split: function(){ var m = null; if(this.order % 2){ var m = (this.data.length-1)/2 - 1; }else{ var m = (this.data.length-1)/2; } var tmp = new InternalNode(this.order); tmp.parentNode = this.parentNode; for(var i=0; i<m; i++){ tmp.data[i] = this.data.shift(); } for(var i=0; i<tmp.data.length; i+=2){ tmp.data[i].parentNode = tmp; } var key = this.data.shift(); if(!this.parentNode){ this.parentNode = tmp.parentNode = new InternalNode(this.order); } return this.parentNode.insert(key, tmp, this); }, insert: function(key, node1, node2){ if(this.data.length){ var pos = 1; for(; pos < this.data.length; pos+=2){ if(this.data[pos] > key) break; } if(this.data[pos]) { pos--; this.data.splice(pos, 0, key); this.data.splice(pos, 0, node1); }else{ this.data[pos-1] = node1; this.data.push(key); this.data.push(node2); } if(this.data.length > (this.order*2+1)){ return this.split(); } return null; }else{ this.data[0] = node1; this.data[1] = key; this.data[2] = node2; return this; } } }); var BPlusTree = new Class({ Implements: [Options], options: { order: 2 // Min 1 }, initialize: function(options){ this.setOptions(options); this.root = new LeafNode(this.options.order); }, set: function(key, value){ var node = this._search(key); var ret = node.insert(key, value); if(ret) this.root = ret; }, get: function(key){ var node = this._search(key); for(var i=0; i<node.data.length; i++){ if(node.data[i].key == key) return node.data[i].value; } return null; }, getNode: function(key){ return this._search(key); }, _search: function(key){ var current = this.root; var found = false; while(current.isInternalNode){ found = false; var len = current.data.length; for(var i=1; i<len; i+=2){ if( key <= current.data[i]) { current = current.data[i-1]; found = true; break; } } // Follow infinity pointer if(!found) current = current.data[len - 1]; } return current; }, // B+ tree dump routines walk: function(node, level, arr){ var current = node; if(!arr[level]) arr[level] = []; if(current.isLeafNode){ for(var i=0; i<current.data.length; i++){ arr[level].push("<"+current.data[i].key+">"); } arr[level].push(" -> "); }else{ for(var i=1; i<node.data.length; i+=2){ arr[level].push("<"+node.data[i]+">"); } arr[level].push(" -> "); for(var i=0; i<node.data.length; i+=2) { this.walk(node.data[i], level+1, arr); } } return arr; }, dump: function(){ var arr = []; this.walk(this.root, 0, arr); for(var i=0; i<arr.length; i++){ var s = ""; for(var j=0; j<arr[i].length; j++){ s += arr[i][j]; } console.log(s); } } });
Results:
Mozilla Firefox:
Populate B+-tree elapsed time: 41340
Populate object elapsed time: 750
Populate array elapsed time: 485
Search B+-tree elapsed time: 16
Search object elapsed time: 0
Search array elapsed time: 424539
Range search B+-tree elapsed time: 0
Range search object elapsed time: 122
Range search array elapsed time: 1420
Google Chrome:
Populate B+-tree elapsed time: 19585
Populate object elapsed time: 841
Populate array elapsed time: 510
Search B+-tree elapsed time: 7
Search object elapsed time: 0
Search array elapsed time: 9775
Range search B+-tree elapsed time: 0
Range search object elapsed time: 296
Range search array elapsed time: 586
Test routines:
var num = 500000; var key = "key"+250000; var key1 = "key"+200000; var key2 = "key"+300000; var searchCount = 1000; // Populate time var start = $time(); var tree = new BPlusTree({order: 100}); for(var i=0; i<num; i++){ tree.set("key"+i, i+" - value"); } var end = $time(); console.log("Populate B+-tree elapsed time: " + (end - start)); start = $time(); var obj = {}; for(var i=0; i<num; i++){ obj["key"+i] = i+" - value"; } end = $time(); console.log("Populate object elapsed time: " + (end - start)); start = $time(); var arr = []; for(var i=0; i<num; i++){ arr[i] = {"key": "key"+i, "value":(i+" - value")}; } end = $time(); console.log("Populate array elapsed time: " + (end - start)); // Search start = $time(); for(var i=0; i<searchCount;i++){ var x = tree.get(key); } end = $time(); console.log("Search B+-tree elapsed time: " + (end - start)); start = $time(); for(var i=0; i<searchCount;i++){ var x = obj[key]; } end = $time(); console.log("Search object elapsed time: " + (end - start)); start = $time(); for(var i=0; i<searchCount;i++){ for(var j=0; j<num; j++){ if(arr[j].key == key) { var x = arr[i].value; break; } } } end = $time(); console.log("Search array elapsed time: " + (end - start)); // Range search start = $time(); var node = tree.getNode(key1); var flag = true; while(flag){ for(var i=0; i<node.data.length;i++){ if(key1 <= node.data[i].key && node.data[i].key <= key2){ var x = node.data[i].value; }else{ flag = false; break; } } node = node.nextNode; } end = $time(); console.log("Range search B+-tree elapsed time: " + (end - start)); start = $time(); for(var p in obj){ if(key1 <= p && p <= key2){ var x = obj[p]; } } end = $time(); console.log("Range search object elapsed time: " + (end - start)); start = $time(); for(var i=0; i<arr.length; i++){ if(key1 <= arr[i].key && arr[i].key <= key2) { var x = arr[i].data; } } end = $time(); console.log("Range search array elapsed time: " + (end - start));
Resources:
- http://en.wikipedia.org/wiki/B%2B_tree
- http://www.mec.ac.in/resources/notes/notes/ds/bplus.htm
SVG/VML Javascript Chart part 1
Some time ago we’ve been working on a project, which required some reports and charts. We used an embedded version of Firefox 3 beta to run a web ajax application on the desktop. They have included some cool new features like native SVG support, which after some initial testing turned out to be working pretty well. Canvas was way more tested at that time, there were tons of demos on the web including some quite complex animations and even minigames. I’ve been wondering for a while which technology to use and chose SVG.
The main reasons for that were two:
1 It’s vector based, which means you don’t have to worry about things like resizing, opacity and gradient calculations – the browser will do all that for you.
2 It uses the DOM – if you need to get the whole thing interactive it should be quite easy to attach events to DOM elements and manipulate their attributes in a very familiar manner (haven’t tried to do that yet)
So we had a pseudo 3D SVG bar chart working well in Firefox 3, which was not even declared stable yet …
When it came to printing it though things got a bit more complex. I wanted to reuse as much of the code as I could. The natural way to do it was to move the whole SVG specific part in a separate renderer class and access different renderers through some graphics abstraction layer interface. The chart wouldn’t know if it renders as SVG or communicates with the XPCOM GDI printing component as long as they maintain the same interface and I could add other graphics implementations at a later time (hopefully without touching the chart code).
The time came when we needed to use the same component in a pure web application. Some browsers wouldn’t support SVG, others (like Opera back then) would support it very badly. Fighting the problem with minimal efforts, I decided to write a serverside renderer and generate images on the server which I could later fetch and display (I’ll post about that soon).
Few days ago around the whole blog mania, a colleague showed me something working with VML in MSIE. It was worth trying to write a VML renderer since it’s supported on MSIE 5 or later and I needed to post something anyway… The end result was not as good as I expected – MSIE seems to render VML way slower than other browsers do SVG, it’s usable though. Had to do some workarounds, but as a whole no major obstacles appeared.
Some of the tricky parts that worth mentioning:
1 When you do a transformation on a shape in SVG it will rotate the gradient or whatever you fill it with also. VML doesn’t do it exactly the same way so I needed to add a parameter for it.
2 While SVG rotation is based at (0, 0) VML rotates the shape around it’s center and some calculations are needed to adjust the coordinates.
I added some VML/SVG browser capabilities detection code, few buttons that change the chart parameters and packed everything together. You can download the whole thing from here.
Some parameter explanations:
graphics=new Cnx.Graphics(); renderer=graphics.createPreferredRendererInstance(document.getElementById("chartDemo"), 600, 400); if(renderer) { renderer.init(); chart=new Cnx.Chart({ values: [1, 3, 2, 5.5, 7, 4, 9, 10], // bar values titles: ["entry 1", "entry 2", "entry 3", "entry 4", "entry 5", "entry 6", "entry 7", "entry 8"], // bar titles style: "verticalBars", // the only type of chart we have so far width: 600, height: 400, spaceTop: 50, // padding spaceLeft: 80, spaceRight: 10, spaceBottom: 40, yCaption: "Rating", // the caption for the y axis xCaption: "Entry", // the caption for the x axis mainCaption: ["Entry ratings main caption", "Entry ratings main caption second line"], // caption displayed over the chart (can be more than 1 line) mainCaptionFont: {fontFamily: "Verdana", fontSize: 20}, axisVerticalMax: 11, // max value for the y axis axisVerticalMin: 0, // min value for the y axis axisVerticalValuesDisplay: [1, 4, 7, 10], // control values for the y axis - displays the values next to the axis and a dashed line valueFormat: null, // override display value function colors: [ // bar color gradients, if more values than colors round-robin {start: "#ff0000", end: "#6f0000", light: "#ffaaaa"}, {start: "#00ff00", end: "#006f00", light: "#aaffaa"}, {start: "#0000ff", end: "#00006f", light: "#aaaaff"}, {start: "#ffff00", end: "#6f6f00", light: "#ffffaa"}, {start: "#ff00ff", end: "#6f006f", light: "#ffaaff"}, {start: "#00ffff", end: "#006f6f", light: "#aaffff"}, {start: "#6f6f6f", end: "#ffffff", light: "#aaaaaa"} ], legendFlag: false, // display legend? legendHeight: 0, controlLineFlag: true, // display control line? controlLineValues: [3, 3, 5, 6, 7, 4, 9, 8], controlLineColor: "#ffffff", controlLineIncludeInLegend: true, controlLineLegendTitle: "Control line" }); chart.setRenderer(renderer); chart.render(); } else alert("Unsupported browser");
Things to do in future:
1 Make the chart animated and interactive (attach events to the bars).
2 Support other types of charts (piechart).
3 Canvas support would be nice to have, but not really a priority. The chances are good if you have canvas you have SVG also.
Any comments, suggestions and improvements are appreciated.
Credits:
While googling for VML related stuff I came across an interesting project, which aims at the same problems – http://prototype-graphic.xilinus.com/
It’s a framework based on prototype and adresses also cross browser graphics rendering. Doesn’t seem to be very advanced yet, supports canvas, SVG and VML for creating basic shapes. Digging into the source helped me solve some of the VML problems I had.
crescentfresh has a cool answer on how to detect SVG and VML support at http://stackoverflow.com/questions/654112/how-do-you-detect-support-for-vml-or-svg-in-a-browser