When all is just brought down to a brute force dataprocessing some issues come to mind:
sometimes plain hash tables are just not fast enough;
sometimes physical data storage is not fast enough either – too many seeks etc;

Well, there are some structures which allow us to bypass or relieve some of the bottlenecks, like trees for instance.
Last week, while working on our dataset structure for cnxWhirlwind, I came to the idea of using a B+ tree implementation for indexing the storage structure.
While, it is probably one of the most efficient solutions for storing and retrieving chunks based on a key lookup from large amounts of data, it surprisingly was not the fastest one while implemented in Javascript AND used for a single node extraction search. Well, the range search is totally another beer :)
If you’re ok with the time penalty needed for building the B+ tree structure (ofc there’s a lot room for optimization), my bet is always on the B+ tree.

This is the current implementation and benchmark results :

Test box: Intel Core2Duo@2.67/4Gigs of RAM
Test browser: Google Chome v.3.0.195.21 and Mozilla Firefox v 3.5.3. (IE was excluded for obvious reasons :) )
Test with 500 000 rows of data (~5.5MB) against:
- plain hash ( Javascript object)
- array containing key/value object
- B+tree implementation

var LeafNode = new Class({
 
	initialize: function(order){
		this.order = order;
		this.isLeafNode = true;
		this.isInternalNode = false;
 
		this.parentNode = null;
		this.nextNode = null;
		this.prevNode = null;
 
		this.data = [];
	},
 
	split: function(){
		var tmp = new LeafNode(this.order);
		var m = Math.ceil(this.data.length / 2);
		var k = this.data[m-1].key;
 
		// Copy & shift data
		for(var i=0; i<m; i++){
			tmp.data[i] = this.data.shift();
		}
		tmp.parentNode = this.parentNode;
		tmp.nextNode = this;
		tmp.prevNode = this.prevNode;
		if(tmp.prevNode) tmp.prevNode.nextNode = tmp;
		this.prevNode = tmp;
 
		if(!this.parentNode){
			var p = new InternalNode(this.order);
			this.parentNode = tmp.parentNode = p;
		}
 
		return this.parentNode.insert(k, tmp, this);
	},
 
	insert: function(key, value){
		var pos = 0;
		for(; pos<this.data.length; pos++){
			if(this.data[pos].key == key) {
				this.data[pos].value = value;
				return null;
			}
			if(this.data[pos].key > key) break;
		}
 
		if(this.data[pos]) this.data.splice(pos, 0, {"key": key, "value": value});
		else this.data.push({"key": key, "value": value});
 
		// Split
		if(this.data.length > this.order) return this.split();
		return null;
	}
 
});
 
var InternalNode = new Class({
 
	initialize: function(order){
		this.order = order;
		this.isLeafNode = false;
		this.isInternalNode = true;
 
		this.parentNode = null;
 
		this.data = [];
	},
 
	split: function(){
		var m = null;
		if(this.order % 2){
			var m = (this.data.length-1)/2 - 1;
		}else{
			var m = (this.data.length-1)/2;
		}
 
		var tmp = new InternalNode(this.order);
		tmp.parentNode = this.parentNode;
		for(var i=0; i<m; i++){
			tmp.data[i] = this.data.shift();
		}
		for(var i=0; i<tmp.data.length; i+=2){
			tmp.data[i].parentNode = tmp;
		}
		var key = this.data.shift();
 
		if(!this.parentNode){
			this.parentNode = tmp.parentNode = new InternalNode(this.order);
		}
 
		return this.parentNode.insert(key, tmp, this);
	},
 
	insert: function(key, node1, node2){
		if(this.data.length){
			var pos = 1;
			for(; pos < this.data.length; pos+=2){
				if(this.data[pos] > key) break;
			}
 
			if(this.data[pos]) {
				pos--;
				this.data.splice(pos, 0, key);
				this.data.splice(pos, 0, node1);
			}else{
				this.data[pos-1] = node1;
				this.data.push(key);
				this.data.push(node2);
			}
 
			if(this.data.length > (this.order*2+1)){
				return this.split();
			}
			return null;
		}else{
			this.data[0] = node1;
			this.data[1] = key;
			this.data[2] = node2;
			return this;
		}
	}
 
});
 
 
var BPlusTree = new Class({
 
	Implements: [Options],
 
	options: {
		order: 2 // Min 1
	},
 
	initialize: function(options){
		this.setOptions(options);
		this.root = new LeafNode(this.options.order);
	},
 
	set: function(key, value){
		var node = this._search(key);
		var ret = node.insert(key, value);
		if(ret) this.root = ret;
	},
 
	get: function(key){
		var node = this._search(key);
		for(var i=0; i<node.data.length; i++){
			if(node.data[i].key == key) return node.data[i].value;
		}
		return null;
	},
 
	getNode: function(key){
		return this._search(key);
	},
 
	_search: function(key){
		var current = this.root;
		var found = false;
 
		while(current.isInternalNode){
			found = false;
			var len = current.data.length;
			for(var i=1; i<len; i+=2){
				if( key <= current.data[i]) {
					current = current.data[i-1];
					found = true;
					break;
				}
			}
 
			// Follow infinity pointer
			if(!found) current = current.data[len - 1];
		}
 
		return current;
	},
 
	// B+ tree dump routines
	walk: function(node, level, arr){
		var current = node;
		if(!arr[level]) arr[level] = [];
 
		if(current.isLeafNode){
			for(var i=0; i<current.data.length; i++){
				arr[level].push("<"+current.data[i].key+">");
			}
			arr[level].push(" -> ");
		}else{
 
			for(var i=1; i<node.data.length; i+=2){
				arr[level].push("<"+node.data[i]+">");
			}
			arr[level].push(" -> ");
			for(var i=0; i<node.data.length; i+=2) {
				this.walk(node.data[i], level+1, arr);
			}
 
		}
		return arr;
	},
 
	dump: function(){
		var arr = [];
		this.walk(this.root, 0, arr);
		for(var i=0; i<arr.length; i++){
			var s = "";
			for(var j=0; j<arr[i].length; j++){
				s += arr[i][j];
			}
			console.log(s);
		}
	}
});

Results:

Mozilla Firefox:
Populate B+-tree elapsed time: 41340
Populate object elapsed time: 750
Populate array elapsed time: 485
Search B+-tree elapsed time: 16
Search object elapsed time: 0
Search array elapsed time: 424539
Range search B+-tree elapsed time: 0
Range search object elapsed time: 122
Range search array elapsed time: 1420

Google Chrome:
Populate B+-tree elapsed time: 19585
Populate object elapsed time: 841
Populate array elapsed time: 510
Search B+-tree elapsed time: 7
Search object elapsed time: 0
Search array elapsed time: 9775
Range search B+-tree elapsed time: 0
Range search object elapsed time: 296
Range search array elapsed time: 586

Test routines:

var num = 500000;
	var key = "key"+250000;
	var key1 = "key"+200000;
	var key2 = "key"+300000;
 
	var searchCount = 1000;
 
	// Populate time
	var start = $time();
	var tree = new BPlusTree({order: 100});
	for(var i=0; i&lt;num; i++){
		tree.set("key"+i, i+" - value");
	}
	var end = $time();
	console.log("Populate B+-tree elapsed time: " + (end - start));
 
	start = $time();
	var obj = {};
	for(var i=0; i&lt;num; i++){
		obj["key"+i] = i+" - value";
	}
	end = $time();
	console.log("Populate object elapsed time: " + (end - start));
 
	start = $time();
	var arr = [];
	for(var i=0; i&lt;num; i++){
		arr[i] = {"key": "key"+i, "value":(i+" - value")};
	}
	end = $time();
	console.log("Populate array elapsed time: " + (end - start));
 
	// Search
	start = $time();
	for(var i=0; i&lt;searchCount;i++){
		var x = tree.get(key);
	}
	end = $time();
	console.log("Search B+-tree elapsed time: " + (end - start));
 
	start = $time();
	for(var i=0; i&lt;searchCount;i++){
		var x = obj[key];
	}
	end = $time();
	console.log("Search object elapsed time: " + (end - start));
 
	start = $time();
	for(var i=0; i&lt;searchCount;i++){
		for(var j=0; j&lt;num; j++){
			if(arr[j].key == key) {
				var x = arr[i].value;
				break;
			}
		}
	}
 
	end = $time();
	console.log("Search array elapsed time: " + (end - start));	
 
	// Range search
	start = $time();
	var node = tree.getNode(key1);
	var flag = true;
	while(flag){
		for(var i=0; i&lt;node.data.length;i++){
			if(key1 &lt;= node.data[i].key &amp;&amp; node.data[i].key &lt;= key2){
				var x = node.data[i].value;
			}else{
				flag = false;
				break;
			}
		}
		node = node.nextNode;
	}
	end = $time();
	console.log("Range search B+-tree elapsed time: " + (end - start));
 
	start = $time();
	for(var p in obj){
		if(key1 &lt;= p &amp;&amp; p &lt;= key2){
			var x = obj[p];
		}
	}
	end = $time();
	console.log("Range search object elapsed time: " + (end - start));
 
	start = $time();
	for(var i=0; i&lt;arr.length; i++){
		if(key1 &lt;= arr[i].key &amp;&amp; arr[i].key &lt;= key2) {
			var x = arr[i].data;
		}
	}
	end = $time();
	console.log("Range search array elapsed time: " + (end - start));

Resources:
- http://en.wikipedia.org/wiki/B%2B_tree
- http://www.mec.ac.in/resources/notes/notes/ds/bplus.htm


by nucleo