Skip to content

Instantly share code, notes, and snippets.

@achille
Created December 9, 2016 00:39
Show Gist options
  • Save achille/becaf55954ea51790787b5694384d84e to your computer and use it in GitHub Desktop.
Save achille/becaf55954ea51790787b5694384d84e to your computer and use it in GitHub Desktop.
// compactness() calculates how closely the resulting documents are located together.
// It compares the total size of the matched documents with the total size of the
// unique disk pages those documents reside on, and prints a short report.
//
// Parameters:
//   collection - name of the collection to query
//   query      - query document passed to find(); defaults to {} (match everything)
//   limit      - maximum number of documents to inspect; defaults to 0 (no limit)
//   blocksize  - optional page size in bytes; defaults to 4096
//
// Returns nothing; the page map and the summary are written with printjson()/print().
//
// Relies on cursor.showDiskLoc() adding a $diskLoc {file, offset} field to each
// document. ES5 syntax (var, function expressions, string concatenation) is kept on
// purpose so the script still runs in the older shells that provide showDiskLoc().
function compactness(collection, query, limit, blocksize) {
    var DISKLOC_OVERHEAD = 45; // $diskLoc info adds 45 bytes to Object.bsonsize()
    var pageSize = blocksize > 0 ? blocksize : 1024 * 4;
    var count = 0;
    var size = 0;
    var disklocs = {}; // one key per touched page, format: file-pageStartOffset, ie 1-4096, etc

    db.getCollection(collection).find(query || {}).limit(limit || 0).showDiskLoc().forEach(
        function (doc) {
            var docsize = Object.bsonsize(doc) - DISKLOC_OVERHEAD;
            var file = doc.$diskLoc.file;
            var offset = doc.$diskLoc.offset;
            // NOTE(review): assumes the document occupies the byte range
            // [offset, offset + docsize); any on-disk record header is not modelled.
            var firstPage = offset - (offset % pageSize);
            var lastByte = offset + Math.max(docsize, 1) - 1;
            var lastPage = lastByte - (lastByte % pageSize);
            var page;

            count++;
            size += docsize;

            // Mark every page between the first and the last byte of the document.
            // This also covers documents smaller than a page that straddle a page
            // boundary, and large documents that start in the middle of a page.
            for (page = firstPage; page <= lastPage; page += pageSize) {
                disklocs[file + "-" + page] = 1;
            }
        }
    );

    var numpages = Object.keys(disklocs).length;
    var numbytespages = pageSize * numpages;
    // Avoid printing "NaN%" when the query matched nothing (0 bytes / 0 pages).
    var percent = numbytespages > 0 ? Math.floor(100 * size / numbytespages) : 0;

    printjson(disklocs);
    print("Number of documents returned : " + count);
    print("Number of pages touched : " + numpages);
    print("Size of returned data in bytes: " + size);
    print("Size of pages touched by data : " + numbytespages);
    print("Compactness: " + percent + "%");
}
/* Example usage:
> compactness("large",{},1);
Number of documents returned : 1
Number of pages touched : 2
Size of returned data in bytes: 5142
Size of pages touched by data : 8192
Compactness: 62% */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment