Created
March 16, 2010 07:51
-
-
Save planbnet/333736 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Bookmarklet to calculate the most generic XPath for the current selection
// (helper utility for scraping websites)
/**
 * Bookmarklet entry point: works out a short, generic XPath expression for
 * the element containing the current text selection, outlines that element
 * with a dashed red border and shows the expression in a prompt() dialog so
 * it can be copied. If no expression can be determined (for example because
 * nothing is selected) an alert() is shown instead.
 *
 * Relies on the browser globals window, document, XPathResult, prompt and
 * alert. Returns nothing.
 */
function selectionxpath() {
  /**
   * Quotes a string as an XPath 1.0 string literal.
   * XPath 1.0 literals have no escape character, so a value containing both
   * quote kinds has to be assembled with concat().
   *
   * @param {string} value raw attribute value
   * @returns {string} an XPath expression that evaluates to `value`
   */
  function xpathLiteral( value ) {
    if (value.indexOf("'") === -1) return "'" + value + "'";
    if (value.indexOf('"') === -1) return '"' + value + '"';
    return "concat('" + value.split("'").join("',\"'\",'") + "')";
  }

  /**
   * Evaluates an XPath expression and returns the first match in document
   * order, or null when nothing matches.
   *
   * @param {string} expr XPath expression
   * @param {Node} contextNode node the expression is evaluated against
   * @returns {Node|null}
   */
  function firstMatch( expr, contextNode ) {
    return document.evaluate(expr, contextNode, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
  }

  /**
   * Builds the XPath expression for a node.
   *
   * The expression is assembled from up to four parts, each anchored below
   * the previous one:
   *   1. the nearest ancestor-or-self carrying an id,
   *   2. the nearest ancestor-or-self whose tag+class step resolves to it,
   *   3. the nearest ancestor-or-self whose bare tag step resolves to it,
   *   4. a positional child path (tag[index]) for whatever is left.
   * "Resolves to it" means the node is the FIRST match of the expression,
   * which is also how the caller evaluates the final result.
   *
   * @param {Node} sel element, or a node inside the element of interest
   * @returns {string|null} the expression, or null if `sel` has no element
   *   ancestor-or-self
   */
  function calculateShortestXpathOfElement( sel ) {
    // Selections normally start inside text nodes; describe the element
    // that contains the text, otherwise the positional tail below would be
    // skipped and the result would point at some distant ancestor.
    var target = sel;
    while (target && target.nodeType !== 1) {
      target = target.parentNode;
    }
    if (!target) return null;

    var contextDoc = target.ownerDocument;
    var xpath = "";
    var stop = null; // deepest ancestor already pinned down by `xpath`
    var node;

    // Walks from the target up to (excluding) `stop` and returns the first
    // element for which xpath + buildStep(element) resolves to that element.
    function findAnchor( buildStep ) {
      for (var n = target; n && n !== stop; n = n.parentNode) {
        if (n.nodeType !== 1) continue;
        var step = buildStep(n);
        if (step !== null && firstMatch(xpath + step, contextDoc) === n) {
          return { node: n, step: step };
        }
      }
      return null;
    }

    // Step for "descendant with this tag and exactly this class attribute";
    // null when the element has no class attribute at all.
    function classStep( n ) {
      var styleClass = n.getAttribute("class");
      if (styleClass == null) return null;
      return "//" + n.tagName.toLowerCase() + "[@class=" + xpathLiteral(styleClass) + "]";
    }

    // Step for "descendant with this tag".
    function tagStep( n ) {
      return "//" + n.tagName.toLowerCase();
    }

    // 1. Nearest element with an id. The typeof check skips forms whose
    //    `id` property is shadowed by a control named "id".
    for (node = target; node; node = node.parentNode) {
      if (typeof node.id === "string" && node.id !== "") break;
    }
    if (node) {
      xpath = "//" + node.tagName.toLowerCase() + "[@id=" + xpathLiteral(node.id) + "]";
      if (node === target) return xpath;
      stop = node;
    }

    // 2. and 3. Nearest element identified by tag+class, then by tag alone.
    var strategies = [ classStep, tagStep ];
    for (var s = 0; s < strategies.length; s++) {
      var anchor = findAnchor(strategies[s]);
      if (anchor) {
        xpath += anchor.step;
        if (anchor.node === target) return xpath;
        stop = anchor.node;
      }
    }

    // 4. Positional child path from the last anchor down to the target.
    var restPath = "";
    for (node = target; node && node.nodeType === 1 && node !== stop; node = node.parentNode) {
      var idx = 1;
      for (var sib = node.previousSibling; sib; sib = sib.previousSibling) {
        if (sib.nodeType === 1 && sib.tagName === node.tagName) idx++;
      }
      var xname = node.tagName.toLowerCase();
      // The index is omitted for the first sibling to keep the path generic.
      if (idx > 1) xname += "[" + idx + "]";
      restPath = "/" + xname + restPath;
    }
    return xpath + restPath;
  }

  /**
   * Builds the XPath expression for the deepest node that contains the
   * whole of the first selection range.
   *
   * @returns {string|null} the expression, or null when nothing is selected
   */
  function calculateShortestXpathOfSelection() {
    var selection = window.getSelection();
    // getRangeAt(0) throws when there is no range, so test the count first.
    if (!selection || selection.rangeCount === 0) return null;
    var range = selection.getRangeAt(0);
    return calculateShortestXpathOfElement(range.commonAncestorContainer);
  }

  var xpath = calculateShortestXpathOfSelection();
  if (!xpath) {
    alert("Could not determine generic xpath for selection");
    return;
  }

  // Highlight what the expression actually resolves to, so the user can see
  // whether it matches the intended element.
  var node = firstMatch(xpath, document);
  var canHighlight = !!(node && node.style);
  var border = "";
  if (canHighlight) {
    border = node.style.border || "";
    node.style.border = "2px dashed red";
  }
  try {
    prompt("Most generic xpath for selection:", xpath);
  } finally {
    // Always put the page back the way it was.
    if (canHighlight) node.style.border = border;
  }
}
// Run immediately: the script is meant to be used as a bookmarklet.
selectionxpath();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><body><p><a href="javascript:function%20selectionxpath()%20{function%20calculateShortestXpathOfElement(%20sel%20)%20{var%20node%20=%20sel;var%20nextId%20=%20null;var%20stop%20=%20null;var%20xpath%20=%20%22%22;while%20(true)%20{if%20(node.id%20&&%20node.id%20!=%20%22%22)%20{nextId%20=%20node.id;break;}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextId%20!=%20null)%20{xpath%20=%20%22//%22%20+%20node.tagName.toLowerCase()%20+%20%22[@id=%27%22%20+%20nextId%20+%20%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueClass%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20styleClass%20=%20node.getAttribute(%22class%22);if%20(styleClass%20!=%20null)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName+%22[@class=%27%22+styleClass+%22%27]%22;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueClass%20=%20styleClass;break;}}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueClass%20!=%20null)%20{xpath%20+=%20%22//%22+node.tagName.toLowerCase()+%22[@class=%27%22+nextUniqueClass+%22%27]%22;if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}node%20=%20sel;var%20nextUniqueTag%20=%20null;while%20(true)%20{if%20(node.nodeType%20===%201)%20{var%20tmpXpath%20=%20xpath+%22//%22+node.tagName;var%20tempResult%20=%20document.evaluate(tmpXpath,%20sel.ownerDocument,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null);if%20(tempResult.singleNodeValue%20==%20node)%20{nextUniqueTag%20=%20node.tagName;break;}}node%20=%20node.parentNode;if%20(node%20==%20stop)%20break;}if%20(nextUniqueTag%20!=%20null)%20{xpath%20+=%20%22//%22+node.tagName.toLowerCase();if%20(%20node%20==%20sel%20)%20{return%20xpath;}%20else%20{stop%20=%20node;}}var%20restPath%20=%20%22%22;for%20(node%20=%20sel;%20node%20&&%20node.no
deType%20==%201;%20node%20=%20node.parentNode)%20{if%20(node%20==%20stop)%20break;var%20idx%20=%201;for%20(var%20sib%20=%20node.previousSibling;%20sib%20;%20sib%20=%20sib.previousSibling)%20{if(sib.nodeType%20==%201%20&&%20sib.tagName%20==%20node.tagName)%20idx++;}var%20xname%20=%20node.tagName.toLowerCase();if%20(idx%20>%201)%20xname%20+=%20%22[%22%20+%20idx%20+%20%22]%22;restPath%20=%20%22/%22%20+%20xname%20+%20restPath;}var%20result%20=%20xpath%20+%20restPath;return%20result;}function%20depthOf(%20el%20)%20{i%20=%200;while%20(el)%20{el%20=%20el.parentNode;i++;}return%20i;}function%20calculateShortestXpathOfSelection()%20{var%20sel%20=%20window.getSelection().getRangeAt(0);if%20(!sel)%20return%20null;var%20start%20=%20sel.startContainer;var%20end%20=%20sel.endContainer;var%20i%20=%20depthOf(%20start%20);var%20j%20=%20depthOf(%20end%20);while%20(start%20!=%20end%20&&%20i%20!=%200%20&&%20j%20!=%200)%20{if%20(i%20>%20j)%20{start%20=%20start.parentNode;i--;}%20else%20{end%20=%20end.parentNode;j--;}}return%20calculateShortestXpathOfElement(start);}var%20xpath%20=%20calculateShortestXpathOfSelection();var%20node%20=%20document.evaluate(xpath,%20document,%20null,%20XPathResult.FIRST_ORDERED_NODE_TYPE,%20null).singleNodeValue;var%20border%20=%20node.style.border;if%20(!border)%20border%20=%20%22%22;node.style.border%20=%20%222px%20dashed%20red%22;if%20(xpath)%20{prompt(%22Most%20generic%20xpath%20for%20selection:%22,%20xpath);node.style.border%20=%20border;}%20else%20{alert(%22Could%20not%20determine%20generic%20xpath%20for%20selection%22);}}selectionxpath();">Selection XPath</a></p><p/><p>Drag the link to your bookmark bar</p></body></html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment