Created
May 5, 2020 21:18
-
-
Save andrewmkhoury/ca7d53862c3af8214e42609454d19291 to your computer and use it in GitHub Desktop.
Apache Oak Groovy Script to find large binary properties in Oak JCR
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Adaptation of @stillalex's script from https://gist.github.com/stillalex/06303f8cc1d3780d3eab4c72575883ae
// This version works with Oak 1.6 and later versions.
import java.io.InputStream; | |
import java.util.concurrent.atomic.AtomicInteger | |
import org.apache.jackrabbit.oak.api.Type | |
import org.apache.jackrabbit.oak.spi.state.NodeState | |
import org.apache.jackrabbit.oak.spi.state.NodeStore | |
import org.apache.jackrabbit.oak.commons.PathUtils | |
import com.google.common.collect.Lists | |
import java.util.List | |
// Logger used by the top-level statements of this script.
org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger("countNodes.groovy")
/**
 * Recursively walks the tree rooted at {@code n}. Every property value is read so that
 * unreadable data surfaces as a logged error, and every binary longer than 10,000,000
 * bytes is logged with its size in MB. Child nodes named ":index" are skipped.
 * Read failures are logged and the walk continues with the next property or child.
 *
 * @param n        node state to inspect
 * @param deep     not used by this method; kept so existing callers keep working
 * @param path     path of {@code n}; used in log messages and to build child paths
 * @param flush    a progress line is logged whenever the running node count is a multiple of this
 * @param count    running node counter shared by the whole traversal
 * @param binaries running binary counter shared by the whole traversal
 * @param root     true only for the outermost call, which logs the start banner and the totals
 * @return the value of {@code count} when this call finishes
 */
def countNodes(NodeState n, deep = false, String path = "/", Integer flush = 1000, AtomicInteger count = new AtomicInteger(0), AtomicInteger binaries = new AtomicInteger(0), root = true) {
    org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger("countNodes.groovy")
    if (root) {
        log.info("Counting nodes in tree ${path}")
    }

    // Declared with def so it stays local: an undeclared assignment would land in the
    // script binding, where every recursive call (and any other thread) overwrites it.
    def visited = count.incrementAndGet()
    if (visited % flush == 0) {
        log.info(" " + visited)
    }

    // Avoid a doubled slash ("//child") when path already ends with one, e.g. the root "/".
    def prefix = path.endsWith("/") ? path : path + "/"

    // Counts one binary value and logs it when it exceeds the size threshold.
    // The length is read once because it is needed for both the check and the message.
    def recordBinary = { String propName, blob ->
        def size = blob.length()
        if (size > 10000000) {
            log.info("Binary ${prefix}@${propName}: " + size / 1024 / 1024 + " MB")
        }
        binaries.incrementAndGet()
    }

    try {
        for (prop in n.getProperties()) {
            try {
                if (prop.getType() == Type.BINARIES) {
                    for (blob in prop.getValue(Type.BINARIES)) {
                        recordBinary(prop.getName(), blob)
                    }
                } else if (prop.getType() == Type.BINARY) {
                    recordBinary(prop.getName(), prop.getValue(Type.BINARY))
                } else if (prop.isArray()) {
                    // Check regular properties for missing segments:
                    // iterating is enough, we only need every value to be read.
                    for (value in prop.getValue(prop.getType())) {
                        // do nothing
                    }
                } else {
                    prop.getValue(prop.getType())
                }
            } catch (e) {
                log.error("warning unable to read node properties ${path} ${prop.name}: " + e.getMessage())
            }
        }

        try {
            for (child in n.getChildNodeEntries()) {
                try {
                    if (child.getName().equals(":index")) {
                        log.info("Skipping sync index at ${prefix}${child.getName()}")
                        continue
                    }
                    countNodes(child.getNodeState(), deep, prefix + child.getName(), flush, count, binaries, false)
                } catch (e) {
                    log.error("warning unable to read child node ${path} : " + e.getMessage())
                }
            }
        } catch (e) {
            log.error("warning unable to read child entries ${path} : " + e.getMessage())
        }
    } catch (e) {
        log.error("warning unable to read node ${path} : " + e.getMessage())
    }

    // Read the shared counter for the total: the local 'visited' only holds the
    // count at the moment this node was entered.
    if (root) {
        log.info("Total nodes in tree ${path}: ${count.get()}")
        log.info("Total binaries in tree ${path}: ${binaries.get()}")
    }
    return count.get()
}
/**
 * Resolves {@code path} against the root of the NodeStore behind {@code session} and
 * runs the node/binary count on the node found there.
 *
 * "." elements of the path are dropped and ".." elements remove the preceding element
 * before the tree is walked. If no node exists at the resolved path, an error is logged
 * and 0 is returned instead of reporting a one-node tree.
 *
 * @param session session whose root node gives access to the underlying NodeStore
 * @param path    path of the subtree to count
 * @param deep    passed through unchanged to the NodeState-based countNodes
 * @return the number of nodes counted, or 0 when the path does not exist
 */
def countNodes(session, path, deep) {
    org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger("countNodes.groovy")
    NodeStore nstore = session.getRootNode().sessionDelegate.root.store

    // Normalise the path into a list of plain element names.
    List<String> elements = []
    PathUtils.elements(path).each { String element ->
        if (PathUtils.denotesParent(element)) {
            if (!elements.isEmpty()) {
                elements.remove(elements.size() - 1)
            }
        } else if (!PathUtils.denotesCurrent(element)) {
            elements.add(element)
        }
    }

    // Read-only navigation: walk the NodeState tree directly, no NodeBuilder needed.
    NodeState target = nstore.root
    for (String name in elements) {
        if (name) {
            target = target.getChildNode(name)
        }
    }

    if (!target.exists()) {
        log.error("warning unable to find node ${path} : nothing to count")
        return 0
    }
    countNodes(target, deep, path)
}
/**
 * Convenience entry point: obtains the SlingRepository service from the {@code osgi}
 * binding, opens an administrative session, counts the subtree at {@code path} with
 * deep = true, and always logs the session out afterwards.
 *
 * @param path path of the subtree to count
 * @return whatever the session-based countNodes returns
 */
def countNodes(path) {
    def adminSession = osgi.getService(org.apache.sling.jcr.api.SlingRepository).loginAdministrative(null)
    try {
        return countNodes(adminSession, path, true)
    } finally {
        adminSession.logout()
    }
}
// Run the count on a named background thread and wait for it to finish.
log.info("Running node counter")
// Or only check the async oak indexes
t1 = Thread.start("countNodes /oak:index") { countNodes("/oak:index") }
log.info("Done starting countNodes threads")
t1.join()
log.info("Done running countNodes")
// Last expression of the script, so the script itself evaluates to null.
null
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment