Created
May 1, 2024 17:27
-
-
Save michael-simons/277ceb4c5df241f532ebffa71a24b5b2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
///usr/bin/env jbang "$0" "$@" ; exit $?
//JAVA 17
//DEPS info.picocli:picocli:4.7.5
//DEPS info.picocli:picocli-codegen:4.7.5
//DEPS org.neo4j:neo4j-cypher-dsl-parser:2023.9.7
//DEPS com.opencsv:opencsv:5.9
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.neo4j.cypherdsl.core.StatementCatalog;
import org.neo4j.cypherdsl.parser.CypherParser;
import com.opencsv.CSVReaderBuilder;
import picocli.CommandLine;
/**
 * Run with
 * <pre>
 * {@code
 * jbang compare_query_and_schema.java \
 *   --queries ~/tmp/text2cypher_gpt4turbo.csv \
 *   --schemas ~/tmp/text2cypher_schemas.csv
 * }
 * </pre>
 */
@CommandLine.Command(mixinStandardHelpOptions = true) | |
public class compare_query_and_schema implements Callable<Integer> { | |
@CommandLine.Option(names = "--schemas", required = true) | |
private Path schemaPath; | |
@CommandLine.Option(names = "--queries", required = true) | |
private Path queries; | |
public static void main(String... args) { | |
int exitCode = new CommandLine(new compare_query_and_schema()).execute(args); | |
System.exit(exitCode); | |
} | |
record Schema(Map<String, Set<String>> nodeProperties, Map<String, Set<String>> relationshipProperties) { | |
} | |
static class SchemaParser { | |
private Map<String, Set<String>> target = null; | |
private String currentEntity = null; | |
private Set<String> currentProperties = null; | |
private void checkEntityState() { | |
if (currentEntity == null || currentProperties == null) { | |
return; | |
} | |
target.put(currentEntity, currentProperties); | |
currentProperties = null; | |
currentEntity = null; | |
} | |
Schema parse(String in) { | |
var nodeProperties = new HashMap<String, Set<String>>(); | |
var relationshipProperties = new HashMap<String, Set<String>>(); | |
var relPattern = Pattern.compile("\\(.*\\)<?-\\[:(.*)]->?\\(.*\\)"); | |
Matcher relMatcher; | |
try (var scanner = new Scanner(in)) { | |
while (scanner.hasNextLine()) { | |
var line = scanner.nextLine(); | |
if ("Node properties:".equals(line)) { | |
checkEntityState(); | |
target = nodeProperties; | |
} else if ("Relationship properties:".equals(line)) { | |
checkEntityState(); | |
target = relationshipProperties; | |
} else if ("The relationships:".equals(line)) { | |
checkEntityState(); | |
target = null; | |
} else if (line.startsWith("- **") && target != null) { | |
checkEntityState(); | |
currentEntity = line.substring(4, line.lastIndexOf("**")); | |
currentProperties = new HashSet<>(); | |
} else if (line.startsWith(" - `") && currentProperties != null) { | |
currentProperties.add(line.substring(line.indexOf("`"), line.indexOf(":")).replace("`", "")); | |
} else if ((relMatcher = relPattern.matcher(line)).matches()) { | |
if (!relationshipProperties.containsKey(relMatcher.group(1))) { | |
relationshipProperties.put(relMatcher.group(1), new HashSet<>()); | |
} | |
} | |
} | |
} | |
return new Schema(nodeProperties, relationshipProperties); | |
} | |
} | |
@Override | |
public Integer call() throws Exception { | |
var schemas = new HashMap<String, Schema>(); | |
var schemaParser = new SchemaParser(); | |
try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(schemaPath))).withSkipLines(1).build()) { | |
String[] nextRecord; | |
while ((nextRecord = csvReader.readNext()) != null) { | |
schemas.put(nextRecord[0], schemaParser.parse(nextRecord[1])); | |
} | |
} | |
int currentLine = 0; | |
try (var csvReader = new CSVReaderBuilder(new InputStreamReader(Files.newInputStream(queries))).withSkipLines(1).build()) { | |
String[] nextRecord; | |
while ((nextRecord = csvReader.readNext()) != null) { | |
++currentLine; | |
var database = nextRecord[3]; | |
var cypher = nextRecord[1]; | |
var schema = schemas.get(database); | |
try { | |
var catalog = CypherParser.parse(nextRecord[1]).getCatalog(); | |
var allLabels = catalog.getNodeLabels().stream().map(StatementCatalog.Token::value); | |
var allTypes = catalog.getRelationshipTypes().stream().map(StatementCatalog.Token::value); | |
var labelsNotInSchema = allLabels | |
.filter(Predicate.not(schema.nodeProperties::containsKey)) | |
.toList(); | |
var typesNotInSchema = allTypes | |
.filter(Predicate.not(schema.relationshipProperties::containsKey)) | |
.toList(); | |
var propertiesNotInSchema = new HashSet<String>(); | |
catalog.getProperties().forEach(property -> { | |
property.owningToken().forEach(token -> { | |
Set<String> properties = switch (token.type()) { | |
case NODE_LABEL -> schema.nodeProperties.getOrDefault(token.value(), Set.of()); | |
case RELATIONSHIP_TYPE -> | |
schema.relationshipProperties.getOrDefault(token.value(), Set.of()); | |
}; | |
if (!properties.contains(property.name())) { | |
propertiesNotInSchema.add(token.value() + "." + property.name()); | |
} | |
}); | |
}); | |
if (!labelsNotInSchema.isEmpty()) { | |
System.out.printf("Line %d, database %s, additional labels generated for '%s': %s%n", currentLine, database, nextRecord[0], labelsNotInSchema); | |
} | |
if (!typesNotInSchema.isEmpty()) { | |
System.out.printf("Line %d, database %s, additional types generated for '%s': %s%n", currentLine, database, nextRecord[0], typesNotInSchema); | |
} | |
if (!propertiesNotInSchema.isEmpty()) { | |
System.out.printf("Line %d, database %s, additional properties generated for '%s': %s%n", currentLine, database, nextRecord[0], propertiesNotInSchema); | |
} | |
} catch (Exception e) { | |
System.out.printf("Line %d, database %s, invalid Cypher generated for '%s': %s%n", currentLine, database, nextRecord[0], cypher); | |
} | |
} | |
} | |
return 0; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment