Created
May 9, 2025 18:51
-
-
Save JJL772/2db98832f8e0dceaef71638c10302a9f to your computer and use it in GitHub Desktop.
Tiny exception analyzer thing using libclang
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from clang.cindex import Index, Cursor, CursorKind, TranslationUnitLoadError, TranslationUnit | |
import argparse | |
import json | |
import os | |
from dataclasses import dataclass | |
import clang.cindex | |
# Command-line interface of the analyzer.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-p',
    type=str,
    required=True,
    help='Path to the directory containing compile_commands.json',
)
parser.add_argument(
    '-v',
    action='store_true',
    help='Verbose mode',
)
parser.add_argument(
    '-o',
    type=str,
    default='report.json',
    help='Report location',
)
parser.add_argument(
    'files',
    action='store',
    nargs='*',
    help='List of files to parse. if not provided, parse all in the clangdb',
)

# Module-wide verbosity flag read by vprint(); main() overwrites it from -v.
verbose = False
def vprint(msg: str):
    """Print *msg*, but only while the module-level ``verbose`` flag is set."""
    if not verbose:
        return
    print(msg)
@dataclass
class ClassInfo:
    """Name and base-class list extracted from a C++ class declaration."""

    # Spelling of the class-name token
    name: str
    # Spellings of the base classes, in declaration order
    bases: list[str]
class ExceptionAnalyzer: | |
def __init__(self, compiledb: str, ignores: list[str]): | |
self._index = Index.create() | |
self._sym_stack = [] | |
self._cur_usr = '' | |
self._report = { 'throwers': {}, 'typeinfo': {}} | |
self._ignored_methods = ignores | |
self._method_cursor: Cursor = None | |
with open(compiledb, 'r') as fp: | |
self._compiledb: dict = json.load(fp) | |
# Process all files | |
def process_files(self, filter: list[str]|None) -> bool: | |
for o in self._compiledb: | |
if filter is not None and o['file'] not in filter: | |
continue | |
if not self._process_file(o['file'], o['arguments'][1:], o['directory']): | |
return False | |
return True | |
# Process a single file with arguments in a directory | |
def _process_file(self, file: str, args: list[str], dir: str) -> bool: | |
print(f'Processing {file}') | |
# Strip irrelevant arguments, else clang silently fails on us | |
args = [x for x in args if x.startswith('-D') or x.startswith('-I') or x.startswith('-std')] | |
vprint(f' [{",".join(args)}]') | |
olddir = os.getcwd() | |
try: | |
vprint(f'Entering {dir}') | |
os.chdir(dir) | |
# Parse the translation unit | |
tu = self._index.parse(file, args) | |
except TranslationUnitLoadError as e: | |
return False | |
finally: | |
vprint(f'Leaving {os.getcwd()}') | |
os.chdir(olddir) | |
for d in tu.diagnostics: | |
print(d) | |
# Pass 0: Find all 'throw' and build a list of direct throwers | |
self._process_cursor(tu.cursor, 0) | |
# Pass 1: Find all method calls and build a list of indirect throwers (i.e. calling a throwing method w/o try/catch) | |
self._process_cursor(tu.cursor, 1) | |
return True | |
def _parse_class_decl(self, cursor: Cursor, skip_fwd: bool) -> ClassInfo | None: | |
tokens = [x for x in cursor.get_tokens()] | |
tokens.reverse() | |
if len(tokens) < 2: return None | |
# Expect 'class' | |
if tokens.pop().spelling != 'class': | |
return None | |
# Expect 'class_name' | |
cls = ClassInfo(tokens.pop().spelling, []) | |
# Optional ':' | |
t = tokens.pop() | |
if t.spelling != ':': | |
# Skip forward decls as they lack base classes and such (usually) | |
if skip_fwd and t.spelling != '{': | |
return None | |
return cls | |
# [public|private|protected] ClassName [,] | |
while len(tokens) > 0: | |
if tokens[-1].spelling in ['private', 'protected', 'public']: | |
tokens.pop() | |
if tokens[-1].spelling == 'virtual': | |
tokens.pop() | |
cls.bases.append(tokens.pop().spelling) | |
# [,] | |
t = tokens.pop() | |
if len(tokens) > 0 and t.spelling != ',': | |
# Skip forward decls if requested | |
if skip_fwd and t.spelling != '{': | |
return None | |
break | |
return cls | |
# Generate a name for the current symbol | |
def _get_symbol_name(self) -> str: | |
return '::'.join(self._sym_stack) | |
# Generate a symbol name for the cursor | |
def _gen_symbol_name(self, cursor: Cursor) -> str: | |
result = '' | |
sp = cursor | |
while sp.kind in [CursorKind.CLASS_DECL, CursorKind.NAMESPACE] or sp == cursor: | |
if len(result) > 0: | |
result = f'{sp.spelling}::{result}' | |
else: | |
result = sp.spelling | |
sp = sp.semantic_parent | |
return result | |
# Get the exceptions for a symbol | |
def _get_exceptions_for_symbol(self, sym: str) -> list[str]: | |
o = self._report['throwers'].get(sym, None) | |
if o is not None: | |
return o['exceptions'] | |
return [] | |
# Process a single cursor position. pass 0 builds the initial list of throwers, pass 1 flattens the list | |
def _process_cursor(self, cursor: Cursor, pas: int): | |
has_pushed = False | |
match cursor.kind: | |
# Build some type information about the class | |
case CursorKind.CLASS_DECL: | |
c = self._parse_class_decl(cursor, True) | |
if c is not None: | |
name = f'{self._get_symbol_name()}::{c.name}' if len(self._sym_stack) > 0 else c.name | |
self._report['typeinfo'][name] = { | |
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}', | |
'bases': c.bases | |
} | |
# Handle nested classes | |
self._sym_stack.append(cursor.spelling) | |
has_pushed = True | |
# Recurse into methods we call | |
case CursorKind.CALL_EXPR: | |
# Skip this on pass 1 | |
if pas != 1: | |
return | |
usr = cursor.semantic_parent.get_usr() | |
print(f'{usr} called {cursor.get_definition().get_usr()}') | |
if self._report['throwers'][usr] is None: | |
self._report['throwers'][usr] = { | |
'pretty_name': self._get_symbol_name(), | |
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}', | |
'exceptions': [] | |
} | |
self._report['throwers'][usr]['exceptions'] += self._get_exceptions_for_symbol(cursor.get_definition().get_usr()) | |
# Keep track of the location of the symbol that throws | |
case CursorKind.CONSTRUCTOR | CursorKind.DESTRUCTOR | CursorKind.CXX_METHOD | CursorKind.FUNCTION_TEMPLATE: | |
if cursor.spelling in self._ignored_methods: | |
return | |
# If parent is the root node or an include, it's in global scope | |
if cursor.semantic_parent.kind in [CursorKind.TRANSLATION_UNIT, CursorKind.INCLUSION_DIRECTIVE]: | |
self._sym_stack.append(cursor.displayname) | |
else: | |
self._sym_stack.append(f'{cursor.semantic_parent.spelling}::{cursor.displayname}') | |
has_pushed = True | |
#print(cursor.get_usr()) | |
self._cur_usr = cursor.get_usr() | |
# Special handling for namespaces | |
case CursorKind.NAMESPACE: | |
# We're not interested in the STL here | |
if cursor.spelling == 'std' or cursor.spelling == '__gnu_cxx': | |
return | |
self._sym_stack.append(cursor.spelling) | |
has_pushed = True | |
# Ignore all exceptions that are caught | |
case CursorKind.CXX_TRY_STMT: | |
return | |
# Jackpot! | |
case CursorKind.CXX_THROW_EXPR: | |
# Skip throws on subsequent passes | |
if pas != 0: | |
return | |
vprint(f'{self._cur_usr} throws {" ".join([x.spelling for x in cursor.get_tokens()])}') | |
if self._cur_usr not in self._report['throwers']: | |
self._report['throwers'][self._cur_usr] = { | |
'pretty_name': self._get_symbol_name(), | |
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}', | |
'exceptions': [] | |
} | |
es = ' '.join([x.spelling for x in cursor.get_tokens()]) | |
if es not in self._report['throwers'][self._cur_usr]['exceptions']: | |
self._report['throwers'][self._cur_usr]['exceptions'].append(es) | |
case x: | |
pass | |
# Parse all children symbols | |
for c in cursor.get_children(): | |
try: | |
self._process_cursor(c, pas) | |
except: | |
continue | |
# Finished parsing, pop the symbol | |
if has_pushed: | |
self._sym_stack.pop() | |
self._cur_usr = '' | |
#print(self._report) | |
def main():
    """Parse the command line, analyze the requested files and write the report.

    The report is written even when processing failed part-way. The process
    exits with status 1 on failure and 0 otherwise.
    """
    global verbose
    args = parser.parse_args()
    verbose = args.v

    # Make a list of files; paths are made absolute before being used as a filter
    files = [os.path.abspath(f) for f in args.files]

    analyzer = ExceptionAnalyzer(os.path.join(args.p, 'compile_commands.json'), ['throwMe'])

    # An empty list means "no filter": process everything in the database
    fail = not analyzer.process_files(files if files else None)
    if fail:
        print('Failed to parse everything :(')

    with open(args.o, 'w') as fp:
        json.dump(analyzer._report, fp, indent=2)

    # The builtin exit() is injected by the site module and is not guaranteed
    # to exist; raising SystemExit works everywhere.
    raise SystemExit(1 if fail else 0)
# Run the analyzer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment