Skip to content

Instantly share code, notes, and snippets.

@JJL772
Created May 9, 2025 18:51
Show Gist options
  • Save JJL772/2db98832f8e0dceaef71638c10302a9f to your computer and use it in GitHub Desktop.
Save JJL772/2db98832f8e0dceaef71638c10302a9f to your computer and use it in GitHub Desktop.
Tiny exception analyzer thing using libclang
#!/usr/bin/env python3
import argparse
import json
import os
import shlex
from dataclasses import dataclass

import clang.cindex
from clang.cindex import Index, Cursor, CursorKind, TranslationUnitLoadError, TranslationUnit
# Command-line interface; the arguments are parsed in main().
parser = argparse.ArgumentParser()
parser.add_argument(
    '-p',
    required=True,
    type=str,
    help='Path to the directory containing compile_commands.json',
)
parser.add_argument(
    '-v',
    action='store_true',
    help='Verbose mode',
)
parser.add_argument(
    '-o',
    default='report.json',
    type=str,
    help='Report location',
)
parser.add_argument(
    'files',
    nargs='*',
    action='store',
    help='List of files to parse. if not provided, parse all in the clangdb',
)

# Module-wide verbosity switch: set by main() from the -v flag, read by vprint().
verbose = False
def vprint(msg: str) -> None:
    """Print ``msg`` only when the module-level ``verbose`` flag is set."""
    if not verbose:
        return
    print(msg)
@dataclass
class ClassInfo:
    """Name and base-class list extracted from a C++ class declaration."""

    # Spelling of the token that follows the 'class' keyword.
    name: str
    # Spellings of the base classes, in the order they were declared.
    bases: list[str]
class ExceptionAnalyzer:
def __init__(self, compiledb: str, ignores: list[str]):
self._index = Index.create()
self._sym_stack = []
self._cur_usr = ''
self._report = { 'throwers': {}, 'typeinfo': {}}
self._ignored_methods = ignores
self._method_cursor: Cursor = None
with open(compiledb, 'r') as fp:
self._compiledb: dict = json.load(fp)
# Process all files
def process_files(self, filter: list[str]|None) -> bool:
for o in self._compiledb:
if filter is not None and o['file'] not in filter:
continue
if not self._process_file(o['file'], o['arguments'][1:], o['directory']):
return False
return True
# Process a single file with arguments in a directory
def _process_file(self, file: str, args: list[str], dir: str) -> bool:
print(f'Processing {file}')
# Strip irrelevant arguments, else clang silently fails on us
args = [x for x in args if x.startswith('-D') or x.startswith('-I') or x.startswith('-std')]
vprint(f' [{",".join(args)}]')
olddir = os.getcwd()
try:
vprint(f'Entering {dir}')
os.chdir(dir)
# Parse the translation unit
tu = self._index.parse(file, args)
except TranslationUnitLoadError as e:
return False
finally:
vprint(f'Leaving {os.getcwd()}')
os.chdir(olddir)
for d in tu.diagnostics:
print(d)
# Pass 0: Find all 'throw' and build a list of direct throwers
self._process_cursor(tu.cursor, 0)
# Pass 1: Find all method calls and build a list of indirect throwers (i.e. calling a throwing method w/o try/catch)
self._process_cursor(tu.cursor, 1)
return True
def _parse_class_decl(self, cursor: Cursor, skip_fwd: bool) -> ClassInfo | None:
tokens = [x for x in cursor.get_tokens()]
tokens.reverse()
if len(tokens) < 2: return None
# Expect 'class'
if tokens.pop().spelling != 'class':
return None
# Expect 'class_name'
cls = ClassInfo(tokens.pop().spelling, [])
# Optional ':'
t = tokens.pop()
if t.spelling != ':':
# Skip forward decls as they lack base classes and such (usually)
if skip_fwd and t.spelling != '{':
return None
return cls
# [public|private|protected] ClassName [,]
while len(tokens) > 0:
if tokens[-1].spelling in ['private', 'protected', 'public']:
tokens.pop()
if tokens[-1].spelling == 'virtual':
tokens.pop()
cls.bases.append(tokens.pop().spelling)
# [,]
t = tokens.pop()
if len(tokens) > 0 and t.spelling != ',':
# Skip forward decls if requested
if skip_fwd and t.spelling != '{':
return None
break
return cls
# Generate a name for the current symbol
def _get_symbol_name(self) -> str:
return '::'.join(self._sym_stack)
# Generate a symbol name for the cursor
def _gen_symbol_name(self, cursor: Cursor) -> str:
result = ''
sp = cursor
while sp.kind in [CursorKind.CLASS_DECL, CursorKind.NAMESPACE] or sp == cursor:
if len(result) > 0:
result = f'{sp.spelling}::{result}'
else:
result = sp.spelling
sp = sp.semantic_parent
return result
# Get the exceptions for a symbol
def _get_exceptions_for_symbol(self, sym: str) -> list[str]:
o = self._report['throwers'].get(sym, None)
if o is not None:
return o['exceptions']
return []
# Process a single cursor position. pass 0 builds the initial list of throwers, pass 1 flattens the list
def _process_cursor(self, cursor: Cursor, pas: int):
has_pushed = False
match cursor.kind:
# Build some type information about the class
case CursorKind.CLASS_DECL:
c = self._parse_class_decl(cursor, True)
if c is not None:
name = f'{self._get_symbol_name()}::{c.name}' if len(self._sym_stack) > 0 else c.name
self._report['typeinfo'][name] = {
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}',
'bases': c.bases
}
# Handle nested classes
self._sym_stack.append(cursor.spelling)
has_pushed = True
# Recurse into methods we call
case CursorKind.CALL_EXPR:
# Skip this on pass 1
if pas != 1:
return
usr = cursor.semantic_parent.get_usr()
print(f'{usr} called {cursor.get_definition().get_usr()}')
if self._report['throwers'][usr] is None:
self._report['throwers'][usr] = {
'pretty_name': self._get_symbol_name(),
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}',
'exceptions': []
}
self._report['throwers'][usr]['exceptions'] += self._get_exceptions_for_symbol(cursor.get_definition().get_usr())
# Keep track of the location of the symbol that throws
case CursorKind.CONSTRUCTOR | CursorKind.DESTRUCTOR | CursorKind.CXX_METHOD | CursorKind.FUNCTION_TEMPLATE:
if cursor.spelling in self._ignored_methods:
return
# If parent is the root node or an include, it's in global scope
if cursor.semantic_parent.kind in [CursorKind.TRANSLATION_UNIT, CursorKind.INCLUSION_DIRECTIVE]:
self._sym_stack.append(cursor.displayname)
else:
self._sym_stack.append(f'{cursor.semantic_parent.spelling}::{cursor.displayname}')
has_pushed = True
#print(cursor.get_usr())
self._cur_usr = cursor.get_usr()
# Special handling for namespaces
case CursorKind.NAMESPACE:
# We're not interested in the STL here
if cursor.spelling == 'std' or cursor.spelling == '__gnu_cxx':
return
self._sym_stack.append(cursor.spelling)
has_pushed = True
# Ignore all exceptions that are caught
case CursorKind.CXX_TRY_STMT:
return
# Jackpot!
case CursorKind.CXX_THROW_EXPR:
# Skip throws on subsequent passes
if pas != 0:
return
vprint(f'{self._cur_usr} throws {" ".join([x.spelling for x in cursor.get_tokens()])}')
if self._cur_usr not in self._report['throwers']:
self._report['throwers'][self._cur_usr] = {
'pretty_name': self._get_symbol_name(),
'location': f'{cursor.extent.start.file}:{cursor.extent.start.line}',
'exceptions': []
}
es = ' '.join([x.spelling for x in cursor.get_tokens()])
if es not in self._report['throwers'][self._cur_usr]['exceptions']:
self._report['throwers'][self._cur_usr]['exceptions'].append(es)
case x:
pass
# Parse all children symbols
for c in cursor.get_children():
try:
self._process_cursor(c, pas)
except:
continue
# Finished parsing, pop the symbol
if has_pushed:
self._sym_stack.pop()
self._cur_usr = ''
#print(self._report)
def main():
    """Parse the command line, analyze the requested files and write the report.

    The JSON report is written to the ``-o`` location even when parsing
    failed part-way. The process exits with status 1 if any file failed to
    parse and 0 otherwise.
    """
    global verbose
    args = parser.parse_args()
    verbose = args.v

    # The analyzer is given absolute paths to filter on.
    files = [os.path.abspath(f) for f in args.files]

    analyzer = ExceptionAnalyzer(os.path.join(args.p, 'compile_commands.json'), ['throwMe'])

    # An empty file list means "process everything in the database".
    failed = not analyzer.process_files(files or None)
    if failed:
        print('Failed to parse everything :(')

    with open(args.o, 'w') as fp:
        json.dump(analyzer._report, fp, indent=2)

    # Raise SystemExit directly: the exit() builtin is only injected by the
    # site module and is meant for interactive use.
    raise SystemExit(1 if failed else 0)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment