`perf` Explainer

perf Introduction

Using the perf tool centers around the following concepts:

  1. events - these are hardware interrupts, hardware counters, or software events that can be tracked
  2. targets - these are the CPUs, processes, or threads to monitor; events are tracked only while a target is executing (tracking is disabled otherwise)
  3. filters - these are predicates on event contexts that control which events are tracked in which contexts

As I understand it, perf can operate in two main modes (corresponding to the perf stat and perf record subcommands):

  1. perf stat - counts events that occur as the target executes and produces a set of global event counts, e.g., the total number of CPU cycles that elapsed while a process was executing (see the example after this list).

    This command introduces very low monitoring overhead, but it does not track event contexts and thus does not support filters.

  2. perf record - counts events that occur as the target executes in a particular execution context (if the context satisfies the filter) and produces a set of per-context event counts. Note that an execution context includes things like:

    • the name of the currently executing function (which will be unknown unless debugging symbols are present)
    • the object file that contains the currently executing function

    Note that perf record cannot record and associate literally every event occurrence, as they happen too frequently. Instead, it samples events (at a user-specified frequency) and records only the sampled events.

    Also note that, by default, perf record tracks the cycles event, which occurs once every CPU cycle. The effect is that it estimates (by sampling) how many CPU cycles were spent executing each function that your target program/thread runs (or that runs on your target CPU).
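
To make perf stat concrete, here is a minimal invocation (./my_program is a placeholder for your own binary; cycles, instructions, and cache-misses are standard event names):

    # Count three common events over the program's whole run and print
    # the global totals when it exits.
    perf stat -e cycles,instructions,cache-misses ./my_program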

For our purposes, we generally want perf record mode, as we want to know what context a particular event was associated with.

perf record

Here is a brief explainer of the perf record command (there are many more options that are not covered here; a combined example follows the option list):

perf record [-e <event|{event...}>] [--filter=<filter>] \
            [-F <freq>] [-c <count>]                    \
            [-g] [--call-graph fp|lbr|dwarf]            \
            [-o|--output <path>]                        \
            [-a] [-C|--cpu CPU...] [-p|--pid PID...]  [-t|--tid TID...] [--] [command]

where:

  • Line 1 options specify which events to track:

    • -e specifies the single event or brace-enclosed, comma-separated {event...} list to track

      If absent, the default event is cycles, i.e., this event occurs every CPU cycle.

    • --filter <filter> specifies the event/context filter predicate; if absent, all tracked events are counted

  • Line 2 options specify how often to sample events (these are mutually exclusive):

    • -F specifies a desired sampling frequency in samples per second
    • -c specifies that 1 out of every count events will be recorded

    If -F and -c are both absent, perf falls back to a default sampling frequency (4000 Hz in recent versions of perf).

  • Line 3 options specify call graph tracking (this extends the execution context to include the stack trace and not just the currently executing function):

    • -g enables call graph tracking
    • --call-graph <unwind-mode> specifies how perf attempts to unwind call stacks:
      • fp - (default) efficient, but based on frame pointers, so it does not work for code compiled with -fomit-frame-pointer
      • lbr - accurate and efficient, but relies on hardware last-branch-record support that is only available on modern CPUs
      • dwarf - accurate but slow, based on DWARF call frame information
  • Line 4 options specify where/how to record results:

    • -o - specifies path to output file where results are saved; if absent, default is perf.data
  • Line 5 options specify targets (in order of granularity):

    • -a - specifies all CPUs as targets (i.e., the entire system is profiled)
    • -C - specifies the named CPUs as targets
    • -p - specifies the named running processes as targets
    • -t - specifies the named threads as targets
    • command - executes command as a new process, makes that process the target, and terminates profiling automatically when the command process exits

    Note: if command is not specified, perf will not terminate until the user presses Ctrl+C.

    To make perf record collect data for a fixed duration for a non-command target (for example, the entire system), you can use the following pattern:

    perf record -a sleep 5
    

    Since -a was passed, the entire system will be profiled; however, since a command was passed, the profiling will terminate when the command process terminates (which, for sleep 5, occurs after 5 seconds). This also works for -C, -p, and -t.

    If instead you did:

    perf record -a
    

    then perf will only terminate when Ctrl+C is pressed.
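
Putting several of these options together, here is a sketch of two fuller invocations (the PID 1234 and the output name my-profile.data are placeholders; the second example assumes a kernel that exposes the sched:sched_switch tracepoint):

    # Sample the default cycles event at 99 Hz with DWARF-based call
    # graphs, profiling an existing process for 10 seconds:
    perf record -F 99 -g --call-graph dwarf -o my-profile.data -p 1234 -- sleep 10

    # Track a scheduler tracepoint system-wide, counting only context
    # switches away from bash (tracepoint filters typically require root):
    perf record -e sched:sched_switch --filter 'prev_comm == "bash"' -a -- sleep 5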

perf report

This tool consumes perf record data and visualizes it. The main options are listed below (see the man page or tutorial for more options), and an example invocation follows the list:

perf report [-i|--input <path>] [--stdio|--tui] [-g]

  • -i specifies an input file; if absent, the default is perf.data
  • --stdio generates a plain-text report on stdout, while --tui presents the data in an interactive terminal interface
  • -g tells the tool to visualize call graph hierarchies
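
For example, to render a text report (with call graphs) from the hypothetical my-profile.data file recorded above:

    # Write a plain-text report, including call graph hierarchies, to a file.
    perf report -i my-profile.data --stdio -g > report.txt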

Additional Notes

  1. Since the perf command talks to low-level hardware counters (and can monitor the entire system), it typically must be run with administrator privileges, so use sudo if necessary.

  2. Note that perf is actually part of the Linux kernel. This means you typically must install a kernel-specific version. To do this on Ubuntu, one does:

    apt-get install linux-tools-common linux-tools-generic linux-tools-`uname -r`
    
  3. Data produced by perf record can go stale over time (for example, if the binaries and libraries it references are later updated or removed, symbol resolution will break). I see two methods to deal with this:

    1. Generate a report using perf report ASAP after running perf record --- this trick avoids the staleness issue and is easy to do.

    2. However, there may be another approach that uses perf archive --- essentially, this tool scans your perf.data file, gathers debugging symbols for all of the libraries that it references, and dumps them into a compressed archive. This archive can then be unpacked into the perf build-id directory, which, by default, is $HOME/.debug (see the sketch after this list).

  4. Running perf record in a Docker container requires running the container with the --privileged flag, which gives the container root-like permissions on the host system --- this means such containers should be run only when performing profiling.
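
To make the perf archive workflow from note 3 concrete, here is a sketch (it assumes a perf.data file in the current directory; perf archive itself prints a hint with the tar command to run):

    # Bundle the debug symbols referenced by perf.data into an archive.
    perf archive perf.data    # produces perf.data.tar.bz2

    # Unpack the archive into the perf build-id cache (possibly on a
    # different machine) so symbol lookups keep working.
    mkdir -p ~/.debug
    tar xvf perf.data.tar.bz2 -C ~/.debug

    # Reports now resolve symbols even if the original binaries changed.
    perf report -i perf.data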

Additional Resources

Of these resources, I have so far found the tutorial most helpful --- but the examples page is a nice quick reference if you know what you're doing.
