capital-G · September 7, 2024 23:02
diff --git a/analyze_log.py b/analyze_log.py
 import re
 import sys

 import pandas as pd

 # Paste a *recent* "View raw logs" URL of a macOS/linux test run here, or pass
 # it as the first command line argument.  The URL is only valid within a
 # timeframe, so it needs to have been generated recently.
 LOG_URL = ""

 if not LOG_URL:
     # exit with a usage hint rather than an IndexError when no URL was given
     if len(sys.argv) < 2:
         sys.exit("usage: python analyze_log.py <raw-log-url>")
     LOG_URL = sys.argv[1]

 print(f"Try to parse {LOG_URL}")

 # load log file and split each line into a 29 character wide datetime column
 # and the remaining message (this is only approx but works 99.9%)
 raw_df = pd.read_fwf(LOG_URL, colspecs=[(0, 29), (29, -1)], header=None)

 # set column names for convenience
 raw_df.columns = ['time', 'message']

 # force str cast so the string operations below see text in every row
 raw_df['message'] = raw_df['message'].astype(str)

 # remove color formatting (ANSI escape sequences) from the messages
 ascii_escape_chars = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
 raw_df['message'] = raw_df['message'].str.replace(ascii_escape_chars, '', regex=True)

 # extract info via regex; the expected message shape is
 #   [<number>/<number>] <file_name>:<test_name> (<time>s)
 regex = r"\[\d*/\d*\]\s*(?P<file_name>.*):(?P<test_name>\S*)\s*\((?P<time>\S*)s\)"

 # rows that do not match come back as NaN and are dropped; the captured time
 # is text, so it is cast to float to make it sortable and summable
 df = raw_df['message'].str.extract(regex).dropna().astype({'time': float})

 # save values to html tables: first the 50 slowest individual tests
 df_top50 = df.sort_values('time', ascending=False).head(50)
 df_top50.to_html("top_50.html")

 # then the 30 files with the largest summed test time; the column is selected
 # with a list so the aggregate is a DataFrame that can be sorted by 'time'
 top30_files_df = (
     df.groupby("file_name")[['time']]
     .sum()
     .sort_values('time', ascending=False)
     .head(30)
 )
 top30_files_df.to_html("top_30_files.html")

 print("Finished")
	import re
	import sys

	import pandas as pd

	# Paste a recent "View raw logs" URL of a macOS/linux test run here, or pass
	# it as the first command line argument.  The URL is only valid within a
	# timeframe, so it needs to have been generated recently.
	LOG_URL = ""

	if not LOG_URL:
	    # exit with a usage hint rather than an IndexError when no URL was given
	    if len(sys.argv) < 2:
	        sys.exit("usage: python analyze_log.py <raw-log-url>")
	    LOG_URL = sys.argv[1]

	print(f"Try to parse {LOG_URL}")

	# load log file and split each line into a 29 character wide datetime column
	# and the remaining message (this is only approx but works 99.9%)
	raw_df = pd.read_fwf(LOG_URL, colspecs=[(0, 29), (29, -1)], header=None)

	# set column names for convenience
	raw_df.columns = ['time', 'message']

	# force str cast so the string operations below see text in every row
	raw_df['message'] = raw_df['message'].astype(str)

	# remove color formatting (ANSI escape sequences) from the messages
	ascii_escape_chars = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
	raw_df['message'] = raw_df['message'].str.replace(ascii_escape_chars, '', regex=True)

	# extract info via regex; the expected message shape is
	#   [<number>/<number>] <file_name>:<test_name> (<time>s)
	# The `*` quantifiers are required: without them only single-digit counters
	# and one-character names would match.
	regex = r"\[\d*/\d*\]\s*(?P<file_name>.*):(?P<test_name>\S*)\s*\((?P<time>\S*)s\)"

	# rows that do not match come back as NaN and are dropped; the captured time
	# is text, so it is cast to float to make it sortable and summable
	df = raw_df['message'].str.extract(regex).dropna().astype({'time': float})

	# save values to html tables: first the 50 slowest individual tests
	df_top50 = df.sort_values('time', ascending=False).head(50)
	df_top50.to_html("top_50.html")

	# then the 30 files with the largest summed test time; the column is selected
	# with a list so the aggregate is a DataFrame that can be sorted by 'time'
	top30_files_df = (
	    df.groupby("file_name")[['time']]
	    .sum()
	    .sort_values('time', ascending=False)
	    .head(30)
	)
	top30_files_df.to_html("top_30_files.html")

	print("Finished")