Last active
July 11, 2023 09:09
Revisions
-
riga revised this gist
Jul 5, 2023 . 1 changed file with 1 addition and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -93,7 +93,6 @@ def evaluate_sample( # potentially run in parallel if n_parallel > 1: with ProcessPool(n_parallel) as pool: list(tqdm( pool.imap(_evaluate_file_mp, evaluation_args), @@ -109,7 +108,7 @@ def evaluate_sample( def evaluate_file(input_file_path: str, output_file_path: str) -> None: # prepare expressions expressions = klub_index_columns + klub_input_columns # load the klub array f = uproot.open(input_file_path) -
riga created this gist
Jul 5, 2023 . There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,147 @@ # coding: utf-8 from __future__ import annotations import os from fnmatch import fnmatch from multiprocessing import Pool as ProcessPool from typing import Any from tqdm import tqdm import numpy as np import awkward as ak import uproot # # configurations # masses = [ 250, 260, 270, 280, 300, 320, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 1000, 1250, 1500, 1750, 2000, 2500, 3000, ] spins = [0, 2] baseline_selection = ( "isLeptrigger & " "((pairType == 0) | (pairType == 1) | (pairType == 2)) & " "(nleps == 0) & " "(nbjetscand > 1)" ) klub_index_columns = [ "EventNumber", "RunNumber", "lumi", ] klub_input_columns = [ # TODO: add the full list of required klub input branches here ] # # NN evaluation # def evaluate_events(events: ak.Array) -> ak.Array: # TODO: add actual evaluation and return an ak array with results return ak.zip({"dnn_output": np.ones(len(events))}) # # high-level evaluation functions # def evaluate_samples( skim_directory: str, output_directory: str, n_parallel: int = 1, ) -> None: # get a list of all sample names in the klub directory sample_names = [] for sample_name in os.listdir(skim_directory): sample_dir = os.path.join(skim_directory, sample_name) if os.path.isdir(sample_dir) and os.path.exists(os.path.join(sample_dir, "output_0.root")): sample_names.append(sample_name) # start the evaluation print(f"evaluating {len(sample_names)} samples") for sample_name in sample_names: evaluate_sample(skim_directory, output_directory, sample_name, n_parallel=n_parallel) def evaluate_sample( skim_directory: str, output_directory: str, sample_name: str, n_parallel: int = 1, ) -> None: print(f"evaluate 
{sample_name} ...") # ensure that the output directory exists output_sample_dir = os.path.join(output_directory, sample_name) output_sample_dir = os.path.expandvars(os.path.expanduser(output_sample_dir)) if not os.path.exists(output_sample_dir): os.makedirs(output_sample_dir) # determine all file names to load input_sample_dir = os.path.join(skim_directory, sample_name) evaluation_args = [ (os.path.join(input_sample_dir, file_name), os.path.join(output_sample_dir, file_name)) for file_name in os.listdir(input_sample_dir) if fnmatch(file_name, "output_*.root") ] # potentially run in parallel if n_parallel > 1: # run in parallel with ProcessPool(n_parallel) as pool: list(tqdm( pool.imap(_evaluate_file_mp, evaluation_args), total=len(evaluation_args), )) else: list(tqdm( map(_evaluate_file_mp, evaluation_args), total=len(evaluation_args), )) print("done") def evaluate_file(input_file_path: str, output_file_path: str) -> None: # prepare expressions expressions = klub_index_columns + klub_index_columns # load the klub array f = uproot.open(input_file_path) input_array = f["HTauTauTree"].arrays(expressions=expressions, cut=baseline_selection) # run the evaluation output_array = evaluate_events(input_array) # add index columns for c in klub_index_columns: output_array = ak.with_field(output_array, input_array[c], c) # save the output as root output_file = uproot.recreate(output_file_path) output_file["evaluation"] = dict(zip(output_array.fields, ak.unzip(output_array))) def _evaluate_file_mp(args: Any) -> None: return evaluate_file(*args) # entry hook if __name__ == "__main__": # evaluate_samples( # skim_directory="/eos/user/t/tokramer/hhbbtautau/skims/2017", # output_directory="/eos/user/m/mrieger/hhres_dnn_datacards/nn/2017", # n_parallel=1, # ) evaluate_sample( skim_directory="/eos/user/t/tokramer/hhbbtautau/skims/2017", output_directory="/eos/user/m/mrieger/hhres_dnn_datacards/nn/2017", sample_name="SKIM_ggF_Radion_m900", n_parallel=1, )