Skip to content

Instantly share code, notes, and snippets.

@riga
Last active July 11, 2023 09:09

Revisions

  1. riga revised this gist Jul 5, 2023. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions evaluate_klub.py
    Original file line number Diff line number Diff line change
    @@ -93,7 +93,6 @@ def evaluate_sample(

    # potentially run in parallel
    if n_parallel > 1:
    # run in parallel
    with ProcessPool(n_parallel) as pool:
    list(tqdm(
    pool.imap(_evaluate_file_mp, evaluation_args),
    @@ -109,7 +108,7 @@ def evaluate_sample(

    def evaluate_file(input_file_path: str, output_file_path: str) -> None:
    # prepare expressions
    expressions = klub_index_columns + klub_index_columns
    expressions = klub_index_columns + klub_input_columns

    # load the klub array
    f = uproot.open(input_file_path)
  2. riga created this gist Jul 5, 2023.
    147 changes: 147 additions & 0 deletions evaluate_klub.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,147 @@
    # coding: utf-8

    from __future__ import annotations

    import os
    from fnmatch import fnmatch
    from multiprocessing import Pool as ProcessPool
    from typing import Any

    from tqdm import tqdm
    import numpy as np
    import awkward as ak
    import uproot


    #
    # configurations
    #

    # resonance mass points to consider (presumably in GeV - TODO confirm)
    masses = [
        250, 260, 270, 280, 300,
        320, 350, 400, 450, 500,
        550, 600, 650, 700, 750,
        800, 850, 900, 1000, 1250,
        1500, 1750, 2000, 2500, 3000,
    ]

    # resonance spin hypotheses to consider
    spins = [0, 2]

    # cut expression applied when reading events, all conditions are and-combined
    baseline_selection = " & ".join([
        "isLeptrigger",
        "((pairType == 0) | (pairType == 1) | (pairType == 2))",
        "(nleps == 0)",
        "(nbjetscand > 1)",
    ])

    # branches that are copied to the output alongside the evaluation results
    klub_index_columns = ["EventNumber", "RunNumber", "lumi"]

    # branches that are read as inputs for the evaluation
    # TODO: add the full list of required klub input branches here
    klub_input_columns = []


    #
    # NN evaluation
    #

    def evaluate_events(events: ak.Array) -> ak.Array:
        """
        Placeholder for the actual NN evaluation. Returns an awkward array with a single
        field "dnn_output" that holds a constant value of one per entry of *events*.
        """
        # TODO: add actual evaluation and return an ak array with results
        n_events = len(events)
        dummy_scores = np.ones(n_events)
        return ak.zip({"dnn_output": dummy_scores})


    #
    # high-level evaluation functions
    #

    def evaluate_samples(
        skim_directory: str,
        output_directory: str,
        n_parallel: int = 1,
    ) -> None:
        """
        Evaluates all samples found in *skim_directory* and passes *output_directory* and
        *n_parallel* on to :py:func:`evaluate_sample` for each of them. An entry of
        *skim_directory* counts as a sample if it is a directory that contains a file named
        "output_0.root".
        """
        def is_sample(name: str) -> bool:
            # a sample is a sub directory that holds at least the first output file
            candidate_dir = os.path.join(skim_directory, name)
            if not os.path.isdir(candidate_dir):
                return False
            return os.path.exists(os.path.join(candidate_dir, "output_0.root"))

        # collect the names of all samples in the skim directory
        sample_names = [name for name in os.listdir(skim_directory) if is_sample(name)]

        # evaluate them one after another
        print(f"evaluating {len(sample_names)} samples")
        for name in sample_names:
            evaluate_sample(skim_directory, output_directory, name, n_parallel=n_parallel)


    def evaluate_sample(
        skim_directory: str,
        output_directory: str,
        sample_name: str,
        n_parallel: int = 1,
    ) -> None:
        """
        Evaluates all files of a single sample.

        The files are looked up in the directory *sample_name* inside *skim_directory* and
        must match the pattern "output_*.root". For each of them, an output file with the same
        name is written to the directory *sample_name* inside *output_directory*, which is
        created if it does not exist yet. User and environment variables are expanded in the
        output path only. When *n_parallel* is larger than one, the files are processed by a
        pool of that many processes, and sequentially otherwise.
        """
        print(f"evaluate {sample_name} ...")

        # ensure that the output directory exists
        # (exist_ok avoids a failure when a concurrent job creates the directory between a
        # separate existence check and the creation)
        output_sample_dir = os.path.join(output_directory, sample_name)
        output_sample_dir = os.path.expandvars(os.path.expanduser(output_sample_dir))
        os.makedirs(output_sample_dir, exist_ok=True)

        # determine (input path, output path) pairs of all files to evaluate
        input_sample_dir = os.path.join(skim_directory, sample_name)
        evaluation_args = [
            (os.path.join(input_sample_dir, file_name), os.path.join(output_sample_dir, file_name))
            for file_name in os.listdir(input_sample_dir)
            if fnmatch(file_name, "output_*.root")
        ]

        # potentially run in parallel, list() drains the lazy iterators so that all files are
        # processed while tqdm shows the progress
        if n_parallel > 1:
            with ProcessPool(n_parallel) as pool:
                list(tqdm(
                    pool.imap(_evaluate_file_mp, evaluation_args),
                    total=len(evaluation_args),
                ))
        else:
            list(tqdm(
                map(_evaluate_file_mp, evaluation_args),
                total=len(evaluation_args),
            ))
        print("done")


    def evaluate_file(input_file_path: str, output_file_path: str) -> None:
        """
        Evaluates the events of a single file.

        Reads the index and input columns of all events passing the baseline selection from
        the tree "HTauTauTree" in *input_file_path*, runs :py:func:`evaluate_events` on them
        and writes the results, extended by the index columns, to a tree named "evaluation"
        in a newly created file at *output_file_path*.
        """
        # prepare expressions, index columns identify the events in the output while input
        # columns are those required by the evaluation
        expressions = klub_index_columns + klub_input_columns

        # load the klub array, the context manager closes the file once the array is read
        with uproot.open(input_file_path) as f:
            input_array = f["HTauTauTree"].arrays(expressions=expressions, cut=baseline_selection)

        # run the evaluation
        output_array = evaluate_events(input_array)

        # add index columns
        for c in klub_index_columns:
            output_array = ak.with_field(output_array, input_array[c], c)

        # save the output as root, closing the file when done
        with uproot.recreate(output_file_path) as output_file:
            output_file["evaluation"] = dict(zip(output_array.fields, ak.unzip(output_array)))


    def _evaluate_file_mp(args: Any) -> None:
        """
        Adapter around :py:func:`evaluate_file` for use with map-like functions that pass a
        single object per call. *args* is unpacked into the positional arguments.
        """
        result = evaluate_file(*args)
        return result


    # entry hook
    if __name__ == "__main__":
        # evaluates a single sample; to process all samples of the skim directory instead,
        # call evaluate_samples() with the same skim and output directories and n_parallel=1
        evaluate_sample(
            "/eos/user/t/tokramer/hhbbtautau/skims/2017",
            "/eos/user/m/mrieger/hhres_dnn_datacards/nn/2017",
            "SKIM_ggF_Radion_m900",
            n_parallel=1,
        )