import xmltodict
import json

# Convert the <Record> children of an XML document's <LLMData> root into a
# JSON Lines file: one JSON object per line, one line per record.

# Input and output file paths
input_file = 'input.xml'
output_file = 'output.jsonl'

# NOTE(review): both files are opened with the platform's default text
# encoding; confirm that matches how input.xml is actually encoded.
with open(input_file, 'r') as xml_file, open(output_file, 'w') as jsonl_file:
    # Parse the whole XML document into nested dicts/lists
    parsed_data = xmltodict.parse(xml_file.read())

    # A missing <LLMData> root still raises KeyError (wrong document), but an
    # empty root parses to None, which we treat as "no records".
    llm_data = parsed_data['LLMData'] or {}
    records = llm_data.get('Record', [])

    # xmltodict only produces a list when a tag repeats; a lone <Record>
    # comes back as a single mapping. Iterating that mapping would yield its
    # keys, so wrap it to keep "one line per record" true in every case.
    if not isinstance(records, list):
        records = [records]

    # Write each record as one newline-terminated JSON document
    jsonl_file.writelines(json.dumps(record) + '\n' for record in records)