evandhoffman · June 6, 2025 12:13
diff --git a/weight.py b/weight.py
 import xml.etree.ElementTree as ET
 import logging
 import re
 import json
 import csv
 from dateutil import parser as date_parser
 from datetime import datetime

 def _iter_record_xml(lines):
    """Yield ``(line_num, xml)`` for each complete ``<Record>`` element.

    A record starts on the line where ``<Record`` appears and is complete
    either when its opening tag self-closes with ``/>`` or, when the
    element has children, on the line containing ``</Record>``.
    ``line_num`` is the 1-based number of the line that completed the
    record.  A record still open when the input ends is discarded with a
    warning.
    """
    record_start = re.compile(r'<Record\b')
    buffer = []       # stripped lines of the record being collected
    tag_open = False  # True until the opening <Record ...> tag has ended

    for line_num, line in enumerate(lines, 1):
        text = line.strip()
        if not buffer:
            match = record_start.search(text)
            if match is None:
                continue
            # Drop anything that precedes the tag on the same line.
            text = text[match.start():]
            tag_open = True
        buffer.append(text)

        if '</Record>' in text or (tag_open and text.endswith('/>')):
            # Join with a space: attributes split across lines must stay
            # separated by whitespace or the XML is no longer well-formed.
            yield line_num, " ".join(buffer)
            buffer = []
        elif tag_open and text.endswith('>'):
            # The opening tag ended without self-closing, so child elements
            # follow; a child's own "/>" must not terminate the record.
            tag_open = False

    if buffer:
        logging.warning("Unterminated <Record> at end of input was discarded")


 def extract_body_weight_records(xml_path):
    """Collect body-weight samples from the XML file at *xml_path*.

    The file is read line by line rather than parsed as a whole.  Every
    ``<Record>`` element found is handed to ``process_record``, which
    fills the result for the records it accepts.

    Args:
        xml_path: Path of the UTF-8 encoded XML file to scan.

    Returns:
        dict populated by ``process_record`` (timestamp -> weight).
    """
    weights = {}
    seen = set()  # (datetime, weight) tuples to dedupe

    with open(xml_path, 'r', encoding='utf-8') as f:
        for line_num, record_xml in _iter_record_xml(f):
            # Best-effort: one bad record must not abort the whole scan.
            try:
                process_record(record_xml, weights, seen)
            except Exception as e:
                logging.warning(f"Error processing record at line {line_num}: {e}")

    return weights

 # Multipliers that convert a value in the given unit string to pounds.
 # NOTE(review): the keys are assumed to be the unit strings found in the
 # record's "unit" attribute -- confirm against a real export.
 _LB_PER_UNIT = {
    "lb": 1.0,
    "kg": 1000 / 453.59237,
    "g": 1 / 453.59237,
    "oz": 1 / 16,
    "st": 14.0,
 }


 def process_record(record_str, weights, seen):
    """Parse one ``<Record>`` element and store it if it is a body-mass sample.

    Records whose ``type`` is not ``HKQuantityTypeIdentifierBodyMass`` are
    ignored.  The value is converted to pounds according to the record's
    ``unit`` attribute (a missing attribute is treated as pounds; an
    unrecognised unit is kept unconverted and reported).  Any failure while
    parsing is logged and the record is skipped.

    Args:
        record_str: XML text of a single record element.
        weights: dict updated in place, ``startDate`` datetime -> pounds.
        seen: set of ``(datetime, weight)`` pairs already stored; updated
            in place so exact duplicates are ignored.
    """
    try:
        elem = ET.fromstring(record_str)
        if elem.attrib.get("type") != "HKQuantityTypeIdentifierBodyMass":
            return

        weight_lb = float(elem.attrib["value"])
        unit = elem.attrib.get("unit", "lb")
        factor = _LB_PER_UNIT.get(unit)
        if factor is None:
            logging.warning(f"Unknown weight unit {unit!r}; value kept unconverted")
        else:
            weight_lb *= factor

        start = elem.attrib["startDate"]
        try:
            # Strict fast path for "YYYY-MM-DD HH:MM:SS +ZZZZ" timestamps.
            dt_obj = datetime.strptime(start, "%Y-%m-%d %H:%M:%S %z")
        except ValueError:
            # Any other layout goes through dateutil's lenient parser.
            dt_obj = date_parser.parse(start)

        key = (dt_obj, weight_lb)
        if key not in seen:
            seen.add(key)
            weights[dt_obj] = weight_lb
    except Exception as e:
        # Deliberate best-effort: a bad record is reported and skipped.
        logging.warning(f"Malformed record skipped: {e}")

 def write_json(weights, filename="weight.json"):
    """Dump *weights* to *filename* as a JSON object and print a summary.

    Entries are emitted in sorted order; each key is the timestamp's
    ``isoformat()`` string and each value is the stored weight.

    Args:
        weights: dict mapping datetime objects to weights.
        filename: Destination path; defaults to ``weight.json``.
    """
    ordered = sorted(weights.items())
    serializable_data = dict(
        (stamp.isoformat(), value) for stamp, value in ordered
    )

    with open(filename, "w", encoding="utf-8") as out_file:
        json.dump(serializable_data, out_file, indent=2)

    count = len(weights)
    print(f"Wrote {count} records to {filename}")

 def write_csv(weights, filename="weight.csv"):
    """Write *weights* to *filename* as CSV and print a summary.

    The file starts with a ``timestamp,weight_lb`` header, followed by one
    row per entry in sorted order.  Timestamps are formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        weights: dict mapping datetime objects to weights.
        filename: Destination path; defaults to ``weight.csv``.
    """
    stamp_format = "%Y-%m-%d %H:%M:%S"
    ordered = sorted(weights.items())

    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["timestamp", "weight_lb"])
        writer.writerows(
            [stamp.strftime(stamp_format), value] for stamp, value in ordered
        )

    count = len(weights)
    print(f"Wrote {count} records to {filename}")

 if __name__ == "__main__":
    # Script entry point: read the export named on the command line and
    # write both output files using their default names.
    import argparse

    logging.basicConfig(level=logging.WARNING)

    cli = argparse.ArgumentParser(
        description="Extract and deduplicate Apple Health body weight data to JSON and CSV."
    )
    cli.add_argument("xml_file", help="Path to export.xml")
    export_path = cli.parse_args().xml_file

    records = extract_body_weight_records(export_path)

    write_json(records)
    write_csv(records)
	import xml.etree.ElementTree as ET
	import logging
	import re
	import json
	import csv
	from dateutil import parser as date_parser
	from datetime import datetime

	def _iter_record_xml(lines):
	    """Yield ``(line_num, xml)`` for each complete ``<Record>`` element.

	    A record starts on the line where ``<Record`` appears and is complete
	    either when its opening tag self-closes with ``/>`` or, when the
	    element has children, on the line containing ``</Record>``.
	    ``line_num`` is the 1-based number of the line that completed the
	    record.  A record still open when the input ends is discarded with a
	    warning.
	    """
	    record_start = re.compile(r'<Record\b')
	    buffer = []       # stripped lines of the record being collected
	    tag_open = False  # True until the opening <Record ...> tag has ended

	    for line_num, line in enumerate(lines, 1):
	        text = line.strip()
	        if not buffer:
	            match = record_start.search(text)
	            if match is None:
	                continue
	            # Drop anything that precedes the tag on the same line.
	            text = text[match.start():]
	            tag_open = True
	        buffer.append(text)

	        if '</Record>' in text or (tag_open and text.endswith('/>')):
	            # Join with a space: attributes split across lines must stay
	            # separated by whitespace or the XML is no longer well-formed.
	            yield line_num, " ".join(buffer)
	            buffer = []
	        elif tag_open and text.endswith('>'):
	            # The opening tag ended without self-closing, so child elements
	            # follow; a child's own "/>" must not terminate the record.
	            tag_open = False

	    if buffer:
	        logging.warning("Unterminated <Record> at end of input was discarded")


	def extract_body_weight_records(xml_path):
	    """Collect body-weight samples from the XML file at *xml_path*.

	    The file is read line by line rather than parsed as a whole.  Every
	    ``<Record>`` element found is handed to ``process_record``, which
	    fills the result for the records it accepts.

	    Args:
	        xml_path: Path of the UTF-8 encoded XML file to scan.

	    Returns:
	        dict populated by ``process_record`` (timestamp -> weight).
	    """
	    weights = {}
	    seen = set()  # (datetime, weight) tuples to dedupe

	    with open(xml_path, 'r', encoding='utf-8') as f:
	        for line_num, record_xml in _iter_record_xml(f):
	            # Best-effort: one bad record must not abort the whole scan.
	            try:
	                process_record(record_xml, weights, seen)
	            except Exception as e:
	                logging.warning(f"Error processing record at line {line_num}: {e}")

	    return weights

	# Multipliers that convert a value in the given unit string to pounds.
	# NOTE(review): the keys are assumed to be the unit strings found in the
	# record's "unit" attribute -- confirm against a real export.
	_LB_PER_UNIT = {
	    "lb": 1.0,
	    "kg": 1000 / 453.59237,
	    "g": 1 / 453.59237,
	    "oz": 1 / 16,
	    "st": 14.0,
	}


	def process_record(record_str, weights, seen):
	    """Parse one ``<Record>`` element and store it if it is a body-mass sample.

	    Records whose ``type`` is not ``HKQuantityTypeIdentifierBodyMass`` are
	    ignored.  The value is converted to pounds according to the record's
	    ``unit`` attribute (a missing attribute is treated as pounds; an
	    unrecognised unit is kept unconverted and reported).  Any failure while
	    parsing is logged and the record is skipped.

	    Args:
	        record_str: XML text of a single record element.
	        weights: dict updated in place, ``startDate`` datetime -> pounds.
	        seen: set of ``(datetime, weight)`` pairs already stored; updated
	            in place so exact duplicates are ignored.
	    """
	    try:
	        elem = ET.fromstring(record_str)
	        if elem.attrib.get("type") != "HKQuantityTypeIdentifierBodyMass":
	            return

	        weight_lb = float(elem.attrib["value"])
	        unit = elem.attrib.get("unit", "lb")
	        factor = _LB_PER_UNIT.get(unit)
	        if factor is None:
	            logging.warning(f"Unknown weight unit {unit!r}; value kept unconverted")
	        else:
	            weight_lb *= factor

	        start = elem.attrib["startDate"]
	        try:
	            # Strict fast path for "YYYY-MM-DD HH:MM:SS +ZZZZ" timestamps.
	            dt_obj = datetime.strptime(start, "%Y-%m-%d %H:%M:%S %z")
	        except ValueError:
	            # Any other layout goes through dateutil's lenient parser.
	            dt_obj = date_parser.parse(start)

	        key = (dt_obj, weight_lb)
	        if key not in seen:
	            seen.add(key)
	            weights[dt_obj] = weight_lb
	    except Exception as e:
	        # Deliberate best-effort: a bad record is reported and skipped.
	        logging.warning(f"Malformed record skipped: {e}")

	def write_json(weights, filename="weight.json"):
	    """Dump *weights* to *filename* as a JSON object and print a summary.

	    Entries are emitted in sorted order; each key is the timestamp's
	    ``isoformat()`` string and each value is the stored weight.

	    Args:
	        weights: dict mapping datetime objects to weights.
	        filename: Destination path; defaults to ``weight.json``.
	    """
	    serializable_data = {
	        dt.isoformat(): weight for dt, weight in sorted(weights.items())
	    }
	    with open(filename, "w", encoding="utf-8") as out_file:
	        json.dump(serializable_data, out_file, indent=2)
	    print(f"Wrote {len(weights)} records to {filename}")

	def write_csv(weights, filename="weight.csv"):
	    """Write *weights* to *filename* as CSV and print a summary.

	    The file starts with a ``timestamp,weight_lb`` header, followed by one
	    row per entry in sorted order.  Timestamps are formatted as
	    ``YYYY-MM-DD HH:MM:SS``.

	    Args:
	        weights: dict mapping datetime objects to weights.
	        filename: Destination path; defaults to ``weight.csv``.
	    """
	    # newline="" lets the csv module control line endings itself.
	    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
	        writer = csv.writer(csvfile)
	        writer.writerow(["timestamp", "weight_lb"])
	        for dt, weight in sorted(weights.items()):
	            writer.writerow([dt.strftime("%Y-%m-%d %H:%M:%S"), weight])
	    print(f"Wrote {len(weights)} records to {filename}")

	if __name__ == "__main__":
	    # Script entry point: read the export named on the command line and
	    # write both output files using their default names.
	    import argparse

	    logging.basicConfig(level=logging.WARNING)

	    parser = argparse.ArgumentParser(description="Extract and deduplicate Apple Health body weight data to JSON and CSV.")
	    parser.add_argument("xml_file", help="Path to export.xml")
	    args = parser.parse_args()

	    weight_data = extract_body_weight_records(args.xml_file)

	    write_json(weight_data)
	    write_csv(weight_data)