Created
June 6, 2025 12:13
-
-
Save evandhoffman/a05074c452d638488ed5bd743f2c4367 to your computer and use it in GitHub Desktop.
Extract body-weight records from an Apple HealthKit export.xml and write them to JSON and CSV.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
import logging
import re
import xml.etree.ElementTree as ET
from datetime import datetime

from dateutil import parser as date_parser
# Start of a <Record ...> element (the \b rejects names such as <RecordFoo>).
_RECORD_START = re.compile(r'<Record\b')
# A complete <Record> opening tag. Quoted attribute values are consumed as a
# unit so that a literal '>' inside a value does not end the tag early.
_RECORD_OPEN_TAG = re.compile(r'''<Record\b(?:[^>"']|"[^"]*"|'[^']*')*>''')
_RECORD_CLOSE_TAG = "</Record>"


def _take_record(text):
    """Split the first complete <Record> element off the front of *text*.

    *text* must begin with ``<Record``. Returns ``(record_xml, remainder)``,
    or ``None`` when the end of the element has not been read yet.
    """
    open_tag = _RECORD_OPEN_TAG.match(text)
    if open_tag is None:
        # The opening tag itself continues on a later line.
        return None
    if open_tag.group().endswith("/>"):
        # Self-closing record: the element ends with its opening tag.
        end = open_tag.end()
    else:
        # Record with child elements: it ends at its explicit closing tag,
        # not at the "/>" of one of its children.
        close_at = text.find(_RECORD_CLOSE_TAG, open_tag.end())
        if close_at < 0:
            return None
        end = close_at + len(_RECORD_CLOSE_TAG)
    return text[:end], text[end:]


def extract_body_weight_records(xml_path):
    """Scan an Apple Health ``export.xml`` and collect body-weight records.

    The file is read line by line and each ``<Record>`` element is cut out
    as text and handed to ``process_record``, so one damaged record cannot
    abort the whole run. Both self-closing records and records that wrap
    child elements (terminated by ``</Record>``) are recognised, an opening
    tag may span several lines, and one line may hold several records.

    Args:
        xml_path: Path of the UTF-8 encoded export file.

    Returns:
        The ``weights`` dict filled in by ``process_record``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    weights = {}
    seen = set()  # (datetime, weight) tuples to dedupe
    pending = ""  # text of a record whose end has not been read yet

    with open(xml_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            # Cheap substring test keeps the common "not a record" line fast.
            if not pending and "<Record" not in line:
                continue

            piece = line.strip()
            # Join with a newline so attributes split across lines stay
            # separated by whitespace (joining with "" would fuse them).
            pending = f"{pending}\n{piece}" if pending else piece

            while pending:
                start = _RECORD_START.search(pending)
                if start is None:
                    pending = ""
                    break
                pending = pending[start.start():]

                taken = _take_record(pending)
                if taken is not None:
                    record_xml, pending = taken
                    process_record(record_xml, weights, seen)
                    continue

                # Incomplete so far. If another record already starts after
                # this one, the current record was truncated: drop it so it
                # cannot swallow the records that follow.
                restart = _RECORD_START.search(pending, 1)
                if restart is None:
                    break  # wait for more lines
                logging.warning(
                    f"Unterminated record discarded before line {line_num}"
                )
                pending = pending[restart.start():]

    if pending:
        logging.warning("Unterminated record at end of file discarded")
    return weights
# Pounds per one unit, keyed by the value of the record's "unit" attribute
# (one pound is defined as exactly 0.45359237 kg).
_POUNDS_PER_UNIT = {
    "lb": 1.0,
    "kg": 1.0 / 0.45359237,
    "g": 1.0 / 453.59237,
    "oz": 1.0 / 16.0,
    "st": 14.0,
}
# Timestamp layout tried first, e.g. "2024-01-01 08:00:00 -0500".
_EXPORT_DATE_FORMAT = "%Y-%m-%d %H:%M:%S %z"


def process_record(record_str, weights, seen):
    """Parse one ``<Record>`` element and store it if it is a body weight.

    Records whose ``type`` is not ``HKQuantityTypeIdentifierBodyMass`` are
    ignored. The value is converted to pounds according to the record's
    ``unit`` attribute (a missing attribute is treated as pounds); a record
    with an unrecognised unit is skipped rather than stored under a wrong
    label. Any failure is logged as a warning and the record is skipped, so
    this function never raises for bad input.

    Args:
        record_str: XML text of a single ``<Record>`` element.
        weights: Dict updated in place, mapping start datetime to pounds.
            It is keyed by timestamp only, so a different value seen later
            for the same timestamp replaces the earlier one.
        seen: Set of ``(datetime, weight)`` pairs already stored, updated in
            place and used to skip exact duplicates.
    """
    try:
        elem = ET.fromstring(record_str)
        if elem.attrib.get("type") != "HKQuantityTypeIdentifierBodyMass":
            return

        unit = elem.attrib.get("unit", "lb")
        if unit not in _POUNDS_PER_UNIT:
            raise ValueError(f"unsupported weight unit {unit!r}")
        weight_lb = float(elem.attrib["value"]) * _POUNDS_PER_UNIT[unit]

        raw_date = elem.attrib["startDate"]
        try:
            # Fixed layout first: unambiguous and needs only the stdlib.
            dt_obj = datetime.strptime(raw_date, _EXPORT_DATE_FORMAT)
        except ValueError:
            # Any other layout is left to dateutil's flexible parser.
            dt_obj = date_parser.parse(raw_date)

        key = (dt_obj, weight_lb)
        if key not in seen:
            seen.add(key)
            weights[dt_obj] = weight_lb
    except Exception as e:
        # Deliberately best-effort: one bad record must not stop the export.
        logging.warning(f"Malformed record skipped: {e}")
def write_json(weights, filename="weight.json"):
    """Write the weights to *filename* as a JSON object ordered by time.

    Each key is the ISO 8601 form of a timestamp (``datetime.isoformat()``)
    and each value is the weight stored for it. A one-line summary is
    printed to stdout once the file has been written.

    Args:
        weights: Dict mapping datetime to weight.
        filename: Output path; an existing file is overwritten.
    """
    serializable_data = {
        dt.isoformat(): weights[dt] for dt in sorted(weights)
    }
    with open(filename, "w", encoding="utf-8") as out_file:
        json.dump(serializable_data, out_file, indent=2)
    print(f"Wrote {len(weights)} records to {filename}")
def write_csv(weights, filename="weight.csv", timestamp_format="%Y-%m-%d %H:%M:%S"):
    """Write the weights to *filename* as CSV, ordered by time.

    The file starts with a ``timestamp,weight_lb`` header row followed by
    one row per entry. A one-line summary is printed to stdout once the
    file has been written.

    Args:
        weights: Dict mapping datetime to weight.
        filename: Output path; an existing file is overwritten.
        timestamp_format: ``strftime`` pattern for the timestamp column.
            The default omits the UTC offset; pass for example
            ``"%Y-%m-%d %H:%M:%S %z"`` to keep it.
    """
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["timestamp", "weight_lb"])
        for dt in sorted(weights):
            writer.writerow([dt.strftime(timestamp_format), weights[dt]])
    print(f"Wrote {len(weights)} records to {filename}")
if __name__ == "__main__":
    # Command-line entry point: read the export given on the command line and
    # write weight.json and weight.csv into the current working directory.
    import argparse

    # Skipped or malformed records are reported as warnings.
    logging.basicConfig(level=logging.WARNING)

    parser = argparse.ArgumentParser(description="Extract and deduplicate Apple Health body weight data to JSON and CSV.")
    parser.add_argument("xml_file", help="Path to export.xml")
    args = parser.parse_args()

    weight_data = extract_body_weight_records(args.xml_file)
    write_json(weight_data)
    write_csv(weight_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment