Skip to content

Instantly share code, notes, and snippets.

@evandhoffman
Created June 6, 2025 12:13
Show Gist options
  • Save evandhoffman/a05074c452d638488ed5bd743f2c4367 to your computer and use it in GitHub Desktop.
Save evandhoffman/a05074c452d638488ed5bd743f2c4367 to your computer and use it in GitHub Desktop.
Extract body-weight records from an Apple HealthKit export.xml and write them to JSON and CSV
import xml.etree.ElementTree as ET
import logging
import re
import json
import csv
from dateutil import parser as date_parser
from datetime import datetime
# Matches a complete opening <Record ...> tag. Quoted attribute values are
# consumed as whole units so a '>' inside a value does not end the tag early.
_RECORD_OPEN_TAG = re.compile(r'''<Record\b(?:[^>"']|"[^"]*"|'[^']*')*>''')
_RECORD_CLOSE_TAG = "</Record>"


def _complete_record(text):
    """Return the complete ``<Record>`` element at the start of ``text``.

    ``text`` must begin with ``<Record``. Returns the element's XML (either a
    self-closing tag, or everything up to and including ``</Record>``), or
    ``None`` when more input is needed to complete the element.
    """
    opening = _RECORD_OPEN_TAG.match(text)
    if opening is None:
        # The opening tag itself continues on a later line.
        return None
    if opening.group().endswith("/>"):
        return opening.group()
    close_at = text.find(_RECORD_CLOSE_TAG, opening.end())
    if close_at == -1:
        return None
    return text[:close_at + len(_RECORD_CLOSE_TAG)]


def extract_body_weight_records(xml_path, handler=None):
    """Scan an export file line by line and collect body-weight records.

    The file is read as text rather than parsed as one XML document, so each
    ``<Record>`` element is isolated and handed to ``handler`` independently;
    a record that cannot be handled does not stop the scan.

    A record may be a single self-closing tag, may have its opening tag
    wrapped over several lines, or may contain child elements and end with
    ``</Record>``. Lines belonging to one record are stripped and joined with
    a single space so that attributes on adjacent lines stay separated.

    Args:
        xml_path: Path of the export file, read as UTF-8.
        handler: Optional callable invoked as ``handler(record_xml, weights,
            seen)`` for every complete record. Defaults to ``process_record``.

    Returns:
        The ``weights`` dict that was passed to ``handler`` for every record.
    """
    if handler is None:
        handler = process_record

    weights = {}
    seen = set()  # (datetime, weight) tuples to dedupe
    record_start = re.compile(r'<Record\b')

    buffer = []            # stripped lines of the record being assembled
    has_children = False   # opening tag is closed; waiting for </Record>
    start_line = 0

    with open(xml_path, 'r', encoding='utf-8') as f:
        for line_num, line in enumerate(f, 1):
            stripped = line.strip()
            if buffer:
                buffer.append(stripped)
                # Cheap per-line check so long multi-line records are not
                # re-joined and re-scanned on every child line.
                if has_children and _RECORD_CLOSE_TAG not in stripped:
                    continue
            else:
                match = record_start.search(stripped)
                if match is None:
                    continue
                buffer = [stripped[match.start():]]
                has_children = False
                start_line = line_num

            text = " ".join(buffer)
            record = _complete_record(text)
            if record is None:
                # Incomplete: either the opening tag is still open, or it is
                # closed and child elements follow until </Record>.
                has_children = _RECORD_OPEN_TAG.match(text) is not None
                continue

            buffer = []
            try:
                handler(record, weights, seen)
            except Exception as e:
                # Safety net: keep scanning even if a handler lets an error
                # escape.
                logging.warning(
                    "Error processing record at line %d: %s", line_num, e
                )

    if buffer:
        logging.warning(
            "Unterminated <Record> starting at line %d ignored", start_line
        )
    return weights
def process_record(record_str, weights, seen):
    """Parse one ``<Record>`` element and store it if it is a body-mass sample.

    Only records whose ``type`` attribute is
    ``HKQuantityTypeIdentifierBodyMass`` are used. The ``value`` attribute is
    converted to ``float`` and ``startDate`` is parsed into a datetime; the
    value is stored as-is (the record's unit is not inspected here).

    A ``(datetime, value)`` pair already present in ``seen`` is ignored. A
    new pair is added to ``seen`` and written to ``weights[datetime]``, so a
    different value for an existing datetime replaces the earlier one.

    Any error while parsing is logged as a warning and the record is skipped.

    Args:
        record_str: XML text of a single record element.
        weights: Dict updated in place, datetime -> value.
        seen: Set of ``(datetime, value)`` tuples updated in place.
    """
    try:
        attributes = ET.fromstring(record_str).attrib
        if attributes.get("type") != "HKQuantityTypeIdentifierBodyMass":
            return

        mass = float(attributes["value"])
        measured_at = date_parser.parse(attributes["startDate"])

        sample = (measured_at, mass)
        if sample in seen:
            return
        seen.add(sample)
        weights[measured_at] = mass
    except Exception as e:
        logging.warning(f"Malformed record skipped: {e}")
def write_json(weights, filename="weight.json"):
    """Write the weight records to ``filename`` as a JSON object.

    Each key is the record's datetime rendered with ``isoformat()`` and each
    value is the stored weight. Entries are emitted in ascending datetime
    order with a two-space indent. A one-line summary is printed afterwards.

    Args:
        weights: Dict mapping datetime -> weight value.
        filename: Output path; the file is overwritten.
    """
    payload = {}
    for stamp, value in sorted(weights.items()):
        payload[stamp.isoformat()] = value

    with open(filename, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, indent=2)

    print(f"Wrote {len(weights)} records to {filename}")
def write_csv(weights, filename="weight.csv"):
    """Write the weight records to ``filename`` as CSV.

    The file starts with a ``timestamp,weight_lb`` header row, followed by
    one row per record in ascending datetime order. Timestamps are formatted
    as ``YYYY-MM-DD HH:MM:SS``. A one-line summary is printed afterwards.

    Args:
        weights: Dict mapping datetime -> weight value.
        filename: Output path; the file is overwritten.
    """
    with open(filename, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(["timestamp", "weight_lb"])
        # Lazy generator: rows are formatted and written one at a time,
        # after the header has already been emitted.
        out.writerows(
            [stamp.strftime("%Y-%m-%d %H:%M:%S"), value]
            for stamp, value in sorted(weights.items())
        )

    print(f"Wrote {len(weights)} records to {filename}")
if __name__ == "__main__":
    # Command-line entry point: read the export file named on the command
    # line and write weight.json and weight.csv to the current directory.
    import argparse

    # Skipped or malformed records are reported as warnings.
    logging.basicConfig(level=logging.WARNING)

    cli = argparse.ArgumentParser(
        description="Extract and deduplicate Apple Health body weight data to JSON and CSV."
    )
    cli.add_argument("xml_file", help="Path to export.xml")
    export_path = cli.parse_args().xml_file

    records = extract_body_weight_records(export_path)
    write_json(records)
    write_csv(records)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment