From f609931ebc88b4a0bc8b8015050bfb57f8092037 Mon Sep 17 00:00:00 2001 From: owain Date: Sat, 6 Jun 2026 19:17:51 +0100 Subject: [PATCH] Remove fitparse entirely - use Garmin SDK only with messages dict approach --- backend/app/services/fit_parser.py | 135 +++++++---------- backend/app/services/wellness_parser.py | 189 ++++++++---------------- backend/requirements.txt | 3 +- 3 files changed, 119 insertions(+), 208 deletions(-) diff --git a/backend/app/services/fit_parser.py b/backend/app/services/fit_parser.py index 13ac510..0788e21 100644 --- a/backend/app/services/fit_parser.py +++ b/backend/app/services/fit_parser.py @@ -1,24 +1,15 @@ """ -FIT and GPX file parser using: -- Official Garmin FIT Python SDK (garmin-fit-sdk) for .fit files -- gpxpy for .gpx files - -The official SDK correctly handles scale/offset, component expansion, -semicircle-to-degree conversion, and HR message merging. +FIT and GPX file parser using the official Garmin FIT Python SDK. """ import math -from pathlib import Path -from datetime import datetime, timezone, timedelta +from datetime import datetime, timezone from typing import Optional import gpxpy import polyline as polyline_lib - - -FIT_EPOCH_S = 631065600 +from garmin_fit_sdk import Decoder, Stream def haversine_distance(lat1, lon1, lat2, lon2) -> float: - """Distance in metres between two GPS points.""" R = 6371000 phi1, phi2 = math.radians(lat1), math.radians(lat2) dphi = math.radians(lat2 - lat1) @@ -43,26 +34,22 @@ def _bounding_box(coords: list) -> Optional[dict]: "min_lon": min(lons), "max_lon": max(lons)} +def _ensure_utc(dt) -> Optional[datetime]: + if dt is None: + return None + if isinstance(dt, datetime): + if dt.tzinfo is None: + return dt.replace(tzinfo=timezone.utc) + return dt + return None + + def parse_fit_file(filepath: str) -> dict: """Parse a Garmin .fit activity file using the official Garmin SDK.""" - from garmin_fit_sdk import Decoder, Stream - - session = {} - records = [] - laps = [] - - def listener(mesg_num: int, msg: dict): - nonlocal session - if mesg_num == 18: # session - session = msg - elif mesg_num == 20: # record - records.append(msg) - elif mesg_num == 19: # lap - laps.append(msg) - stream = Stream.from_file(filepath) decoder = Decoder(stream) - decoder.read( + + messages, errors = decoder.read( apply_scale_and_offset=True, convert_datetimes_to_dates=True, convert_types_to_strings=True, @@ -70,58 +57,62 @@ def parse_fit_file(filepath: str) -> dict: expand_sub_fields=True, expand_components=True, merge_heart_rates=True, - mesg_listener=listener, ) - # Map sport type + sessions = messages.get("session", [{}]) + session = sessions[0] if sessions else {} + records = messages.get("record", []) + laps = messages.get("lap", []) + sport = str(session.get("sport", "generic")).lower() sport_map = { - "running": "running", "cycling": "cycling", "swimming": "swimming", - "hiking": "hiking", "walking": "walking", "generic": "other", - "open_water_swimming": "swimming", "trail_running": "running", - "e_biking": "cycling", + "running": "running", "cycling": "cycling", + "hiking": "hiking", "walking": "walking", + "generic": "other", "trail_running": "running", + "e_biking": "cycling", "open_water_swimming": "other", } sport_type = sport_map.get(sport, sport) - start_time = session.get("start_time") - if isinstance(start_time, datetime) and start_time.tzinfo is None: - start_time = start_time.replace(tzinfo=timezone.utc) + start_time = _ensure_utc(session.get("start_time")) + + coords = [] + for r in records: + lat = r.get("position_lat") + lon = r.get("position_long") + if lat is not None and lon is not None: + if -90 <= lat <= 90 and -180 <= lon <= 180: + coords.append((lat, lon)) - # Build GPS track - coords = [ - (r["position_lat"], r["position_long"]) - for r in records - if r.get("position_lat") is not None and r.get("position_long") is not None - ] encoded_polyline = polyline_lib.encode(coords) if coords else None bounding_box = _bounding_box(coords) - # Normalize data points normalized_points = [] for r in records: - ts = r.get("timestamp") - if isinstance(ts, datetime) and ts.tzinfo is None: - ts = ts.replace(tzinfo=timezone.utc) + ts = _ensure_utc(r.get("timestamp")) + lat = r.get("position_lat") + lon = r.get("position_long") + + if lat is not None and not (-90 <= lat <= 90): + lat = None + if lon is not None and not (-180 <= lon <= 180): + lon = None normalized_points.append({ "timestamp": ts.isoformat() if ts else None, - "latitude": r.get("position_lat"), - "longitude": r.get("position_long"), - "altitude_m": r.get("altitude") or r.get("enhanced_altitude"), - "heart_rate": r.get("heart_rate"), - "cadence": r.get("cadence") or r.get("fractional_cadence"), - "speed_ms": r.get("speed") or r.get("enhanced_speed"), - "power": r.get("power"), - "temperature_c": r.get("temperature"), - "distance_m": r.get("distance"), + "latitude": _safe_float(lat), + "longitude": _safe_float(lon), + "altitude_m": _safe_float(r.get("altitude") or r.get("enhanced_altitude")), + "heart_rate": _safe_float(r.get("heart_rate")), + "cadence": _safe_float(r.get("cadence")), + "speed_ms": _safe_float(r.get("speed") or r.get("enhanced_speed")), + "power": _safe_float(r.get("power")), + "temperature_c": _safe_float(r.get("temperature")), + "distance_m": _safe_float(r.get("distance")), }) - # Normalize laps normalized_laps = [] for i, lap in enumerate(laps): - ls = lap.get("start_time") - if isinstance(ls, datetime) and ls.tzinfo is None: - ls = ls.replace(tzinfo=timezone.utc) + ls = _ensure_utc(lap.get("start_time")) normalized_laps.append({ "lap_number": i + 1, "start_time": ls.isoformat() if ls else None, @@ -133,8 +124,7 @@ def parse_fit_file(filepath: str) -> dict: "avg_power": _safe_float(lap.get("avg_power")), }) - # Build activity name - name = session.get("sport", "Activity").title() + name = sport_type.title() if start_time: name += " " + start_time.strftime("%Y-%m-%d") @@ -209,7 +199,6 @@ def parse_gpx_file(filepath: str) -> dict: encoded_polyline = polyline_lib.encode(coords) if coords else None bounding_box = _bounding_box(coords) - # Add cumulative distance total_dist = 0.0 prev = None for p in data_points: @@ -219,7 +208,6 @@ def parse_gpx_file(filepath: str) -> dict: prev = (p["latitude"], p["longitude"]) p["distance_m"] = total_dist - # Elevation gain/loss uphill, downhill = 0.0, 0.0 alts = [p["altitude_m"] for p in data_points if p["altitude_m"]] for i in range(1, len(alts)): @@ -267,20 +255,7 @@ def parse_gpx_file(filepath: str) -> dict: def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict: - """ - Calculate % time in each HR zone using the user's configured max HR. - - Zones follow the standard 5-zone model as % of max HR: - Z1 Recovery: < 60% - Z2 Base: 60 - 70% - Z3 Tempo: 70 - 80% - Z4 Threshold: 80 - 90% - Z5 Max: > 90% - - user_max_hr should be the user's actual physiological max HR, NOT the - highest HR recorded in this activity. Using activity max shifts all zones - upward and makes easy runs look harder than they are. - """ + """Calculate % time in each HR zone using user's configured max HR.""" if not user_max_hr or user_max_hr < 100: return {} @@ -300,8 +275,8 @@ def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict: zones[key] += 1 break else: - zones["z5"] += 1 # anything above 90% goes to z5 + zones["z5"] += 1 if total: return {k: round(v / total * 100, 1) for k, v in zones.items()} - return {} + return {} \ No newline at end of file diff --git a/backend/app/services/wellness_parser.py b/backend/app/services/wellness_parser.py index 6352429..34964c6 100644 --- a/backend/app/services/wellness_parser.py +++ b/backend/app/services/wellness_parser.py @@ -1,56 +1,45 @@ """ Garmin wellness FIT file parser using the official Garmin FIT Python SDK. - -The official SDK (garmin-fit-sdk) correctly handles: -- Standard FIT messages (monitoring, hrv_status_summary, sleep_level etc.) -- Garmin proprietary messages stored by numeric mesg_num -- Unknown fields stored by field definition number -- Scale/offset application, component expansion, HR merging - -Fenix 6X proprietary message numbers identified by binary analysis: - 55 - activity accumulation snapshots (cumulative steps, HR per interval) - 103 - daily totals summary (total steps, floors, calories) - 211 - resting HR + HRV summary - 227 - per-minute stress level + heart rate (most valuable for health dashboard) """ -from datetime import datetime, timezone, timedelta, date +from datetime import datetime, timezone, date from typing import Optional +from garmin_fit_sdk import Decoder, Stream -FIT_EPOCH_S = 631065600 # seconds between Unix epoch and FIT epoch (Dec 31 1989) +FIT_EPOCH_S = 631065600 -def fit_ts(seconds) -> Optional[datetime]: - """Convert FIT timestamp to UTC datetime.""" - if seconds is None: +def _fit_ts(raw) -> Optional[datetime]: + if raw is None: return None try: - s = int(seconds) - if s == 0 or s == 0xFFFFFFFF: + s = int(raw) + if s <= 0 or s == 0xFFFFFFFF: return None return datetime.fromtimestamp(s + FIT_EPOCH_S, tz=timezone.utc) except (TypeError, ValueError, OverflowError, OSError): return None -def _is_datetime(v) -> bool: - return isinstance(v, datetime) +def _to_date(val) -> Optional[date]: + if val is None: + return None + if isinstance(val, datetime): + if val.tzinfo is None: + val = val.replace(tzinfo=timezone.utc) + return val.date() + if isinstance(val, (int, float)): + dt = _fit_ts(val) + return dt.date() if dt else None + return None def parse_wellness_fit(file_path: str) -> dict: """ - Parse a Garmin wellness/monitoring FIT file using the official Garmin SDK. - + Parse a Garmin wellness/monitoring FIT file. Returns {"days": {date: metrics_dict}, "error": str|None} """ - try: - from garmin_fit_sdk import Decoder, Stream - except ImportError: - # Fall back to fitparse-based parser if SDK not installed yet - from app.services.wellness_parser_fallback import parse_wellness_fit as _fb - return _fb(file_path) - - daily = {} # date -> aggregation dict + daily = {} def ensure_day(d: date) -> dict: if d not in daily: @@ -70,60 +59,37 @@ def parse_wellness_fit(file_path: str) -> dict: } return daily[d] - def get_date(msg: dict, *keys) -> Optional[date]: - """Extract a date from a message, trying multiple field names.""" - for key in keys: - v = msg.get(key) - if v is None: - continue - if _is_datetime(v): - return v.date() - if isinstance(v, (int, float)): - dt = fit_ts(v) - if dt: - return dt.date() - return None - def listener(mesg_num: int, msg: dict): - """Called for every message after full decoding.""" - # ── Standard: monitoring (148) ──────────────────────────────────── - if mesg_num == 148: - d = get_date(msg, "timestamp", "local_timestamp") + if mesg_num == 147: + d = _to_date(msg.get("timestamp") or msg.get("local_timestamp")) + rhr = msg.get("resting_heart_rate") + if d and rhr and 20 < rhr < 120: + ensure_day(d)["resting_hr"] = int(rhr) + + elif mesg_num == 148: + d = _to_date(msg.get("timestamp") or msg.get("local_timestamp")) if not d: return entry = ensure_day(d) - hr = msg.get("heart_rate") if hr and 20 < hr < 250: entry["heart_rates"].append(int(hr)) - steps = msg.get("steps") or msg.get("cycles") if steps and steps > 0: entry["steps"] = max(entry["steps"] or 0, int(steps)) - stress = msg.get("stress_level_value") if stress is not None and stress >= 0: entry["stress_values"].append(int(stress)) - # ── Standard: monitoring_info (147) ─────────────────────────────── - elif mesg_num == 147: - d = get_date(msg, "timestamp", "local_timestamp") - if not d: - return - rhr = msg.get("resting_heart_rate") - if rhr and 20 < rhr < 120: - ensure_day(d)["resting_hr"] = int(rhr) - - # ── Standard: hrv_status_summary (275) ──────────────────────────── elif mesg_num == 275: - d = get_date(msg, "timestamp") + d = _to_date(msg.get("timestamp")) if not d: return entry = ensure_day(d) for key in ("weekly_average", "last_night_avg", "hrv_nightly_avg"): v = msg.get(key) - if v: + if v and v > 0: entry["hrv_nightly_avg"] = float(v) break high = msg.get("last_night_5_min_high") @@ -133,120 +99,93 @@ def parse_wellness_fit(file_path: str) -> dict: if status: entry["hrv_status"] = str(status) - # ── Standard: stress_level (132) ────────────────────────────────── elif mesg_num == 132: - d = get_date(msg, "stress_level_time", "timestamp") + d = _to_date(msg.get("stress_level_time") or msg.get("timestamp")) if not d: return stress = msg.get("stress_level_value") if stress is not None and stress >= 0: ensure_day(d)["stress_values"].append(int(stress)) - # ── Standard: spo2_data (258) ───────────────────────────────────── elif mesg_num == 258: - d = get_date(msg, "timestamp") + d = _to_date(msg.get("timestamp")) if not d: return spo2 = msg.get("spo2_percent") or msg.get("reading_spo2") if spo2 and 50 < spo2 <= 100: ensure_day(d)["spo2_readings"].append(float(spo2)) - # ── Standard: sleep_level (269) ─────────────────────────────────── elif mesg_num == 269: - d = get_date(msg, "timestamp") + d = _to_date(msg.get("timestamp")) if not d: return level = msg.get("sleep_level") if level is not None: - # Convert string level names to numeric codes if SDK decoded them if isinstance(level, str): level_map = {"unmeasurable": 0, "awake": 1, "light": 2, "deep": 3, "rem": 4} level = level_map.get(level.lower()) if level is not None: ensure_day(d)["sleep_levels"].append(int(level)) - # ── Proprietary 227: per-minute stress + HR ─────────────────────── - # field_1 = FIT timestamp, field_2 = heart rate bpm, field_0 = stress elif mesg_num == 227: - # SDK stores unknown fields as "unknown_N" or by def_num - ts_raw = msg.get(1) or msg.get("unknown_1") or msg.get("field_1") - hr_raw = msg.get(2) or msg.get("unknown_2") or msg.get("field_2") - stress_raw = msg.get(0) or msg.get("unknown_0") or msg.get("field_0") - - ts = fit_ts(ts_raw) if isinstance(ts_raw, (int, float)) else ( - ts_raw if _is_datetime(ts_raw) else None - ) - if not ts: + ts_raw = msg.get(1) or msg.get("1") or msg.get("unknown_1") + hr_raw = msg.get(2) or msg.get("2") or msg.get("unknown_2") + stress_raw = msg.get(0) or msg.get("0") or msg.get("unknown_0") + d = _to_date(ts_raw) + if not d: return - entry = ensure_day(ts.date()) - + entry = ensure_day(d) if hr_raw and isinstance(hr_raw, (int, float)) and 20 < hr_raw < 250: entry["heart_rates"].append(int(hr_raw)) - if stress_raw is not None and isinstance(stress_raw, (int, float)) and stress_raw >= 0: entry["stress_values"].append(int(stress_raw)) - # ── Proprietary 103: daily totals summary ───────────────────────── - # field_253 = timestamp, field_3 = steps, field_4 = floors, field_5/7 = cal elif mesg_num == 103: - ts_v = msg.get(253) or msg.get("timestamp") - ts = ts_v if _is_datetime(ts_v) else fit_ts(ts_v) - if not ts: + ts_raw = msg.get(253) or msg.get("253") or msg.get("timestamp") + d = _to_date(ts_raw) + if not d: return - entry = ensure_day(ts.date()) - - steps = msg.get(3) + entry = ensure_day(d) + steps = msg.get(3) or msg.get("3") if steps and isinstance(steps, (int, float)) and steps > 0: entry["steps"] = int(steps) - - floors = msg.get(4) + floors = msg.get(4) or msg.get("4") if floors and isinstance(floors, (int, float)) and floors > 0: f = float(floors) - if f > 1000: - f = f / 100 - entry["floors_climbed"] = round(f, 1) - - active_cal = msg.get(5) + entry["floors_climbed"] = round(f / 100 if f > 1000 else f, 1) + active_cal = msg.get(5) or msg.get("5") if active_cal and isinstance(active_cal, (int, float)) and active_cal > 0: entry["active_calories"] = float(active_cal) - - total_cal = msg.get(7) + total_cal = msg.get(7) or msg.get("7") if total_cal and isinstance(total_cal, (int, float)) and total_cal > 0: entry["total_calories"] = float(total_cal) - # ── Proprietary 211: resting HR + HRV summary ───────────────────── elif mesg_num == 211: - ts_v = msg.get(253) or msg.get("timestamp") - ts = ts_v if _is_datetime(ts_v) else fit_ts(ts_v) - if not ts: + ts_raw = msg.get(253) or msg.get("253") or msg.get("timestamp") + d = _to_date(ts_raw) + if not d: return - entry = ensure_day(ts.date()) - - rhr = msg.get(0) + entry = ensure_day(d) + rhr = msg.get(0) or msg.get("0") if rhr and isinstance(rhr, (int, float)) and 20 < rhr < 120: entry["resting_hr"] = int(rhr) - - hrv = msg.get(1) + hrv = msg.get(1) or msg.get("1") if hrv and isinstance(hrv, (int, float)) and 5 < hrv < 300: entry["hrv_nightly_avg"] = float(hrv) - # ── Proprietary 55: activity accumulation snapshots ─────────────── elif mesg_num == 55: - ts_v = msg.get(253) or msg.get("timestamp") - ts = ts_v if _is_datetime(ts_v) else fit_ts(ts_v) - if not ts: + ts_raw = msg.get(253) or msg.get("253") or msg.get("timestamp") + d = _to_date(ts_raw) + if not d: return - entry = ensure_day(ts.date()) - - steps = msg.get(2) + entry = ensure_day(d) + steps = msg.get(2) or msg.get("2") if steps and isinstance(steps, (int, float)) and steps > 0: entry["steps"] = max(entry["steps"] or 0, int(steps)) - - hr = msg.get(19) + hr = msg.get(19) or msg.get("19") if hr and isinstance(hr, (int, float)) and 20 < hr < 250: entry["heart_rates"].append(int(hr)) - # Decode the file try: stream = Stream.from_file(file_path) decoder = Decoder(stream) @@ -254,16 +193,15 @@ def parse_wellness_fit(file_path: str) -> dict: apply_scale_and_offset=True, convert_datetimes_to_dates=True, convert_types_to_strings=True, - enable_crc_check=False, # wellness files sometimes have bad CRCs + enable_crc_check=False, expand_sub_fields=True, expand_components=True, - merge_heart_rates=True, + merge_heart_rates=False, mesg_listener=listener, ) except Exception as e: return {"error": str(e), "days": {}} - # Aggregate per-day result = {} for day_date, data in daily.items(): hrs = data.pop("heart_rates", []) @@ -276,7 +214,6 @@ def parse_wellness_fit(file_path: str) -> dict: avg_stress = round(sum(s for s in stresses if s >= 0) / len(stresses), 1) if stresses else None spo2_avg = round(sum(spo2s) / len(spo2s), 1) if spo2s else None - # Sleep stage seconds (each level record = 30s epoch) if sleep_levels: sleep_deep_s = sum(30 for l in sleep_levels if l == 3) or None sleep_light_s = sum(30 for l in sleep_levels if l == 2) or None @@ -306,4 +243,4 @@ def parse_wellness_fit(file_path: str) -> dict: "sleep_awake_s": sleep_awake_s, } - return {"days": result, "error": None} + return {"days": result, "error": None} \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index eed09f5..3c9c905 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -13,7 +13,6 @@ httpx==0.27.0 redis[hiredis]==5.0.4 celery[redis]==5.4.0 garmin-fit-sdk==21.195.0 -fitparse==1.2.0 gpxpy==1.6.2 numpy==1.26.4 scipy==1.13.0 @@ -23,4 +22,4 @@ Pillow==10.3.0 aiofiles==23.2.1 python-dateutil==2.9.0 pytz==2024.1 -psycopg2-binary==2.9.9 +psycopg2-binary==2.9.9 \ No newline at end of file