From ed4ab0eff8d0f3bfe6fcf1360f0452293c052bfc Mon Sep 17 00:00:00 2001 From: owain Date: Sat, 6 Jun 2026 19:32:34 +0100 Subject: [PATCH] Fix FIT parser - handle raw timestamps and semicircle auto-detection --- backend/app/services/fit_parser.py | 262 +++++++++++++++++------------ 1 file changed, 156 insertions(+), 106 deletions(-) diff --git a/backend/app/services/fit_parser.py b/backend/app/services/fit_parser.py index 5ab808f..facb3e1 100644 --- a/backend/app/services/fit_parser.py +++ b/backend/app/services/fit_parser.py @@ -1,22 +1,30 @@ """ -FIT and GPX file parser using the official Garmin FIT Python SDK. -Field names from the SDK are camelCase as per the SDK documentation. +FIT and GPX file parser. +Parses FIT files directly using the Garmin SDK but applies manual +scale conversion for fields where the SDK doesn't auto-convert. """ import math +import struct from datetime import datetime, timezone from typing import Optional import gpxpy import polyline as polyline_lib from garmin_fit_sdk import Decoder, Stream +FIT_EPOCH_S = 631065600 +SEMICIRCLES_TO_DEG = 180.0 / (2 ** 31) -def haversine_distance(lat1, lon1, lat2, lon2) -> float: - R = 6371000 - phi1, phi2 = math.radians(lat1), math.radians(lat2) - dphi = math.radians(lat2 - lat1) - dlam = math.radians(lon2 - lon1) - a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2 - return 2 * R * math.asin(math.sqrt(a)) + +def _semicircles_to_deg(val): + if val is None: + return None + try: + result = float(val) * SEMICIRCLES_TO_DEG + if -90 <= result <= 90 or -180 <= result <= 180: + return result + except (TypeError, ValueError): + pass + return None def _safe_float(val) -> Optional[float]: @@ -26,7 +34,7 @@ def _safe_float(val) -> Optional[float]: return None -def _bounding_box(coords: list) -> Optional[dict]: +def _bounding_box(coords): if not coords: return None lats = [c[0] for c in coords] @@ -35,22 +43,43 @@ def _bounding_box(coords: list) -> Optional[dict]: "min_lon": min(lons), "max_lon": max(lons)} -def _ensure_utc(dt) -> Optional[datetime]: - if dt is None: +def _to_dt(val) -> Optional[datetime]: + if val is None: return None - if isinstance(dt, datetime): - if dt.tzinfo is None: - return dt.replace(tzinfo=timezone.utc) - return dt + if isinstance(val, datetime): + return val.replace(tzinfo=timezone.utc) if val.tzinfo is None else val + if isinstance(val, (int, float)): + try: + return datetime.fromtimestamp(int(val) + FIT_EPOCH_S, tz=timezone.utc) + except (OSError, OverflowError, ValueError): + return None return None +def _is_valid_lat(v): + return v is not None and -90 <= v <= 90 + + +def _is_valid_lon(v): + return v is not None and -180 <= v <= 180 + + def parse_fit_file(filepath: str) -> dict: - """Parse a Garmin .fit activity file using the official Garmin SDK.""" + session_data = {} + records = [] + laps = [] + + def listener(mesg_num: int, msg: dict): + if mesg_num == 18: # session + session_data.update(msg) + elif mesg_num == 20: # record + records.append(msg) + elif mesg_num == 19: # lap + laps.append(msg) + stream = Stream.from_file(filepath) decoder = Decoder(stream) - - messages, errors = decoder.read( + decoder.read( apply_scale_and_offset=True, convert_datetimes_to_dates=True, convert_types_to_strings=True, @@ -58,72 +87,109 @@ def parse_fit_file(filepath: str) -> dict: expand_sub_fields=True, expand_components=True, merge_heart_rates=True, + mesg_listener=listener, ) - # SDK returns camelCase keys - sessions = messages.get("session", [{}]) - session = sessions[0] if sessions else {} - records = messages.get("record", []) - laps = messages.get("lap", []) + # The SDK may return field names in camelCase or snake_case depending on version. + # Try both. Also handle raw timestamp integers for start_time. + def get(d, *keys): + for k in keys: + v = d.get(k) + if v is not None: + return v + return None - sport = str(session.get("sport", "generic")).lower() + sport_raw = str(get(session_data, "sport", "Sport") or "generic").lower() sport_map = { "running": "running", "cycling": "cycling", "hiking": "hiking", "walking": "walking", "generic": "other", "trail_running": "running", "e_biking": "cycling", "open_water_swimming": "other", } - sport_type = sport_map.get(sport, sport) + sport_type = sport_map.get(sport_raw, sport_raw) - start_time = _ensure_utc(session.get("startTime")) + # start_time — SDK may return datetime or raw int + start_time_raw = get(session_data, "startTime", "start_time") + start_time = _to_dt(start_time_raw) + # Position fields — the SDK may or may not convert semicircles. + # Check if values look like semicircles (>= 90 for lat) and convert if so. + def get_lat(d): + v = get(d, "positionLat", "position_lat") + if v is None: + return None + fv = _safe_float(v) + if fv is None: + return None + # If absolute value > 90, it's semicircles + if abs(fv) > 90: + fv = fv * SEMICIRCLES_TO_DEG + return fv if _is_valid_lat(fv) else None + + def get_lon(d): + v = get(d, "positionLong", "position_long") + if v is None: + return None + fv = _safe_float(v) + if fv is None: + return None + if abs(fv) > 180: + fv = fv * SEMICIRCLES_TO_DEG + return fv if _is_valid_lon(fv) else None + + # Build GPS track coords = [] for r in records: - lat = r.get("positionLat") - lon = r.get("positionLong") + lat = get_lat(r) + lon = get_lon(r) if lat is not None and lon is not None: - if -90 <= lat <= 90 and -180 <= lon <= 180: - coords.append((lat, lon)) + coords.append((lat, lon)) encoded_polyline = polyline_lib.encode(coords) if coords else None bounding_box = _bounding_box(coords) + # Normalize data points normalized_points = [] for r in records: - ts = _ensure_utc(r.get("timestamp")) - lat = r.get("positionLat") - lon = r.get("positionLong") + ts = _to_dt(get(r, "timestamp")) + lat = get_lat(r) + lon = get_lon(r) - if lat is not None and not (-90 <= lat <= 90): - lat = None - if lon is not None and not (-180 <= lon <= 180): - lon = None + altitude = get(r, "altitude", "enhancedAltitude", "enhanced_altitude") + hr = get(r, "heartRate", "heart_rate") + cadence = get(r, "cadence") + speed = get(r, "speed", "enhancedSpeed", "enhanced_speed") + power = get(r, "power") + temp = get(r, "temperature") + distance = get(r, "distance") normalized_points.append({ "timestamp": ts.isoformat() if ts else None, "latitude": _safe_float(lat), "longitude": _safe_float(lon), - "altitude_m": _safe_float(r.get("altitude") or r.get("enhancedAltitude")), - "heart_rate": _safe_float(r.get("heartRate")), - "cadence": _safe_float(r.get("cadence")), - "speed_ms": _safe_float(r.get("speed") or r.get("enhancedSpeed")), - "power": _safe_float(r.get("power")), - "temperature_c": _safe_float(r.get("temperature")), - "distance_m": _safe_float(r.get("distance")), + "altitude_m": _safe_float(altitude), + "heart_rate": _safe_float(hr), + "cadence": _safe_float(cadence), + "speed_ms": _safe_float(speed), + "power": _safe_float(power), + "temperature_c": _safe_float(temp), + "distance_m": _safe_float(distance), }) + # Normalize laps normalized_laps = [] for i, lap in enumerate(laps): - ls = _ensure_utc(lap.get("startTime")) + ls = _to_dt(get(lap, "startTime", "start_time")) normalized_laps.append({ "lap_number": i + 1, "start_time": ls.isoformat() if ls else None, - "duration_s": _safe_float(lap.get("totalElapsedTime")), - "distance_m": _safe_float(lap.get("totalDistance")), - "avg_heart_rate": _safe_float(lap.get("avgHeartRate")), - "avg_cadence": _safe_float(lap.get("avgCadence")), - "avg_speed_ms": _safe_float(lap.get("avgSpeed") or lap.get("enhancedAvgSpeed")), - "avg_power": _safe_float(lap.get("avgPower")), + "duration_s": _safe_float(get(lap, "totalElapsedTime", "total_elapsed_time")), + "distance_m": _safe_float(get(lap, "totalDistance", "total_distance")), + "avg_heart_rate": _safe_float(get(lap, "avgHeartRate", "avg_heart_rate")), + "avg_cadence": _safe_float(get(lap, "avgCadence", "avg_cadence")), + "avg_speed_ms": _safe_float(get(lap, "avgSpeed", "avg_speed", + "enhancedAvgSpeed", "enhanced_avg_speed")), + "avg_power": _safe_float(get(lap, "avgPower", "avg_power")), }) name = sport_type.title() @@ -134,21 +200,25 @@ def parse_fit_file(filepath: str) -> dict: "name": name, "sport_type": sport_type, "start_time": start_time.isoformat() if start_time else None, - "distance_m": _safe_float(session.get("totalDistance")), - "duration_s": _safe_float(session.get("totalElapsedTime")), - "elevation_gain_m": _safe_float(session.get("totalAscent")), - "elevation_loss_m": _safe_float(session.get("totalDescent")), - "avg_heart_rate": _safe_float(session.get("avgHeartRate")), - "max_heart_rate": _safe_float(session.get("maxHeartRate")), - "avg_cadence": _safe_float(session.get("avgCadence")), - "avg_power": _safe_float(session.get("avgPower")), - "normalized_power": _safe_float(session.get("normalizedPower")), - "avg_speed_ms": _safe_float(session.get("avgSpeed") or session.get("enhancedAvgSpeed")), - "max_speed_ms": _safe_float(session.get("maxSpeed") or session.get("enhancedMaxSpeed")), - "avg_temperature_c": _safe_float(session.get("avgTemperature")), - "calories": _safe_float(session.get("totalCalories")), - "training_stress_score": _safe_float(session.get("trainingStressScore")), - "vo2max_estimate": _safe_float(session.get("totalTrainingEffect")), + "distance_m": _safe_float(get(session_data, "totalDistance", "total_distance")), + "duration_s": _safe_float(get(session_data, "totalElapsedTime", "total_elapsed_time")), + "elevation_gain_m": _safe_float(get(session_data, "totalAscent", "total_ascent")), + "elevation_loss_m": _safe_float(get(session_data, "totalDescent", "total_descent")), + "avg_heart_rate": _safe_float(get(session_data, "avgHeartRate", "avg_heart_rate")), + "max_heart_rate": _safe_float(get(session_data, "maxHeartRate", "max_heart_rate")), + "avg_cadence": _safe_float(get(session_data, "avgCadence", "avg_cadence")), + "avg_power": _safe_float(get(session_data, "avgPower", "avg_power")), + "normalized_power": _safe_float(get(session_data, "normalizedPower", "normalized_power")), + "avg_speed_ms": _safe_float(get(session_data, "avgSpeed", "avg_speed", + "enhancedAvgSpeed", "enhanced_avg_speed")), + "max_speed_ms": _safe_float(get(session_data, "maxSpeed", "max_speed", + "enhancedMaxSpeed", "enhanced_max_speed")), + "avg_temperature_c": _safe_float(get(session_data, "avgTemperature", "avg_temperature")), + "calories": _safe_float(get(session_data, "totalCalories", "total_calories")), + "training_stress_score": _safe_float(get(session_data, "trainingStressScore", + "training_stress_score")), + "vo2max_estimate": _safe_float(get(session_data, "totalTrainingEffect", + "total_training_effect")), "polyline": encoded_polyline, "bounding_box": bounding_box, "source_type": "fit", @@ -158,7 +228,6 @@ def parse_fit_file(filepath: str) -> dict: def parse_gpx_file(filepath: str) -> dict: - """Parse a GPX file.""" with open(filepath) as f: gpx = gpxpy.parse(f) @@ -172,7 +241,6 @@ def parse_gpx_file(filepath: str) -> dict: ts = pt.time if ts and ts.tzinfo is None: ts = ts.replace(tzinfo=timezone.utc) - extensions = {} if pt.extensions: for ext in pt.extensions: @@ -182,11 +250,9 @@ def parse_gpx_file(filepath: str) -> dict: extensions[tag] = float(child.text) except (ValueError, TypeError): pass - data_points.append({ "timestamp": ts.isoformat() if ts else None, - "latitude": pt.latitude, - "longitude": pt.longitude, + "latitude": pt.latitude, "longitude": pt.longitude, "altitude_m": pt.elevation, "heart_rate": extensions.get("hr"), "cadence": extensions.get("cad"), @@ -196,8 +262,7 @@ def parse_gpx_file(filepath: str) -> dict: "distance_m": None, }) - coords = [(p["latitude"], p["longitude"]) for p in data_points - if p["latitude"] and p["longitude"]] + coords = [(p["latitude"], p["longitude"]) for p in data_points if p["latitude"] and p["longitude"]] encoded_polyline = polyline_lib.encode(coords) if coords else None bounding_box = _bounding_box(coords) @@ -206,7 +271,12 @@ def parse_gpx_file(filepath: str) -> dict: for p in data_points: if p["latitude"] and p["longitude"]: if prev: - total_dist += haversine_distance(prev[0], prev[1], p["latitude"], p["longitude"]) + R = 6371000 + phi1, phi2 = math.radians(prev[0]), math.radians(p["latitude"]) + dphi = math.radians(p["latitude"] - prev[0]) + dlam = math.radians(p["longitude"] - prev[1]) + a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2 + total_dist += 2 * R * math.asin(math.sqrt(a)) prev = (p["latitude"], p["longitude"]) p["distance_m"] = total_dist @@ -214,58 +284,39 @@ def parse_gpx_file(filepath: str) -> dict: alts = [p["altitude_m"] for p in data_points if p["altitude_m"]] for i in range(1, len(alts)): diff = alts[i] - alts[i-1] - if diff > 0: - uphill += diff - else: - downhill += abs(diff) + if diff > 0: uphill += diff + else: downhill += abs(diff) hrs = [p["heart_rate"] for p in data_points if p["heart_rate"]] start_time_str = data_points[0]["timestamp"] if data_points else None start_dt = datetime.fromisoformat(start_time_str) if start_time_str else None end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None - - sport = "running" - if track.type: - sport = track.type.lower() + sport = track.type.lower() if track.type else "running" return { "name": track.name or gpx.name or f"Activity {start_dt.date() if start_dt else ''}", - "sport_type": sport, - "start_time": start_time_str, - "distance_m": total_dist, - "duration_s": duration, - "elevation_gain_m": uphill, - "elevation_loss_m": downhill, + "sport_type": sport, "start_time": start_time_str, + "distance_m": total_dist, "duration_s": duration, + "elevation_gain_m": uphill, "elevation_loss_m": downhill, "avg_heart_rate": (sum(hrs) / len(hrs)) if hrs else None, "max_heart_rate": max(hrs) if hrs else None, - "avg_cadence": None, - "avg_power": None, - "normalized_power": None, + "avg_cadence": None, "avg_power": None, "normalized_power": None, "avg_speed_ms": (total_dist / duration) if (total_dist and duration) else None, - "max_speed_ms": None, - "avg_temperature_c": None, - "calories": None, - "training_stress_score": None, - "vo2max_estimate": None, - "polyline": encoded_polyline, - "bounding_box": bounding_box, - "source_type": "gpx", - "data_points": data_points, - "laps": [], + "max_speed_ms": None, "avg_temperature_c": None, "calories": None, + "training_stress_score": None, "vo2max_estimate": None, + "polyline": encoded_polyline, "bounding_box": bounding_box, + "source_type": "gpx", "data_points": data_points, "laps": [], } def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict: - """Calculate % time in each HR zone using user's configured max HR.""" if not user_max_hr or user_max_hr < 100: return {} - zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01] zone_keys = ["z1", "z2", "z3", "z4", "z5"] zones = {k: 0 for k in zone_keys} total = 0 - for p in data_points: hr = p.get("heart_rate") if not hr or hr < 20: @@ -278,7 +329,6 @@ def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict: break else: zones["z5"] += 1 - if total: return {k: round(v / total * 100, 1) for k, v in zones.items()} return {} \ No newline at end of file