Fix FIT parser - handle raw timestamps and semicircle auto-detection
Build and push images / validate (push) Successful in 3s
Build and push images / build-backend (push) Successful in 6s
Build and push images / build-worker (push) Successful in 5s
Build and push images / build-frontend (push) Successful in 5s

This commit is contained in:
2026-06-06 19:32:34 +01:00
parent 0fd3ff7414
commit ed4ab0eff8
+156 -106
View File
@@ -1,22 +1,30 @@
""" """
FIT and GPX file parser using the official Garmin FIT Python SDK. FIT and GPX file parser.
Field names from the SDK are camelCase as per the SDK documentation. Parses FIT files directly using the Garmin SDK but applies manual
scale conversion for fields where the SDK doesn't auto-convert.
""" """
import math import math
import struct
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Optional from typing import Optional
import gpxpy import gpxpy
import polyline as polyline_lib import polyline as polyline_lib
from garmin_fit_sdk import Decoder, Stream from garmin_fit_sdk import Decoder, Stream
FIT_EPOCH_S = 631065600
SEMICIRCLES_TO_DEG = 180.0 / (2 ** 31)
def haversine_distance(lat1, lon1, lat2, lon2) -> float:
R = 6371000 def _semicircles_to_deg(val):
phi1, phi2 = math.radians(lat1), math.radians(lat2) if val is None:
dphi = math.radians(lat2 - lat1) return None
dlam = math.radians(lon2 - lon1) try:
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2 result = float(val) * SEMICIRCLES_TO_DEG
return 2 * R * math.asin(math.sqrt(a)) if -90 <= result <= 90 or -180 <= result <= 180:
return result
except (TypeError, ValueError):
pass
return None
def _safe_float(val) -> Optional[float]: def _safe_float(val) -> Optional[float]:
@@ -26,7 +34,7 @@ def _safe_float(val) -> Optional[float]:
return None return None
def _bounding_box(coords: list) -> Optional[dict]: def _bounding_box(coords):
if not coords: if not coords:
return None return None
lats = [c[0] for c in coords] lats = [c[0] for c in coords]
@@ -35,22 +43,43 @@ def _bounding_box(coords: list) -> Optional[dict]:
"min_lon": min(lons), "max_lon": max(lons)} "min_lon": min(lons), "max_lon": max(lons)}
def _ensure_utc(dt) -> Optional[datetime]: def _to_dt(val) -> Optional[datetime]:
if dt is None: if val is None:
return None return None
if isinstance(dt, datetime): if isinstance(val, datetime):
if dt.tzinfo is None: return val.replace(tzinfo=timezone.utc) if val.tzinfo is None else val
return dt.replace(tzinfo=timezone.utc) if isinstance(val, (int, float)):
return dt try:
return datetime.fromtimestamp(int(val) + FIT_EPOCH_S, tz=timezone.utc)
except (OSError, OverflowError, ValueError):
return None
return None return None
def _is_valid_lat(v):
return v is not None and -90 <= v <= 90
def _is_valid_lon(v):
return v is not None and -180 <= v <= 180
def parse_fit_file(filepath: str) -> dict: def parse_fit_file(filepath: str) -> dict:
"""Parse a Garmin .fit activity file using the official Garmin SDK.""" session_data = {}
records = []
laps = []
def listener(mesg_num: int, msg: dict):
    """Route one decoded FIT message into the enclosing collectors.

    Session messages are merged into ``session_data``; record and lap
    messages are appended to ``records`` and ``laps`` respectively.
    Any other message number is ignored.
    """
    # Message numbers handled here and the collector each one feeds.
    handlers = {
        18: session_data.update,  # session
        20: records.append,       # record
        19: laps.append,          # lap
    }
    handler = handlers.get(mesg_num)
    if handler is not None:
        handler(msg)
stream = Stream.from_file(filepath) stream = Stream.from_file(filepath)
decoder = Decoder(stream) decoder = Decoder(stream)
decoder.read(
messages, errors = decoder.read(
apply_scale_and_offset=True, apply_scale_and_offset=True,
convert_datetimes_to_dates=True, convert_datetimes_to_dates=True,
convert_types_to_strings=True, convert_types_to_strings=True,
@@ -58,72 +87,109 @@ def parse_fit_file(filepath: str) -> dict:
expand_sub_fields=True, expand_sub_fields=True,
expand_components=True, expand_components=True,
merge_heart_rates=True, merge_heart_rates=True,
mesg_listener=listener,
) )
# SDK returns camelCase keys # The SDK may return field names in camelCase or snake_case depending on version.
sessions = messages.get("session", [{}]) # Try both. Also handle raw timestamp integers for start_time.
session = sessions[0] if sessions else {} def get(d, *keys):
records = messages.get("record", []) for k in keys:
laps = messages.get("lap", []) v = d.get(k)
if v is not None:
return v
return None
sport = str(session.get("sport", "generic")).lower() sport_raw = str(get(session_data, "sport", "Sport") or "generic").lower()
sport_map = { sport_map = {
"running": "running", "cycling": "cycling", "running": "running", "cycling": "cycling",
"hiking": "hiking", "walking": "walking", "hiking": "hiking", "walking": "walking",
"generic": "other", "trail_running": "running", "generic": "other", "trail_running": "running",
"e_biking": "cycling", "open_water_swimming": "other", "e_biking": "cycling", "open_water_swimming": "other",
} }
sport_type = sport_map.get(sport, sport) sport_type = sport_map.get(sport_raw, sport_raw)
start_time = _ensure_utc(session.get("startTime")) # start_time — SDK may return datetime or raw int
start_time_raw = get(session_data, "startTime", "start_time")
start_time = _to_dt(start_time_raw)
# Position fields — the SDK may or may not convert semicircles.
# Check if values look like semicircles (absolute value > 90 for lat, > 180 for lon) and convert if so.
def get_lat(d):
    """Return the latitude of message *d* in degrees, or None.

    The value is looked up under "positionLat" / "position_lat" and
    coerced with ``_safe_float``. A magnitude above 90 cannot be a
    latitude in degrees, so it is taken to be semicircles and scaled by
    ``SEMICIRCLES_TO_DEG``. The result is returned only if it passes
    ``_is_valid_lat``.
    """
    raw = get(d, "positionLat", "position_lat")
    value = None if raw is None else _safe_float(raw)
    if value is None:
        return None
    # NOTE(review): a genuine semicircle reading whose magnitude is <= 90
    # would be passed through as degrees by this heuristic — confirm that
    # is acceptable.
    degrees = value * SEMICIRCLES_TO_DEG if abs(value) > 90 else value
    if _is_valid_lat(degrees):
        return degrees
    return None
def get_lon(d):
    """Return the longitude of message *d* in degrees, or None.

    The value is looked up under "positionLong" / "position_long" and
    coerced with ``_safe_float``. A magnitude above 180 cannot be a
    longitude in degrees, so it is taken to be semicircles and scaled by
    ``SEMICIRCLES_TO_DEG``. The result is returned only if it passes
    ``_is_valid_lon``.
    """
    raw = get(d, "positionLong", "position_long")
    value = None if raw is None else _safe_float(raw)
    if value is None:
        return None
    # NOTE(review): a genuine semicircle reading whose magnitude is <= 180
    # would be passed through as degrees by this heuristic — confirm that
    # is acceptable.
    degrees = value * SEMICIRCLES_TO_DEG if abs(value) > 180 else value
    if _is_valid_lon(degrees):
        return degrees
    return None
# Build GPS track
coords = [] coords = []
for r in records: for r in records:
lat = r.get("positionLat") lat = get_lat(r)
lon = r.get("positionLong") lon = get_lon(r)
if lat is not None and lon is not None: if lat is not None and lon is not None:
if -90 <= lat <= 90 and -180 <= lon <= 180: coords.append((lat, lon))
coords.append((lat, lon))
encoded_polyline = polyline_lib.encode(coords) if coords else None encoded_polyline = polyline_lib.encode(coords) if coords else None
bounding_box = _bounding_box(coords) bounding_box = _bounding_box(coords)
# Normalize data points
normalized_points = [] normalized_points = []
for r in records: for r in records:
ts = _ensure_utc(r.get("timestamp")) ts = _to_dt(get(r, "timestamp"))
lat = r.get("positionLat") lat = get_lat(r)
lon = r.get("positionLong") lon = get_lon(r)
if lat is not None and not (-90 <= lat <= 90): altitude = get(r, "altitude", "enhancedAltitude", "enhanced_altitude")
lat = None hr = get(r, "heartRate", "heart_rate")
if lon is not None and not (-180 <= lon <= 180): cadence = get(r, "cadence")
lon = None speed = get(r, "speed", "enhancedSpeed", "enhanced_speed")
power = get(r, "power")
temp = get(r, "temperature")
distance = get(r, "distance")
normalized_points.append({ normalized_points.append({
"timestamp": ts.isoformat() if ts else None, "timestamp": ts.isoformat() if ts else None,
"latitude": _safe_float(lat), "latitude": _safe_float(lat),
"longitude": _safe_float(lon), "longitude": _safe_float(lon),
"altitude_m": _safe_float(r.get("altitude") or r.get("enhancedAltitude")), "altitude_m": _safe_float(altitude),
"heart_rate": _safe_float(r.get("heartRate")), "heart_rate": _safe_float(hr),
"cadence": _safe_float(r.get("cadence")), "cadence": _safe_float(cadence),
"speed_ms": _safe_float(r.get("speed") or r.get("enhancedSpeed")), "speed_ms": _safe_float(speed),
"power": _safe_float(r.get("power")), "power": _safe_float(power),
"temperature_c": _safe_float(r.get("temperature")), "temperature_c": _safe_float(temp),
"distance_m": _safe_float(r.get("distance")), "distance_m": _safe_float(distance),
}) })
# Normalize laps
normalized_laps = [] normalized_laps = []
for i, lap in enumerate(laps): for i, lap in enumerate(laps):
ls = _ensure_utc(lap.get("startTime")) ls = _to_dt(get(lap, "startTime", "start_time"))
normalized_laps.append({ normalized_laps.append({
"lap_number": i + 1, "lap_number": i + 1,
"start_time": ls.isoformat() if ls else None, "start_time": ls.isoformat() if ls else None,
"duration_s": _safe_float(lap.get("totalElapsedTime")), "duration_s": _safe_float(get(lap, "totalElapsedTime", "total_elapsed_time")),
"distance_m": _safe_float(lap.get("totalDistance")), "distance_m": _safe_float(get(lap, "totalDistance", "total_distance")),
"avg_heart_rate": _safe_float(lap.get("avgHeartRate")), "avg_heart_rate": _safe_float(get(lap, "avgHeartRate", "avg_heart_rate")),
"avg_cadence": _safe_float(lap.get("avgCadence")), "avg_cadence": _safe_float(get(lap, "avgCadence", "avg_cadence")),
"avg_speed_ms": _safe_float(lap.get("avgSpeed") or lap.get("enhancedAvgSpeed")), "avg_speed_ms": _safe_float(get(lap, "avgSpeed", "avg_speed",
"avg_power": _safe_float(lap.get("avgPower")), "enhancedAvgSpeed", "enhanced_avg_speed")),
"avg_power": _safe_float(get(lap, "avgPower", "avg_power")),
}) })
name = sport_type.title() name = sport_type.title()
@@ -134,21 +200,25 @@ def parse_fit_file(filepath: str) -> dict:
"name": name, "name": name,
"sport_type": sport_type, "sport_type": sport_type,
"start_time": start_time.isoformat() if start_time else None, "start_time": start_time.isoformat() if start_time else None,
"distance_m": _safe_float(session.get("totalDistance")), "distance_m": _safe_float(get(session_data, "totalDistance", "total_distance")),
"duration_s": _safe_float(session.get("totalElapsedTime")), "duration_s": _safe_float(get(session_data, "totalElapsedTime", "total_elapsed_time")),
"elevation_gain_m": _safe_float(session.get("totalAscent")), "elevation_gain_m": _safe_float(get(session_data, "totalAscent", "total_ascent")),
"elevation_loss_m": _safe_float(session.get("totalDescent")), "elevation_loss_m": _safe_float(get(session_data, "totalDescent", "total_descent")),
"avg_heart_rate": _safe_float(session.get("avgHeartRate")), "avg_heart_rate": _safe_float(get(session_data, "avgHeartRate", "avg_heart_rate")),
"max_heart_rate": _safe_float(session.get("maxHeartRate")), "max_heart_rate": _safe_float(get(session_data, "maxHeartRate", "max_heart_rate")),
"avg_cadence": _safe_float(session.get("avgCadence")), "avg_cadence": _safe_float(get(session_data, "avgCadence", "avg_cadence")),
"avg_power": _safe_float(session.get("avgPower")), "avg_power": _safe_float(get(session_data, "avgPower", "avg_power")),
"normalized_power": _safe_float(session.get("normalizedPower")), "normalized_power": _safe_float(get(session_data, "normalizedPower", "normalized_power")),
"avg_speed_ms": _safe_float(session.get("avgSpeed") or session.get("enhancedAvgSpeed")), "avg_speed_ms": _safe_float(get(session_data, "avgSpeed", "avg_speed",
"max_speed_ms": _safe_float(session.get("maxSpeed") or session.get("enhancedMaxSpeed")), "enhancedAvgSpeed", "enhanced_avg_speed")),
"avg_temperature_c": _safe_float(session.get("avgTemperature")), "max_speed_ms": _safe_float(get(session_data, "maxSpeed", "max_speed",
"calories": _safe_float(session.get("totalCalories")), "enhancedMaxSpeed", "enhanced_max_speed")),
"training_stress_score": _safe_float(session.get("trainingStressScore")), "avg_temperature_c": _safe_float(get(session_data, "avgTemperature", "avg_temperature")),
"vo2max_estimate": _safe_float(session.get("totalTrainingEffect")), "calories": _safe_float(get(session_data, "totalCalories", "total_calories")),
"training_stress_score": _safe_float(get(session_data, "trainingStressScore",
"training_stress_score")),
"vo2max_estimate": _safe_float(get(session_data, "totalTrainingEffect",
"total_training_effect")),
"polyline": encoded_polyline, "polyline": encoded_polyline,
"bounding_box": bounding_box, "bounding_box": bounding_box,
"source_type": "fit", "source_type": "fit",
@@ -158,7 +228,6 @@ def parse_fit_file(filepath: str) -> dict:
def parse_gpx_file(filepath: str) -> dict: def parse_gpx_file(filepath: str) -> dict:
"""Parse a GPX file."""
with open(filepath) as f: with open(filepath) as f:
gpx = gpxpy.parse(f) gpx = gpxpy.parse(f)
@@ -172,7 +241,6 @@ def parse_gpx_file(filepath: str) -> dict:
ts = pt.time ts = pt.time
if ts and ts.tzinfo is None: if ts and ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc) ts = ts.replace(tzinfo=timezone.utc)
extensions = {} extensions = {}
if pt.extensions: if pt.extensions:
for ext in pt.extensions: for ext in pt.extensions:
@@ -182,11 +250,9 @@ def parse_gpx_file(filepath: str) -> dict:
extensions[tag] = float(child.text) extensions[tag] = float(child.text)
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
data_points.append({ data_points.append({
"timestamp": ts.isoformat() if ts else None, "timestamp": ts.isoformat() if ts else None,
"latitude": pt.latitude, "latitude": pt.latitude, "longitude": pt.longitude,
"longitude": pt.longitude,
"altitude_m": pt.elevation, "altitude_m": pt.elevation,
"heart_rate": extensions.get("hr"), "heart_rate": extensions.get("hr"),
"cadence": extensions.get("cad"), "cadence": extensions.get("cad"),
@@ -196,8 +262,7 @@ def parse_gpx_file(filepath: str) -> dict:
"distance_m": None, "distance_m": None,
}) })
coords = [(p["latitude"], p["longitude"]) for p in data_points coords = [(p["latitude"], p["longitude"]) for p in data_points if p["latitude"] and p["longitude"]]
if p["latitude"] and p["longitude"]]
encoded_polyline = polyline_lib.encode(coords) if coords else None encoded_polyline = polyline_lib.encode(coords) if coords else None
bounding_box = _bounding_box(coords) bounding_box = _bounding_box(coords)
@@ -206,7 +271,12 @@ def parse_gpx_file(filepath: str) -> dict:
for p in data_points: for p in data_points:
if p["latitude"] and p["longitude"]: if p["latitude"] and p["longitude"]:
if prev: if prev:
total_dist += haversine_distance(prev[0], prev[1], p["latitude"], p["longitude"]) R = 6371000
phi1, phi2 = math.radians(prev[0]), math.radians(p["latitude"])
dphi = math.radians(p["latitude"] - prev[0])
dlam = math.radians(p["longitude"] - prev[1])
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2
total_dist += 2 * R * math.asin(math.sqrt(a))
prev = (p["latitude"], p["longitude"]) prev = (p["latitude"], p["longitude"])
p["distance_m"] = total_dist p["distance_m"] = total_dist
@@ -214,58 +284,39 @@ def parse_gpx_file(filepath: str) -> dict:
alts = [p["altitude_m"] for p in data_points if p["altitude_m"]] alts = [p["altitude_m"] for p in data_points if p["altitude_m"]]
for i in range(1, len(alts)): for i in range(1, len(alts)):
diff = alts[i] - alts[i-1] diff = alts[i] - alts[i-1]
if diff > 0: if diff > 0: uphill += diff
uphill += diff else: downhill += abs(diff)
else:
downhill += abs(diff)
hrs = [p["heart_rate"] for p in data_points if p["heart_rate"]] hrs = [p["heart_rate"] for p in data_points if p["heart_rate"]]
start_time_str = data_points[0]["timestamp"] if data_points else None start_time_str = data_points[0]["timestamp"] if data_points else None
start_dt = datetime.fromisoformat(start_time_str) if start_time_str else None start_dt = datetime.fromisoformat(start_time_str) if start_time_str else None
end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None
duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None
sport = track.type.lower() if track.type else "running"
sport = "running"
if track.type:
sport = track.type.lower()
return { return {
"name": track.name or gpx.name or f"Activity {start_dt.date() if start_dt else ''}", "name": track.name or gpx.name or f"Activity {start_dt.date() if start_dt else ''}",
"sport_type": sport, "sport_type": sport, "start_time": start_time_str,
"start_time": start_time_str, "distance_m": total_dist, "duration_s": duration,
"distance_m": total_dist, "elevation_gain_m": uphill, "elevation_loss_m": downhill,
"duration_s": duration,
"elevation_gain_m": uphill,
"elevation_loss_m": downhill,
"avg_heart_rate": (sum(hrs) / len(hrs)) if hrs else None, "avg_heart_rate": (sum(hrs) / len(hrs)) if hrs else None,
"max_heart_rate": max(hrs) if hrs else None, "max_heart_rate": max(hrs) if hrs else None,
"avg_cadence": None, "avg_cadence": None, "avg_power": None, "normalized_power": None,
"avg_power": None,
"normalized_power": None,
"avg_speed_ms": (total_dist / duration) if (total_dist and duration) else None, "avg_speed_ms": (total_dist / duration) if (total_dist and duration) else None,
"max_speed_ms": None, "max_speed_ms": None, "avg_temperature_c": None, "calories": None,
"avg_temperature_c": None, "training_stress_score": None, "vo2max_estimate": None,
"calories": None, "polyline": encoded_polyline, "bounding_box": bounding_box,
"training_stress_score": None, "source_type": "gpx", "data_points": data_points, "laps": [],
"vo2max_estimate": None,
"polyline": encoded_polyline,
"bounding_box": bounding_box,
"source_type": "gpx",
"data_points": data_points,
"laps": [],
} }
def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict: def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
"""Calculate % time in each HR zone using user's configured max HR."""
if not user_max_hr or user_max_hr < 100: if not user_max_hr or user_max_hr < 100:
return {} return {}
zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01] zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01]
zone_keys = ["z1", "z2", "z3", "z4", "z5"] zone_keys = ["z1", "z2", "z3", "z4", "z5"]
zones = {k: 0 for k in zone_keys} zones = {k: 0 for k in zone_keys}
total = 0 total = 0
for p in data_points: for p in data_points:
hr = p.get("heart_rate") hr = p.get("heart_rate")
if not hr or hr < 20: if not hr or hr < 20:
@@ -278,7 +329,6 @@ def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
break break
else: else:
zones["z5"] += 1 zones["z5"] += 1
if total: if total:
return {k: round(v / total * 100, 1) for k, v in zones.items()} return {k: round(v / total * 100, 1) for k, v in zones.items()}
return {} return {}