Fix FIT parser - handle raw timestamps and semicircle auto-detection
Build and push images / validate (push) Successful in 3s
Build and push images / build-backend (push) Successful in 6s
Build and push images / build-worker (push) Successful in 5s
Build and push images / build-frontend (push) Successful in 5s

This commit is contained in:
2026-06-06 19:32:34 +01:00
parent 0fd3ff7414
commit ed4ab0eff8
+155 -105
View File
@@ -1,22 +1,30 @@
"""
FIT and GPX file parser using the official Garmin FIT Python SDK.
Field names from the SDK are camelCase as per the SDK documentation.
FIT and GPX file parser.
Parses FIT files directly using the Garmin SDK but applies manual
scale conversion for fields where the SDK doesn't auto-convert.
"""
import math
import struct
from datetime import datetime, timezone
from typing import Optional
import gpxpy
import polyline as polyline_lib
from garmin_fit_sdk import Decoder, Stream
FIT_EPOCH_S = 631065600
SEMICIRCLES_TO_DEG = 180.0 / (2 ** 31)
def haversine_distance(lat1, lon1, lat2, lon2) -> float:
R = 6371000
phi1, phi2 = math.radians(lat1), math.radians(lat2)
dphi = math.radians(lat2 - lat1)
dlam = math.radians(lon2 - lon1)
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2
return 2 * R * math.asin(math.sqrt(a))
def _semicircles_to_deg(val):
if val is None:
return None
try:
result = float(val) * SEMICIRCLES_TO_DEG
if -90 <= result <= 90 or -180 <= result <= 180:
return result
except (TypeError, ValueError):
pass
return None
def _safe_float(val) -> Optional[float]:
@@ -26,7 +34,7 @@ def _safe_float(val) -> Optional[float]:
return None
def _bounding_box(coords: list) -> Optional[dict]:
def _bounding_box(coords):
if not coords:
return None
lats = [c[0] for c in coords]
@@ -35,22 +43,43 @@ def _bounding_box(coords: list) -> Optional[dict]:
"min_lon": min(lons), "max_lon": max(lons)}
def _ensure_utc(dt) -> Optional[datetime]:
if dt is None:
def _to_dt(val) -> Optional[datetime]:
if val is None:
return None
if isinstance(dt, datetime):
if dt.tzinfo is None:
return dt.replace(tzinfo=timezone.utc)
return dt
if isinstance(val, datetime):
return val.replace(tzinfo=timezone.utc) if val.tzinfo is None else val
if isinstance(val, (int, float)):
try:
return datetime.fromtimestamp(int(val) + FIT_EPOCH_S, tz=timezone.utc)
except (OSError, OverflowError, ValueError):
return None
return None
def _is_valid_lat(v):
return v is not None and -90 <= v <= 90
def _is_valid_lon(v):
return v is not None and -180 <= v <= 180
def parse_fit_file(filepath: str) -> dict:
"""Parse a Garmin .fit activity file using the official Garmin SDK."""
session_data = {}
records = []
laps = []
def listener(mesg_num: int, msg: dict):
if mesg_num == 18: # session
session_data.update(msg)
elif mesg_num == 20: # record
records.append(msg)
elif mesg_num == 19: # lap
laps.append(msg)
stream = Stream.from_file(filepath)
decoder = Decoder(stream)
messages, errors = decoder.read(
decoder.read(
apply_scale_and_offset=True,
convert_datetimes_to_dates=True,
convert_types_to_strings=True,
@@ -58,72 +87,109 @@ def parse_fit_file(filepath: str) -> dict:
expand_sub_fields=True,
expand_components=True,
merge_heart_rates=True,
mesg_listener=listener,
)
# SDK returns camelCase keys
sessions = messages.get("session", [{}])
session = sessions[0] if sessions else {}
records = messages.get("record", [])
laps = messages.get("lap", [])
# The SDK may return field names in camelCase or snake_case depending on version.
# Try both. Also handle raw timestamp integers for start_time.
def get(d, *keys):
for k in keys:
v = d.get(k)
if v is not None:
return v
return None
sport = str(session.get("sport", "generic")).lower()
sport_raw = str(get(session_data, "sport", "Sport") or "generic").lower()
sport_map = {
"running": "running", "cycling": "cycling",
"hiking": "hiking", "walking": "walking",
"generic": "other", "trail_running": "running",
"e_biking": "cycling", "open_water_swimming": "other",
}
sport_type = sport_map.get(sport, sport)
sport_type = sport_map.get(sport_raw, sport_raw)
start_time = _ensure_utc(session.get("startTime"))
# start_time — SDK may return datetime or raw int
start_time_raw = get(session_data, "startTime", "start_time")
start_time = _to_dt(start_time_raw)
# Position fields — the SDK may or may not convert semicircles.
# Check if values look like semicircles (>= 90 for lat) and convert if so.
def get_lat(d):
v = get(d, "positionLat", "position_lat")
if v is None:
return None
fv = _safe_float(v)
if fv is None:
return None
# If absolute value > 90, it's semicircles
if abs(fv) > 90:
fv = fv * SEMICIRCLES_TO_DEG
return fv if _is_valid_lat(fv) else None
def get_lon(d):
v = get(d, "positionLong", "position_long")
if v is None:
return None
fv = _safe_float(v)
if fv is None:
return None
if abs(fv) > 180:
fv = fv * SEMICIRCLES_TO_DEG
return fv if _is_valid_lon(fv) else None
# Build GPS track
coords = []
for r in records:
lat = r.get("positionLat")
lon = r.get("positionLong")
lat = get_lat(r)
lon = get_lon(r)
if lat is not None and lon is not None:
if -90 <= lat <= 90 and -180 <= lon <= 180:
coords.append((lat, lon))
encoded_polyline = polyline_lib.encode(coords) if coords else None
bounding_box = _bounding_box(coords)
# Normalize data points
normalized_points = []
for r in records:
ts = _ensure_utc(r.get("timestamp"))
lat = r.get("positionLat")
lon = r.get("positionLong")
ts = _to_dt(get(r, "timestamp"))
lat = get_lat(r)
lon = get_lon(r)
if lat is not None and not (-90 <= lat <= 90):
lat = None
if lon is not None and not (-180 <= lon <= 180):
lon = None
altitude = get(r, "altitude", "enhancedAltitude", "enhanced_altitude")
hr = get(r, "heartRate", "heart_rate")
cadence = get(r, "cadence")
speed = get(r, "speed", "enhancedSpeed", "enhanced_speed")
power = get(r, "power")
temp = get(r, "temperature")
distance = get(r, "distance")
normalized_points.append({
"timestamp": ts.isoformat() if ts else None,
"latitude": _safe_float(lat),
"longitude": _safe_float(lon),
"altitude_m": _safe_float(r.get("altitude") or r.get("enhancedAltitude")),
"heart_rate": _safe_float(r.get("heartRate")),
"cadence": _safe_float(r.get("cadence")),
"speed_ms": _safe_float(r.get("speed") or r.get("enhancedSpeed")),
"power": _safe_float(r.get("power")),
"temperature_c": _safe_float(r.get("temperature")),
"distance_m": _safe_float(r.get("distance")),
"altitude_m": _safe_float(altitude),
"heart_rate": _safe_float(hr),
"cadence": _safe_float(cadence),
"speed_ms": _safe_float(speed),
"power": _safe_float(power),
"temperature_c": _safe_float(temp),
"distance_m": _safe_float(distance),
})
# Normalize laps
normalized_laps = []
for i, lap in enumerate(laps):
ls = _ensure_utc(lap.get("startTime"))
ls = _to_dt(get(lap, "startTime", "start_time"))
normalized_laps.append({
"lap_number": i + 1,
"start_time": ls.isoformat() if ls else None,
"duration_s": _safe_float(lap.get("totalElapsedTime")),
"distance_m": _safe_float(lap.get("totalDistance")),
"avg_heart_rate": _safe_float(lap.get("avgHeartRate")),
"avg_cadence": _safe_float(lap.get("avgCadence")),
"avg_speed_ms": _safe_float(lap.get("avgSpeed") or lap.get("enhancedAvgSpeed")),
"avg_power": _safe_float(lap.get("avgPower")),
"duration_s": _safe_float(get(lap, "totalElapsedTime", "total_elapsed_time")),
"distance_m": _safe_float(get(lap, "totalDistance", "total_distance")),
"avg_heart_rate": _safe_float(get(lap, "avgHeartRate", "avg_heart_rate")),
"avg_cadence": _safe_float(get(lap, "avgCadence", "avg_cadence")),
"avg_speed_ms": _safe_float(get(lap, "avgSpeed", "avg_speed",
"enhancedAvgSpeed", "enhanced_avg_speed")),
"avg_power": _safe_float(get(lap, "avgPower", "avg_power")),
})
name = sport_type.title()
@@ -134,21 +200,25 @@ def parse_fit_file(filepath: str) -> dict:
"name": name,
"sport_type": sport_type,
"start_time": start_time.isoformat() if start_time else None,
"distance_m": _safe_float(session.get("totalDistance")),
"duration_s": _safe_float(session.get("totalElapsedTime")),
"elevation_gain_m": _safe_float(session.get("totalAscent")),
"elevation_loss_m": _safe_float(session.get("totalDescent")),
"avg_heart_rate": _safe_float(session.get("avgHeartRate")),
"max_heart_rate": _safe_float(session.get("maxHeartRate")),
"avg_cadence": _safe_float(session.get("avgCadence")),
"avg_power": _safe_float(session.get("avgPower")),
"normalized_power": _safe_float(session.get("normalizedPower")),
"avg_speed_ms": _safe_float(session.get("avgSpeed") or session.get("enhancedAvgSpeed")),
"max_speed_ms": _safe_float(session.get("maxSpeed") or session.get("enhancedMaxSpeed")),
"avg_temperature_c": _safe_float(session.get("avgTemperature")),
"calories": _safe_float(session.get("totalCalories")),
"training_stress_score": _safe_float(session.get("trainingStressScore")),
"vo2max_estimate": _safe_float(session.get("totalTrainingEffect")),
"distance_m": _safe_float(get(session_data, "totalDistance", "total_distance")),
"duration_s": _safe_float(get(session_data, "totalElapsedTime", "total_elapsed_time")),
"elevation_gain_m": _safe_float(get(session_data, "totalAscent", "total_ascent")),
"elevation_loss_m": _safe_float(get(session_data, "totalDescent", "total_descent")),
"avg_heart_rate": _safe_float(get(session_data, "avgHeartRate", "avg_heart_rate")),
"max_heart_rate": _safe_float(get(session_data, "maxHeartRate", "max_heart_rate")),
"avg_cadence": _safe_float(get(session_data, "avgCadence", "avg_cadence")),
"avg_power": _safe_float(get(session_data, "avgPower", "avg_power")),
"normalized_power": _safe_float(get(session_data, "normalizedPower", "normalized_power")),
"avg_speed_ms": _safe_float(get(session_data, "avgSpeed", "avg_speed",
"enhancedAvgSpeed", "enhanced_avg_speed")),
"max_speed_ms": _safe_float(get(session_data, "maxSpeed", "max_speed",
"enhancedMaxSpeed", "enhanced_max_speed")),
"avg_temperature_c": _safe_float(get(session_data, "avgTemperature", "avg_temperature")),
"calories": _safe_float(get(session_data, "totalCalories", "total_calories")),
"training_stress_score": _safe_float(get(session_data, "trainingStressScore",
"training_stress_score")),
"vo2max_estimate": _safe_float(get(session_data, "totalTrainingEffect",
"total_training_effect")),
"polyline": encoded_polyline,
"bounding_box": bounding_box,
"source_type": "fit",
@@ -158,7 +228,6 @@ def parse_fit_file(filepath: str) -> dict:
def parse_gpx_file(filepath: str) -> dict:
"""Parse a GPX file."""
with open(filepath) as f:
gpx = gpxpy.parse(f)
@@ -172,7 +241,6 @@ def parse_gpx_file(filepath: str) -> dict:
ts = pt.time
if ts and ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
extensions = {}
if pt.extensions:
for ext in pt.extensions:
@@ -182,11 +250,9 @@ def parse_gpx_file(filepath: str) -> dict:
extensions[tag] = float(child.text)
except (ValueError, TypeError):
pass
data_points.append({
"timestamp": ts.isoformat() if ts else None,
"latitude": pt.latitude,
"longitude": pt.longitude,
"latitude": pt.latitude, "longitude": pt.longitude,
"altitude_m": pt.elevation,
"heart_rate": extensions.get("hr"),
"cadence": extensions.get("cad"),
@@ -196,8 +262,7 @@ def parse_gpx_file(filepath: str) -> dict:
"distance_m": None,
})
coords = [(p["latitude"], p["longitude"]) for p in data_points
if p["latitude"] and p["longitude"]]
coords = [(p["latitude"], p["longitude"]) for p in data_points if p["latitude"] and p["longitude"]]
encoded_polyline = polyline_lib.encode(coords) if coords else None
bounding_box = _bounding_box(coords)
@@ -206,7 +271,12 @@ def parse_gpx_file(filepath: str) -> dict:
for p in data_points:
if p["latitude"] and p["longitude"]:
if prev:
total_dist += haversine_distance(prev[0], prev[1], p["latitude"], p["longitude"])
R = 6371000
phi1, phi2 = math.radians(prev[0]), math.radians(p["latitude"])
dphi = math.radians(p["latitude"] - prev[0])
dlam = math.radians(p["longitude"] - prev[1])
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2
total_dist += 2 * R * math.asin(math.sqrt(a))
prev = (p["latitude"], p["longitude"])
p["distance_m"] = total_dist
@@ -214,58 +284,39 @@ def parse_gpx_file(filepath: str) -> dict:
alts = [p["altitude_m"] for p in data_points if p["altitude_m"]]
for i in range(1, len(alts)):
diff = alts[i] - alts[i-1]
if diff > 0:
uphill += diff
else:
downhill += abs(diff)
if diff > 0: uphill += diff
else: downhill += abs(diff)
hrs = [p["heart_rate"] for p in data_points if p["heart_rate"]]
start_time_str = data_points[0]["timestamp"] if data_points else None
start_dt = datetime.fromisoformat(start_time_str) if start_time_str else None
end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None
duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None
sport = "running"
if track.type:
sport = track.type.lower()
sport = track.type.lower() if track.type else "running"
return {
"name": track.name or gpx.name or f"Activity {start_dt.date() if start_dt else ''}",
"sport_type": sport,
"start_time": start_time_str,
"distance_m": total_dist,
"duration_s": duration,
"elevation_gain_m": uphill,
"elevation_loss_m": downhill,
"sport_type": sport, "start_time": start_time_str,
"distance_m": total_dist, "duration_s": duration,
"elevation_gain_m": uphill, "elevation_loss_m": downhill,
"avg_heart_rate": (sum(hrs) / len(hrs)) if hrs else None,
"max_heart_rate": max(hrs) if hrs else None,
"avg_cadence": None,
"avg_power": None,
"normalized_power": None,
"avg_cadence": None, "avg_power": None, "normalized_power": None,
"avg_speed_ms": (total_dist / duration) if (total_dist and duration) else None,
"max_speed_ms": None,
"avg_temperature_c": None,
"calories": None,
"training_stress_score": None,
"vo2max_estimate": None,
"polyline": encoded_polyline,
"bounding_box": bounding_box,
"source_type": "gpx",
"data_points": data_points,
"laps": [],
"max_speed_ms": None, "avg_temperature_c": None, "calories": None,
"training_stress_score": None, "vo2max_estimate": None,
"polyline": encoded_polyline, "bounding_box": bounding_box,
"source_type": "gpx", "data_points": data_points, "laps": [],
}
def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
"""Calculate % time in each HR zone using user's configured max HR."""
if not user_max_hr or user_max_hr < 100:
return {}
zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01]
zone_keys = ["z1", "z2", "z3", "z4", "z5"]
zones = {k: 0 for k in zone_keys}
total = 0
for p in data_points:
hr = p.get("heart_rate")
if not hr or hr < 20:
@@ -278,7 +329,6 @@ def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
break
else:
zones["z5"] += 1
if total:
return {k: round(v / total * 100, 1) for k, v in zones.items()}
return {}