All tweaks added
Build and push images / build-backend (push) Successful in 33s
Build and push images / build-worker (push) Successful in 32s
Build and push images / build-frontend (push) Failing after 6s

This commit is contained in:
2026-06-06 18:10:35 +01:00
parent 043b3b7269
commit ec5a01d12a
92 changed files with 7517 additions and 784 deletions
+108 -142
View File
@@ -1,21 +1,24 @@
"""
Parses Garmin .fit files and GPX files into normalized activity data.
Handles full Strava and Garmin data export archives.
FIT and GPX file parser using:
- Official Garmin FIT Python SDK (garmin-fit-sdk) for .fit files
- gpxpy for .gpx files
The official SDK correctly handles scale/offset, component expansion,
semicircle-to-degree conversion, and HR message merging.
"""
import os
import zipfile
import json
import math
from pathlib import Path
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from typing import Optional
import fitparse
import gpxpy
import polyline as polyline_lib
FIT_EPOCH_S = 631065600
def haversine_distance(lat1, lon1, lat2, lon2) -> float:
"""Returns distance in metres between two GPS points."""
"""Distance in metres between two GPS points."""
R = 6371000
phi1, phi2 = math.radians(lat1), math.radians(lat2)
dphi = math.radians(lat2 - lat1)
@@ -24,106 +27,100 @@ def haversine_distance(lat1, lon1, lat2, lon2) -> float:
return 2 * R * math.asin(math.sqrt(a))
def semicircles_to_degrees(sc: int) -> float:
    """Convert an angle expressed in semicircles to decimal degrees.

    The full signed 32-bit range maps onto [-180, 180): 2**31 semicircles
    equal 180 degrees, so the input is scaled by 180 / 2**31.
    """
    degrees_per_semicircle = 180 / 2**31
    return sc * degrees_per_semicircle
def _safe_float(val) -> Optional[float]:
try:
return float(val) if val is not None else None
except (TypeError, ValueError):
return None
def _bounding_box(coords: list) -> Optional[dict]:
if not coords:
return None
lats = [c[0] for c in coords]
lons = [c[1] for c in coords]
return {"min_lat": min(lats), "max_lat": max(lats),
"min_lon": min(lons), "max_lon": max(lons)}
def parse_fit_file(filepath: str) -> dict:
"""Parse a Garmin .fit file and return normalized activity dict."""
fit = fitparse.FitFile(filepath)
"""Parse a Garmin .fit activity file using the official Garmin SDK."""
from garmin_fit_sdk import Decoder, Stream
data_points = []
laps = []
session = {}
records = []
laps = []
for record in fit.get_messages():
name = record.name
def listener(mesg_num: int, msg: dict):
nonlocal session
if mesg_num == 18: # session
session = msg
elif mesg_num == 20: # record
records.append(msg)
elif mesg_num == 19: # lap
laps.append(msg)
if name == "session":
for f in record:
session[f.name] = f.value
stream = Stream.from_file(filepath)
decoder = Decoder(stream)
decoder.read(
apply_scale_and_offset=True,
convert_datetimes_to_dates=True,
convert_types_to_strings=True,
enable_crc_check=False,
expand_sub_fields=True,
expand_components=True,
merge_heart_rates=True,
mesg_listener=listener,
)
elif name == "lap":
lap = {}
for f in record:
lap[f.name] = f.value
laps.append(lap)
elif name == "record":
point = {}
for f in record:
point[f.name] = f.value
if point:
# Convert semicircles to degrees
if "position_lat" in point and point["position_lat"] is not None:
point["position_lat"] = semicircles_to_degrees(point["position_lat"])
if "position_long" in point and point["position_long"] is not None:
point["position_long"] = semicircles_to_degrees(point["position_long"])
data_points.append(point)
# Build normalized output
# Map sport type
sport = str(session.get("sport", "generic")).lower()
sport_map = {
"running": "running", "cycling": "cycling", "swimming": "swimming",
"hiking": "hiking", "walking": "walking", "generic": "other",
"open_water_swimming": "swimming", "trail_running": "running",
"e_biking": "cycling",
}
sport_type = sport_map.get(sport, sport)
start_time = session.get("start_time")
if start_time and start_time.tzinfo is None:
if isinstance(start_time, datetime) and start_time.tzinfo is None:
start_time = start_time.replace(tzinfo=timezone.utc)
# Build GPS track for polyline
# Build GPS track
coords = [
(p["position_lat"], p["position_long"])
for p in data_points
if p.get("position_lat") is not None and p.get("position_long") is not None
(r["position_lat"], r["position_long"])
for r in records
if r.get("position_lat") is not None and r.get("position_long") is not None
]
encoded_polyline = polyline_lib.encode(coords) if coords else None
bounding_box = _bounding_box(coords)
# Calculate cumulative distance if not in FIT
cumulative_dist = 0.0
prev_lat, prev_lon = None, None
# Normalize data points
normalized_points = []
for p in data_points:
ts = p.get("timestamp")
if ts and ts.tzinfo is None:
for r in records:
ts = r.get("timestamp")
if isinstance(ts, datetime) and ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
lat = p.get("position_lat")
lon = p.get("position_long")
dist = p.get("distance")
if dist is None and lat and lon and prev_lat and prev_lon:
cumulative_dist += haversine_distance(prev_lat, prev_lon, lat, lon)
dist = cumulative_dist
elif dist is not None:
cumulative_dist = float(dist)
if lat and lon:
prev_lat, prev_lon = lat, lon
normalized_points.append({
"timestamp": ts.isoformat() if ts else None,
"latitude": lat,
"longitude": lon,
"altitude_m": p.get("altitude"),
"heart_rate": p.get("heart_rate"),
"cadence": p.get("cadence"),
"speed_ms": p.get("speed"),
"power": p.get("power"),
"temperature_c": p.get("temperature"),
"distance_m": dist,
"latitude": r.get("position_lat"),
"longitude": r.get("position_long"),
"altitude_m": r.get("altitude") or r.get("enhanced_altitude"),
"heart_rate": r.get("heart_rate"),
"cadence": r.get("cadence") or r.get("fractional_cadence"),
"speed_ms": r.get("speed") or r.get("enhanced_speed"),
"power": r.get("power"),
"temperature_c": r.get("temperature"),
"distance_m": r.get("distance"),
})
# Parse laps
# Normalize laps
normalized_laps = []
for i, lap in enumerate(laps):
ls = lap.get("start_time")
if ls and ls.tzinfo is None:
if isinstance(ls, datetime) and ls.tzinfo is None:
ls = ls.replace(tzinfo=timezone.utc)
normalized_laps.append({
"lap_number": i + 1,
@@ -132,13 +129,17 @@ def parse_fit_file(filepath: str) -> dict:
"distance_m": _safe_float(lap.get("total_distance")),
"avg_heart_rate": _safe_float(lap.get("avg_heart_rate")),
"avg_cadence": _safe_float(lap.get("avg_cadence")),
"avg_speed_ms": _safe_float(lap.get("avg_speed")),
"avg_speed_ms": _safe_float(lap.get("avg_speed") or lap.get("enhanced_avg_speed")),
"avg_power": _safe_float(lap.get("avg_power")),
})
# Build activity name
name = session.get("sport", "Activity").title()
if start_time:
name += " " + start_time.strftime("%Y-%m-%d")
return {
"name": session.get("sport", "Activity").title() + " " + (
start_time.strftime("%Y-%m-%d") if start_time else ""),
"name": name,
"sport_type": sport_type,
"start_time": start_time.isoformat() if start_time else None,
"distance_m": _safe_float(session.get("total_distance")),
@@ -150,12 +151,12 @@ def parse_fit_file(filepath: str) -> dict:
"avg_cadence": _safe_float(session.get("avg_cadence")),
"avg_power": _safe_float(session.get("avg_power")),
"normalized_power": _safe_float(session.get("normalized_power")),
"avg_speed_ms": _safe_float(session.get("avg_speed")),
"max_speed_ms": _safe_float(session.get("max_speed")),
"avg_speed_ms": _safe_float(session.get("avg_speed") or session.get("enhanced_avg_speed")),
"max_speed_ms": _safe_float(session.get("max_speed") or session.get("enhanced_max_speed")),
"avg_temperature_c": _safe_float(session.get("avg_temperature")),
"calories": _safe_float(session.get("total_calories")),
"training_stress_score": _safe_float(session.get("training_stress_score")),
"vo2max_estimate": _safe_float(session.get("estimated_sweat_loss")), # varies by device
"vo2max_estimate": _safe_float(session.get("total_training_effect")),
"polyline": encoded_polyline,
"bounding_box": bounding_box,
"source_type": "fit",
@@ -165,13 +166,12 @@ def parse_fit_file(filepath: str) -> dict:
def parse_gpx_file(filepath: str) -> dict:
"""Parse a GPX file into normalized activity dict."""
"""Parse a GPX file."""
with open(filepath) as f:
gpx = gpxpy.parse(f)
data_points = []
track = gpx.tracks[0] if gpx.tracks else None
if not track:
raise ValueError("No tracks found in GPX file")
@@ -204,7 +204,6 @@ def parse_gpx_file(filepath: str) -> dict:
"distance_m": None,
})
# Calculate distance and elevation
coords = [(p["latitude"], p["longitude"]) for p in data_points
if p["latitude"] and p["longitude"]]
encoded_polyline = polyline_lib.encode(coords) if coords else None
@@ -220,6 +219,7 @@ def parse_gpx_file(filepath: str) -> dict:
prev = (p["latitude"], p["longitude"])
p["distance_m"] = total_dist
# Elevation gain/loss
uphill, downhill = 0.0, 0.0
alts = [p["altitude_m"] for p in data_points if p["altitude_m"]]
for i in range(1, len(alts)):
@@ -235,7 +235,7 @@ def parse_gpx_file(filepath: str) -> dict:
end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None
duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None
sport = "running" # GPX doesn't always include sport; default to running
sport = "running"
if track.type:
sport = track.type.lower()
@@ -266,76 +266,42 @@ def parse_gpx_file(filepath: str) -> dict:
}
def parse_strava_export(export_dir: str) -> list[dict]:
def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
"""
Parse a full Strava data export directory.
Structure: activities.csv + activities/ folder with .gpx/.fit.gz files
Calculate % time in each HR zone using the user's configured max HR.
Zones follow the standard 5-zone model as % of max HR:
Z1 Recovery: < 60%
Z2 Base: 60 - 70%
Z3 Tempo: 70 - 80%
Z4 Threshold: 80 - 90%
Z5 Max: > 90%
user_max_hr should be the user's actual physiological max HR, NOT the
highest HR recorded in this activity. Using activity max shifts all zones
upward and makes easy runs look harder than they are.
"""
activities = []
activities_dir = Path(export_dir) / "activities"
if not activities_dir.exists():
return activities
for fname in sorted(activities_dir.iterdir()):
if fname.suffix in (".fit", ".gpx"):
try:
if fname.suffix == ".fit":
act = parse_fit_file(str(fname))
else:
act = parse_gpx_file(str(fname))
act["source_type"] = "strava_" + fname.suffix[1:]
activities.append(act)
except Exception as e:
print(f"Error parsing {fname}: {e}")
return activities
def calculate_hr_zones(data_points: list[dict], max_hr: float) -> dict:
"""Calculate percentage of time spent in each HR zone."""
if not max_hr:
if not user_max_hr or user_max_hr < 100:
return {}
zones = {"z1": 0, "z2": 0, "z3": 0, "z4": 0, "z5": 0}
zone_bounds = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01]
zone_keys = ["z1", "z2", "z3", "z4", "z5"]
zones = {k: 0 for k in zone_keys}
total = 0
for p in data_points:
hr = p.get("heart_rate")
if not hr:
if not hr or hr < 20:
continue
pct = hr / max_hr
pct = hr / user_max_hr
total += 1
if pct < zone_bounds[1]:
zones["z1"] += 1
elif pct < zone_bounds[2]:
zones["z2"] += 1
elif pct < zone_bounds[3]:
zones["z3"] += 1
elif pct < zone_bounds[4]:
zones["z4"] += 1
for i, key in enumerate(zone_keys):
if zone_bounds[i] <= pct < zone_bounds[i+1]:
zones[key] += 1
break
else:
zones["z5"] += 1
zones["z5"] += 1 # anything above 90% goes to z5
if total:
return {k: round(v / total * 100, 1) for k, v in zones.items()}
return {}
def _safe_float(val) -> Optional[float]:
try:
return float(val) if val is not None else None
except (TypeError, ValueError):
return None
def _bounding_box(coords: list[tuple]) -> Optional[dict]:
if not coords:
return None
lats = [c[0] for c in coords]
lons = [c[1] for c in coords]
return {
"min_lat": min(lats), "max_lat": max(lats),
"min_lon": min(lons), "max_lon": max(lons),
}
+1 -1
View File
@@ -306,4 +306,4 @@ def parse_wellness_fit(file_path: str) -> dict:
"sleep_awake_s": sleep_awake_s,
}
return {"days": result, "error": None}
return {"days": result, "error": None}