Fix FIT parser - handle raw timestamps and semicircle auto-detection
This commit is contained in:
+155
-105
@@ -1,22 +1,30 @@
|
||||
"""
|
||||
FIT and GPX file parser using the official Garmin FIT Python SDK.
|
||||
Field names from the SDK are camelCase as per the SDK documentation.
|
||||
FIT and GPX file parser.
|
||||
Parses FIT files directly using the Garmin SDK but applies manual
|
||||
scale conversion for fields where the SDK doesn't auto-convert.
|
||||
"""
|
||||
import math
|
||||
import struct
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
import gpxpy
|
||||
import polyline as polyline_lib
|
||||
from garmin_fit_sdk import Decoder, Stream
|
||||
|
||||
# Seconds from the Unix epoch (1970-01-01T00:00:00Z) to 1989-12-31T00:00:00Z.
# Added to raw integer timestamps to turn them into Unix timestamps.
# NOTE(review): this matches the FIT date_time epoch — confirm against the FIT SDK docs.
FIT_EPOCH_S = 631065600
|
||||
# Multiplier converting semicircles to decimal degrees (2**31 semicircles == 180 degrees).
SEMICIRCLES_TO_DEG = 180.0 / (2 ** 31)
|
||||
|
||||
def haversine_distance(lat1, lon1, lat2, lon2) -> float:
    """Return the great-circle distance in metres between two points.

    Uses the haversine formula on a sphere of radius 6 371 000 m.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance along the sphere's surface, in metres.
    """
    earth_radius_m = 6371000
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = (
        math.sin(dphi / 2) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    return 2 * earth_radius_m * math.asin(math.sqrt(a))
|
||||
|
||||
def _semicircles_to_deg(val):
    """Convert a semicircle value to decimal degrees.

    Args:
        val: A value accepted by ``float()``, or ``None``.

    Returns:
        ``float(val) * SEMICIRCLES_TO_DEG`` when that result lies within
        [-180, 180]; ``None`` when ``val`` is ``None``, when ``float()``
        rejects it with ``TypeError``/``ValueError``, or when the result
        falls outside that range.
    """
    if val is None:
        return None
    try:
        semicircles = float(val)
    except (TypeError, ValueError):
        return None
    degrees = semicircles * SEMICIRCLES_TO_DEG
    # A single range test is enough: [-90, 90] lies inside [-180, 180], so a
    # separate latitude clause can never change the outcome. This helper does
    # not know whether it is handling a latitude or a longitude; callers that
    # need the tighter latitude bound must check it themselves.
    # NaN fails the comparison and therefore yields None.
    return degrees if -180 <= degrees <= 180 else None
|
||||
|
||||
|
||||
def _safe_float(val) -> Optional[float]:
|
||||
@@ -26,7 +34,7 @@ def _safe_float(val) -> Optional[float]:
|
||||
return None
|
||||
|
||||
|
||||
def _bounding_box(coords: list) -> Optional[dict]:
|
||||
def _bounding_box(coords):
|
||||
if not coords:
|
||||
return None
|
||||
lats = [c[0] for c in coords]
|
||||
@@ -35,22 +43,43 @@ def _bounding_box(coords: list) -> Optional[dict]:
|
||||
"min_lon": min(lons), "max_lon": max(lons)}
|
||||
|
||||
|
||||
def _ensure_utc(dt) -> Optional[datetime]:
|
||||
if dt is None:
|
||||
def _to_dt(val) -> Optional[datetime]:
|
||||
if val is None:
|
||||
return None
|
||||
if isinstance(dt, datetime):
|
||||
if dt.tzinfo is None:
|
||||
return dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
if isinstance(val, datetime):
|
||||
return val.replace(tzinfo=timezone.utc) if val.tzinfo is None else val
|
||||
if isinstance(val, (int, float)):
|
||||
try:
|
||||
return datetime.fromtimestamp(int(val) + FIT_EPOCH_S, tz=timezone.utc)
|
||||
except (OSError, OverflowError, ValueError):
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _is_valid_lat(v):
    """Tell whether *v* is a usable latitude: not None and within [-90, 90]."""
    if v is None:
        return False
    return -90 <= v <= 90
|
||||
|
||||
|
||||
def _is_valid_lon(v):
    """Tell whether *v* is a usable longitude: not None and within [-180, 180]."""
    if v is None:
        return False
    return -180 <= v <= 180
|
||||
|
||||
|
||||
def parse_fit_file(filepath: str) -> dict:
|
||||
"""Parse a Garmin .fit activity file using the official Garmin SDK."""
|
||||
session_data = {}
|
||||
records = []
|
||||
laps = []
|
||||
|
||||
def listener(mesg_num: int, msg: dict):
|
||||
if mesg_num == 18: # session
|
||||
session_data.update(msg)
|
||||
elif mesg_num == 20: # record
|
||||
records.append(msg)
|
||||
elif mesg_num == 19: # lap
|
||||
laps.append(msg)
|
||||
|
||||
stream = Stream.from_file(filepath)
|
||||
decoder = Decoder(stream)
|
||||
|
||||
messages, errors = decoder.read(
|
||||
decoder.read(
|
||||
apply_scale_and_offset=True,
|
||||
convert_datetimes_to_dates=True,
|
||||
convert_types_to_strings=True,
|
||||
@@ -58,72 +87,109 @@ def parse_fit_file(filepath: str) -> dict:
|
||||
expand_sub_fields=True,
|
||||
expand_components=True,
|
||||
merge_heart_rates=True,
|
||||
mesg_listener=listener,
|
||||
)
|
||||
|
||||
# SDK returns camelCase keys
|
||||
sessions = messages.get("session", [{}])
|
||||
session = sessions[0] if sessions else {}
|
||||
records = messages.get("record", [])
|
||||
laps = messages.get("lap", [])
|
||||
# The SDK may return field names in camelCase or snake_case depending on version.
|
||||
# Try both. Also handle raw timestamp integers for start_time.
|
||||
def get(d, *keys):
    """Return the first non-None value found in *d* under any of *keys*.

    Keys are tried in the order given. ``None`` is returned when no key is
    supplied, or when every key is missing or maps to ``None``. Falsy values
    other than ``None`` (``0``, ``""``) count as found.
    """
    looked_up = (d.get(name) for name in keys)
    return next((value for value in looked_up if value is not None), None)
|
||||
|
||||
sport = str(session.get("sport", "generic")).lower()
|
||||
sport_raw = str(get(session_data, "sport", "Sport") or "generic").lower()
|
||||
sport_map = {
|
||||
"running": "running", "cycling": "cycling",
|
||||
"hiking": "hiking", "walking": "walking",
|
||||
"generic": "other", "trail_running": "running",
|
||||
"e_biking": "cycling", "open_water_swimming": "other",
|
||||
}
|
||||
sport_type = sport_map.get(sport, sport)
|
||||
sport_type = sport_map.get(sport_raw, sport_raw)
|
||||
|
||||
start_time = _ensure_utc(session.get("startTime"))
|
||||
# start_time — SDK may return datetime or raw int
|
||||
start_time_raw = get(session_data, "startTime", "start_time")
|
||||
start_time = _to_dt(start_time_raw)
|
||||
|
||||
# Position fields — the SDK may or may not convert semicircles.
|
||||
# Values outside the valid degree range (|lat| > 90, |lon| > 180) are treated as semicircles and converted.
|
||||
def get_lat(d):
    """Read a latitude in decimal degrees from message dict *d*.

    Looks the value up under ``positionLat`` then ``position_lat``. A value
    whose magnitude exceeds 90 is taken to be in semicircles and is scaled by
    ``SEMICIRCLES_TO_DEG``. Returns ``None`` when the field is absent, when
    ``_safe_float`` yields ``None``, or when the final value fails
    ``_is_valid_lat``.
    """
    raw = get(d, "positionLat", "position_lat")
    degrees = None if raw is None else _safe_float(raw)
    if degrees is None:
        return None
    # Magnitudes above 90 cannot be degrees of latitude, so assume semicircles.
    if abs(degrees) > 90:
        degrees = degrees * SEMICIRCLES_TO_DEG
    if not _is_valid_lat(degrees):
        return None
    return degrees
|
||||
|
||||
def get_lon(d):
    """Read a longitude in decimal degrees from message dict *d*.

    Looks the value up under ``positionLong`` then ``position_long``. A value
    whose magnitude exceeds 180 is taken to be in semicircles and is scaled by
    ``SEMICIRCLES_TO_DEG``. Returns ``None`` when the field is absent, when
    ``_safe_float`` yields ``None``, or when the final value fails
    ``_is_valid_lon``.
    """
    raw = get(d, "positionLong", "position_long")
    degrees = None if raw is None else _safe_float(raw)
    if degrees is None:
        return None
    # Magnitudes above 180 cannot be degrees of longitude, so assume semicircles.
    if abs(degrees) > 180:
        degrees = degrees * SEMICIRCLES_TO_DEG
    if not _is_valid_lon(degrees):
        return None
    return degrees
|
||||
|
||||
# Build GPS track
|
||||
coords = []
|
||||
for r in records:
|
||||
lat = r.get("positionLat")
|
||||
lon = r.get("positionLong")
|
||||
lat = get_lat(r)
|
||||
lon = get_lon(r)
|
||||
if lat is not None and lon is not None:
|
||||
if -90 <= lat <= 90 and -180 <= lon <= 180:
|
||||
coords.append((lat, lon))
|
||||
|
||||
encoded_polyline = polyline_lib.encode(coords) if coords else None
|
||||
bounding_box = _bounding_box(coords)
|
||||
|
||||
# Normalize data points
|
||||
normalized_points = []
|
||||
for r in records:
|
||||
ts = _ensure_utc(r.get("timestamp"))
|
||||
lat = r.get("positionLat")
|
||||
lon = r.get("positionLong")
|
||||
ts = _to_dt(get(r, "timestamp"))
|
||||
lat = get_lat(r)
|
||||
lon = get_lon(r)
|
||||
|
||||
if lat is not None and not (-90 <= lat <= 90):
|
||||
lat = None
|
||||
if lon is not None and not (-180 <= lon <= 180):
|
||||
lon = None
|
||||
altitude = get(r, "altitude", "enhancedAltitude", "enhanced_altitude")
|
||||
hr = get(r, "heartRate", "heart_rate")
|
||||
cadence = get(r, "cadence")
|
||||
speed = get(r, "speed", "enhancedSpeed", "enhanced_speed")
|
||||
power = get(r, "power")
|
||||
temp = get(r, "temperature")
|
||||
distance = get(r, "distance")
|
||||
|
||||
normalized_points.append({
|
||||
"timestamp": ts.isoformat() if ts else None,
|
||||
"latitude": _safe_float(lat),
|
||||
"longitude": _safe_float(lon),
|
||||
"altitude_m": _safe_float(r.get("altitude") or r.get("enhancedAltitude")),
|
||||
"heart_rate": _safe_float(r.get("heartRate")),
|
||||
"cadence": _safe_float(r.get("cadence")),
|
||||
"speed_ms": _safe_float(r.get("speed") or r.get("enhancedSpeed")),
|
||||
"power": _safe_float(r.get("power")),
|
||||
"temperature_c": _safe_float(r.get("temperature")),
|
||||
"distance_m": _safe_float(r.get("distance")),
|
||||
"altitude_m": _safe_float(altitude),
|
||||
"heart_rate": _safe_float(hr),
|
||||
"cadence": _safe_float(cadence),
|
||||
"speed_ms": _safe_float(speed),
|
||||
"power": _safe_float(power),
|
||||
"temperature_c": _safe_float(temp),
|
||||
"distance_m": _safe_float(distance),
|
||||
})
|
||||
|
||||
# Normalize laps
|
||||
normalized_laps = []
|
||||
for i, lap in enumerate(laps):
|
||||
ls = _ensure_utc(lap.get("startTime"))
|
||||
ls = _to_dt(get(lap, "startTime", "start_time"))
|
||||
normalized_laps.append({
|
||||
"lap_number": i + 1,
|
||||
"start_time": ls.isoformat() if ls else None,
|
||||
"duration_s": _safe_float(lap.get("totalElapsedTime")),
|
||||
"distance_m": _safe_float(lap.get("totalDistance")),
|
||||
"avg_heart_rate": _safe_float(lap.get("avgHeartRate")),
|
||||
"avg_cadence": _safe_float(lap.get("avgCadence")),
|
||||
"avg_speed_ms": _safe_float(lap.get("avgSpeed") or lap.get("enhancedAvgSpeed")),
|
||||
"avg_power": _safe_float(lap.get("avgPower")),
|
||||
"duration_s": _safe_float(get(lap, "totalElapsedTime", "total_elapsed_time")),
|
||||
"distance_m": _safe_float(get(lap, "totalDistance", "total_distance")),
|
||||
"avg_heart_rate": _safe_float(get(lap, "avgHeartRate", "avg_heart_rate")),
|
||||
"avg_cadence": _safe_float(get(lap, "avgCadence", "avg_cadence")),
|
||||
"avg_speed_ms": _safe_float(get(lap, "avgSpeed", "avg_speed",
|
||||
"enhancedAvgSpeed", "enhanced_avg_speed")),
|
||||
"avg_power": _safe_float(get(lap, "avgPower", "avg_power")),
|
||||
})
|
||||
|
||||
name = sport_type.title()
|
||||
@@ -134,21 +200,25 @@ def parse_fit_file(filepath: str) -> dict:
|
||||
"name": name,
|
||||
"sport_type": sport_type,
|
||||
"start_time": start_time.isoformat() if start_time else None,
|
||||
"distance_m": _safe_float(session.get("totalDistance")),
|
||||
"duration_s": _safe_float(session.get("totalElapsedTime")),
|
||||
"elevation_gain_m": _safe_float(session.get("totalAscent")),
|
||||
"elevation_loss_m": _safe_float(session.get("totalDescent")),
|
||||
"avg_heart_rate": _safe_float(session.get("avgHeartRate")),
|
||||
"max_heart_rate": _safe_float(session.get("maxHeartRate")),
|
||||
"avg_cadence": _safe_float(session.get("avgCadence")),
|
||||
"avg_power": _safe_float(session.get("avgPower")),
|
||||
"normalized_power": _safe_float(session.get("normalizedPower")),
|
||||
"avg_speed_ms": _safe_float(session.get("avgSpeed") or session.get("enhancedAvgSpeed")),
|
||||
"max_speed_ms": _safe_float(session.get("maxSpeed") or session.get("enhancedMaxSpeed")),
|
||||
"avg_temperature_c": _safe_float(session.get("avgTemperature")),
|
||||
"calories": _safe_float(session.get("totalCalories")),
|
||||
"training_stress_score": _safe_float(session.get("trainingStressScore")),
|
||||
"vo2max_estimate": _safe_float(session.get("totalTrainingEffect")),
|
||||
"distance_m": _safe_float(get(session_data, "totalDistance", "total_distance")),
|
||||
"duration_s": _safe_float(get(session_data, "totalElapsedTime", "total_elapsed_time")),
|
||||
"elevation_gain_m": _safe_float(get(session_data, "totalAscent", "total_ascent")),
|
||||
"elevation_loss_m": _safe_float(get(session_data, "totalDescent", "total_descent")),
|
||||
"avg_heart_rate": _safe_float(get(session_data, "avgHeartRate", "avg_heart_rate")),
|
||||
"max_heart_rate": _safe_float(get(session_data, "maxHeartRate", "max_heart_rate")),
|
||||
"avg_cadence": _safe_float(get(session_data, "avgCadence", "avg_cadence")),
|
||||
"avg_power": _safe_float(get(session_data, "avgPower", "avg_power")),
|
||||
"normalized_power": _safe_float(get(session_data, "normalizedPower", "normalized_power")),
|
||||
"avg_speed_ms": _safe_float(get(session_data, "avgSpeed", "avg_speed",
|
||||
"enhancedAvgSpeed", "enhanced_avg_speed")),
|
||||
"max_speed_ms": _safe_float(get(session_data, "maxSpeed", "max_speed",
|
||||
"enhancedMaxSpeed", "enhanced_max_speed")),
|
||||
"avg_temperature_c": _safe_float(get(session_data, "avgTemperature", "avg_temperature")),
|
||||
"calories": _safe_float(get(session_data, "totalCalories", "total_calories")),
|
||||
"training_stress_score": _safe_float(get(session_data, "trainingStressScore",
|
||||
"training_stress_score")),
|
||||
"vo2max_estimate": _safe_float(get(session_data, "totalTrainingEffect",
|
||||
"total_training_effect")),
|
||||
"polyline": encoded_polyline,
|
||||
"bounding_box": bounding_box,
|
||||
"source_type": "fit",
|
||||
@@ -158,7 +228,6 @@ def parse_fit_file(filepath: str) -> dict:
|
||||
|
||||
|
||||
def parse_gpx_file(filepath: str) -> dict:
|
||||
"""Parse a GPX file."""
|
||||
with open(filepath) as f:
|
||||
gpx = gpxpy.parse(f)
|
||||
|
||||
@@ -172,7 +241,6 @@ def parse_gpx_file(filepath: str) -> dict:
|
||||
ts = pt.time
|
||||
if ts and ts.tzinfo is None:
|
||||
ts = ts.replace(tzinfo=timezone.utc)
|
||||
|
||||
extensions = {}
|
||||
if pt.extensions:
|
||||
for ext in pt.extensions:
|
||||
@@ -182,11 +250,9 @@ def parse_gpx_file(filepath: str) -> dict:
|
||||
extensions[tag] = float(child.text)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
data_points.append({
|
||||
"timestamp": ts.isoformat() if ts else None,
|
||||
"latitude": pt.latitude,
|
||||
"longitude": pt.longitude,
|
||||
"latitude": pt.latitude, "longitude": pt.longitude,
|
||||
"altitude_m": pt.elevation,
|
||||
"heart_rate": extensions.get("hr"),
|
||||
"cadence": extensions.get("cad"),
|
||||
@@ -196,8 +262,7 @@ def parse_gpx_file(filepath: str) -> dict:
|
||||
"distance_m": None,
|
||||
})
|
||||
|
||||
coords = [(p["latitude"], p["longitude"]) for p in data_points
|
||||
if p["latitude"] and p["longitude"]]
|
||||
coords = [(p["latitude"], p["longitude"]) for p in data_points if p["latitude"] and p["longitude"]]
|
||||
encoded_polyline = polyline_lib.encode(coords) if coords else None
|
||||
bounding_box = _bounding_box(coords)
|
||||
|
||||
@@ -206,7 +271,12 @@ def parse_gpx_file(filepath: str) -> dict:
|
||||
for p in data_points:
|
||||
if p["latitude"] and p["longitude"]:
|
||||
if prev:
|
||||
total_dist += haversine_distance(prev[0], prev[1], p["latitude"], p["longitude"])
|
||||
R = 6371000
|
||||
phi1, phi2 = math.radians(prev[0]), math.radians(p["latitude"])
|
||||
dphi = math.radians(p["latitude"] - prev[0])
|
||||
dlam = math.radians(p["longitude"] - prev[1])
|
||||
a = math.sin(dphi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlam/2)**2
|
||||
total_dist += 2 * R * math.asin(math.sqrt(a))
|
||||
prev = (p["latitude"], p["longitude"])
|
||||
p["distance_m"] = total_dist
|
||||
|
||||
@@ -214,58 +284,39 @@ def parse_gpx_file(filepath: str) -> dict:
|
||||
alts = [p["altitude_m"] for p in data_points if p["altitude_m"]]
|
||||
for i in range(1, len(alts)):
|
||||
diff = alts[i] - alts[i-1]
|
||||
if diff > 0:
|
||||
uphill += diff
|
||||
else:
|
||||
downhill += abs(diff)
|
||||
if diff > 0: uphill += diff
|
||||
else: downhill += abs(diff)
|
||||
|
||||
hrs = [p["heart_rate"] for p in data_points if p["heart_rate"]]
|
||||
start_time_str = data_points[0]["timestamp"] if data_points else None
|
||||
start_dt = datetime.fromisoformat(start_time_str) if start_time_str else None
|
||||
end_dt = datetime.fromisoformat(data_points[-1]["timestamp"]) if data_points else None
|
||||
duration = (end_dt - start_dt).total_seconds() if (start_dt and end_dt) else None
|
||||
|
||||
sport = "running"
|
||||
if track.type:
|
||||
sport = track.type.lower()
|
||||
sport = track.type.lower() if track.type else "running"
|
||||
|
||||
return {
|
||||
"name": track.name or gpx.name or f"Activity {start_dt.date() if start_dt else ''}",
|
||||
"sport_type": sport,
|
||||
"start_time": start_time_str,
|
||||
"distance_m": total_dist,
|
||||
"duration_s": duration,
|
||||
"elevation_gain_m": uphill,
|
||||
"elevation_loss_m": downhill,
|
||||
"sport_type": sport, "start_time": start_time_str,
|
||||
"distance_m": total_dist, "duration_s": duration,
|
||||
"elevation_gain_m": uphill, "elevation_loss_m": downhill,
|
||||
"avg_heart_rate": (sum(hrs) / len(hrs)) if hrs else None,
|
||||
"max_heart_rate": max(hrs) if hrs else None,
|
||||
"avg_cadence": None,
|
||||
"avg_power": None,
|
||||
"normalized_power": None,
|
||||
"avg_cadence": None, "avg_power": None, "normalized_power": None,
|
||||
"avg_speed_ms": (total_dist / duration) if (total_dist and duration) else None,
|
||||
"max_speed_ms": None,
|
||||
"avg_temperature_c": None,
|
||||
"calories": None,
|
||||
"training_stress_score": None,
|
||||
"vo2max_estimate": None,
|
||||
"polyline": encoded_polyline,
|
||||
"bounding_box": bounding_box,
|
||||
"source_type": "gpx",
|
||||
"data_points": data_points,
|
||||
"laps": [],
|
||||
"max_speed_ms": None, "avg_temperature_c": None, "calories": None,
|
||||
"training_stress_score": None, "vo2max_estimate": None,
|
||||
"polyline": encoded_polyline, "bounding_box": bounding_box,
|
||||
"source_type": "gpx", "data_points": data_points, "laps": [],
|
||||
}
|
||||
|
||||
|
||||
def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
|
||||
"""Calculate % time in each HR zone using user's configured max HR."""
|
||||
if not user_max_hr or user_max_hr < 100:
|
||||
return {}
|
||||
|
||||
zone_bounds = [0.0, 0.60, 0.70, 0.80, 0.90, 1.01]
|
||||
zone_keys = ["z1", "z2", "z3", "z4", "z5"]
|
||||
zones = {k: 0 for k in zone_keys}
|
||||
total = 0
|
||||
|
||||
for p in data_points:
|
||||
hr = p.get("heart_rate")
|
||||
if not hr or hr < 20:
|
||||
@@ -278,7 +329,6 @@ def calculate_hr_zones(data_points: list, user_max_hr: float) -> dict:
|
||||
break
|
||||
else:
|
||||
zones["z5"] += 1
|
||||
|
||||
if total:
|
||||
return {k: round(v / total * 100, 1) for k, v in zones.items()}
|
||||
return {}
|
||||
Reference in New Issue
Block a user