From b5b838bddc595129b717889bba7265bb607af124 Mon Sep 17 00:00:00 2001 From: owain Date: Thu, 11 Jun 2026 23:55:21 +0100 Subject: [PATCH] Fix treadmill/indoor distance over-measure and clean up polluted PRs - Garmin Connect sync now applies Garmin's corrected summary distance/moving time, overriding the raw wrist-estimated FIT distance for treadmill/indoor runs - Exclude indoor (no-GPS) runs from distance personal records (bogus fast splits) - backfill_indoor_distances task: re-fetch corrected distance for historical indoor runs - recompute_personal_records_all task: wipe and rebuild PRs from valid activities Co-Authored-By: Claude Opus 4.8 --- backend/app/services/garmin_connect_sync.py | 9 +- backend/app/workers/tasks.py | 169 +++++++++++++++++++- 2 files changed, 175 insertions(+), 3 deletions(-) diff --git a/backend/app/services/garmin_connect_sync.py b/backend/app/services/garmin_connect_sync.py index 50a0b64..bad334f 100644 --- a/backend/app/services/garmin_connect_sync.py +++ b/backend/app/services/garmin_connect_sync.py @@ -181,7 +181,14 @@ def sync_activities(garmin, user_id: int, since: Optional[datetime], dest = dest_dir / f"{garmin_id}.fit" dest.write_bytes(fit_data) - process_activity_file.delay(str(dest), user_id, "fit", garmin_id) + # Pass Garmin's corrected summary so treadmill/indoor wrist-distance is + # overridden by the authoritative displayed distance/moving time. 
+ summary = { + "distance": act.get("distance"), + "moving": act.get("movingDuration"), + "elapsed": act.get("elapsedDuration") or act.get("duration"), + } + process_activity_file.delay(str(dest), user_id, "fit", garmin_id, summary) queued += 1 if status_callback and (queued % 5 == 0 or queued == total): diff --git a/backend/app/workers/tasks.py b/backend/app/workers/tasks.py index 51ad184..e1ca172 100644 --- a/backend/app/workers/tasks.py +++ b/backend/app/workers/tasks.py @@ -52,10 +52,38 @@ def is_wellness_file(file_path: str) -> bool: return any(name.endswith(s.upper()) for s in WELLNESS_SUFFIXES) +def _apply_garmin_summary(parsed: dict, summary: dict): + """Override FIT-parsed distance/time with Garmin Connect's corrected summary + values. Treadmill/indoor runs report wrist-estimated distance in the raw FIT + that Garmin later corrects; the summary is the authoritative displayed value. + Scales the per-point distance stream so splits/charts stay consistent.""" + if not summary: + return + corr = summary.get("distance") + raw = parsed.get("distance_m") + if corr: + if raw and raw > 0 and abs(corr - raw) / raw > 0.01: + scale = corr / raw + for p in parsed.get("data_points", []): + if p.get("distance_m") is not None: + p["distance_m"] = p["distance_m"] * scale + parsed["distance_m"] = corr + if summary.get("moving") is not None: + parsed["moving_time_s"] = summary["moving"] + if summary.get("elapsed") is not None: + parsed["duration_s"] = summary["elapsed"] + dur = parsed.get("duration_s") + if parsed.get("distance_m") and dur: + parsed["avg_speed_ms"] = parsed["distance_m"] / dur + + @celery_app.task(bind=True, name="process_activity_file") def process_activity_file(self, file_path: str, user_id: int, source_type: str, - garmin_activity_id: str = None): - """Parse a FIT/GPX file. Routes wellness files to health parser.""" + garmin_activity_id: str = None, summary: dict = None): + """Parse a FIT/GPX file. Routes wellness files to health parser. 
@celery_app.task(name="backfill_indoor_distances")
def backfill_indoor_distances(user_id: int):
    """Re-fetch Garmin's corrected distance/time for historical indoor runs.

    Selects the user's running activities that have a Garmin activity id and no
    stored polyline (used here as the "no GPS" marker), asks Garmin Connect for
    each activity's summary, and overwrites the stored distance, moving time,
    duration and average speed. When the corrected distance differs from the
    stored one by more than 1%, the activity's per-point distance stream is
    scaled by the same ratio. One-off maintenance task.

    Args:
        user_id: Owner of the activities and of the Garmin Connect config.

    Returns:
        ``{"status": "no_garmin_config"}`` when the user has no Garmin Connect
        config row, otherwise ``{"status": "ok", "updated": n, "skipped": m}``.
    """
    import logging
    import time
    from app.services.garmin_connect_sync import authenticate_garmin
    from app.core.database import SyncSessionLocal
    from app.models.user import GarminConnectConfig, Activity
    from sqlalchemy import select, text

    logger = logging.getLogger(__name__)

    updated, skipped = 0, 0
    with SyncSessionLocal() as db:
        cfg = db.execute(
            select(GarminConnectConfig).where(GarminConnectConfig.user_id == user_id)
        ).scalar_one_or_none()
        if not cfg:
            return {"status": "no_garmin_config"}

        garmin, new_token = authenticate_garmin(cfg.email, cfg.password_enc, cfg.token_store)
        if new_token:
            cfg.token_store = new_token
            db.commit()

        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.sport_type == "running",
                Activity.polyline.is_(None),
                Activity.garmin_activity_id.isnot(None),
            )
        ).scalars().all()

        for index, a in enumerate(acts):
            if index:
                # Throttle every Garmin request, including ones that end up
                # skipped, so failures do not turn into a tight retry loop.
                time.sleep(0.4)

            try:
                detail = garmin.get_activity(int(a.garmin_activity_id)) or {}
            except Exception:
                # Best-effort backfill: record the failure and move on.
                logger.warning(
                    "backfill_indoor_distances: could not fetch Garmin activity %s",
                    a.garmin_activity_id,
                    exc_info=True,
                )
                skipped += 1
                continue

            # ``summaryDTO`` can be present but null; never call .get on None.
            summ = detail.get("summaryDTO") or {}
            corr = summ.get("distance")
            if not corr or corr <= 0:
                skipped += 1
                continue

            raw = a.distance_m or 0
            if raw > 0 and abs(corr - raw) / raw > 0.01:
                db.execute(
                    text("UPDATE activity_data_points SET distance_m = distance_m * :s "
                         "WHERE activity_id = :aid AND distance_m IS NOT NULL"),
                    {"s": corr / raw, "aid": a.id},
                )
            a.distance_m = corr

            moving = summ.get("movingDuration")
            if moving is not None:
                a.moving_time_s = moving

            # Same fallback as the live sync path: elapsedDuration, else duration.
            elapsed = summ.get("elapsedDuration") or summ.get("duration")
            if elapsed is not None:
                a.duration_s = elapsed

            if a.duration_s:
                a.avg_speed_ms = corr / a.duration_s
            db.commit()
            updated += 1

    return {"status": "ok", "updated": updated, "skipped": skipped}
@celery_app.task(name="recompute_personal_records_all")
def recompute_personal_records_all(user_id: int):
    """Wipe and rebuild all personal records from stored activity data.

    Running activities without a stored polyline (treated as indoor/treadmill)
    are excluded because their distance is unreliable. For every remaining
    activity the per-point distance stream is rescaled to the activity's stored
    total when the two differ by more than 2%, best splits are computed, and the
    fastest split per (sport, standard distance) becomes the single current
    record.

    The new records are computed before anything is deleted, and the delete and
    inserts are committed together, so a failure while rebuilding leaves the
    existing records untouched.

    Args:
        user_id: Owner whose personal records are rebuilt.

    Returns:
        ``{"status": "ok", "records": n}`` where ``n`` is the number of records
        written.
    """
    from app.services.route_matcher import compute_best_splits, STANDARD_DISTANCES
    from app.core.database import SyncSessionLocal
    from app.models.user import Activity, ActivityDataPoint, PersonalRecord
    from sqlalchemy import select, delete

    dist_for_label = {label: dist for dist, label in STANDARD_DISTANCES}
    with SyncSessionLocal() as db:
        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.distance_m.isnot(None),
            )
        ).scalars().all()

        best = {}  # (sport, dist_m) -> {dur, aid, at, label}
        for a in acts:
            if a.sport_type == "running" and a.polyline is None:
                continue  # indoor/treadmill — unreliable distance
            rows = db.execute(
                select(ActivityDataPoint.timestamp, ActivityDataPoint.distance_m)
                .where(ActivityDataPoint.activity_id == a.id)
                .order_by(ActivityDataPoint.timestamp)
            ).all()
            dp = [{"timestamp": r[0], "distance_m": r[1]} for r in rows if r[1] is not None]
            if len(dp) < 2:
                continue

            # Align the stream with the activity's official total so an
            # over/under-measured track cannot yield a bogus fast split.
            total = a.distance_m or 0
            stream_max = max(p["distance_m"] for p in dp)
            if total > 0 and stream_max > 0 and abs(stream_max - total) / total > 0.02:
                factor = total / stream_max
                for p in dp:
                    p["distance_m"] *= factor

            for label, dur in compute_best_splits(dp, total).items():
                dist_m = dist_for_label.get(label)
                if dist_m is None:
                    continue
                key = (a.sport_type, dist_m)
                if key not in best or dur < best[key]["dur"]:
                    best[key] = {"dur": dur, "aid": a.id, "at": a.start_time, "label": label}

        # Replace the old records only now, in one transaction with the inserts.
        db.execute(delete(PersonalRecord).where(PersonalRecord.user_id == user_id))
        for (sport, dist_m), b in best.items():
            db.add(PersonalRecord(
                user_id=user_id,
                activity_id=b["aid"],
                sport_type=sport,
                distance_m=dist_m,
                distance_label=b["label"],
                duration_s=b["dur"],
                achieved_at=b["at"],
                is_current_record=True,
            ))
        db.commit()

    return {"status": "ok", "records": len(best)}