Fix treadmill/indoor distance over-measure and clean up polluted PRs
Build and push images / validate (push) Successful in 3s
Build and push images / build-backend (push) Successful in 6s
Build and push images / build-worker (push) Successful in 6s
Build and push images / build-frontend (push) Successful in 5s

- Garmin Connect sync now applies Garmin's corrected summary distance/moving time,
  overriding the raw wrist-estimated FIT distance for treadmill/indoor runs
- Exclude indoor (no-GPS) runs from distance personal records (bogus fast splits)
- backfill_indoor_distances task: re-fetch corrected distance for historical indoor runs
- recompute_personal_records_all task: wipe and rebuild PRs from valid activities

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 23:55:21 +01:00
parent e9cb1ea4e4
commit b5b838bddc
2 changed files with 175 additions and 3 deletions
+8 -1
View File
@@ -181,7 +181,14 @@ def sync_activities(garmin, user_id: int, since: Optional[datetime],
dest = dest_dir / f"{garmin_id}.fit" dest = dest_dir / f"{garmin_id}.fit"
dest.write_bytes(fit_data) dest.write_bytes(fit_data)
process_activity_file.delay(str(dest), user_id, "fit", garmin_id) # Pass Garmin's corrected summary so treadmill/indoor wrist-distance is
# overridden by the authoritative displayed distance/moving time.
summary = {
"distance": act.get("distance"),
"moving": act.get("movingDuration"),
"elapsed": act.get("elapsedDuration") or act.get("duration"),
}
process_activity_file.delay(str(dest), user_id, "fit", garmin_id, summary)
queued += 1 queued += 1
if status_callback and (queued % 5 == 0 or queued == total): if status_callback and (queued % 5 == 0 or queued == total):
+167 -2
View File
@@ -52,10 +52,38 @@ def is_wellness_file(file_path: str) -> bool:
return any(name.endswith(s.upper()) for s in WELLNESS_SUFFIXES) return any(name.endswith(s.upper()) for s in WELLNESS_SUFFIXES)
def _apply_garmin_summary(parsed: dict, summary: dict):
"""Override FIT-parsed distance/time with Garmin Connect's corrected summary
values. Treadmill/indoor runs report wrist-estimated distance in the raw FIT
that Garmin later corrects; the summary is the authoritative displayed value.
Scales the per-point distance stream so splits/charts stay consistent."""
if not summary:
return
corr = summary.get("distance")
raw = parsed.get("distance_m")
if corr:
if raw and raw > 0 and abs(corr - raw) / raw > 0.01:
scale = corr / raw
for p in parsed.get("data_points", []):
if p.get("distance_m") is not None:
p["distance_m"] = p["distance_m"] * scale
parsed["distance_m"] = corr
if summary.get("moving") is not None:
parsed["moving_time_s"] = summary["moving"]
if summary.get("elapsed") is not None:
parsed["duration_s"] = summary["elapsed"]
dur = parsed.get("duration_s")
if parsed.get("distance_m") and dur:
parsed["avg_speed_ms"] = parsed["distance_m"] / dur
@celery_app.task(bind=True, name="process_activity_file") @celery_app.task(bind=True, name="process_activity_file")
def process_activity_file(self, file_path: str, user_id: int, source_type: str, def process_activity_file(self, file_path: str, user_id: int, source_type: str,
garmin_activity_id: str = None): garmin_activity_id: str = None, summary: dict = None):
"""Parse a FIT/GPX file. Routes wellness files to health parser.""" """Parse a FIT/GPX file. Routes wellness files to health parser.
`summary` (optional, from Garmin Connect sync) carries Garmin's corrected
distance/moving/elapsed values which override the raw FIT figures."""
if is_wellness_file(file_path): if is_wellness_file(file_path):
parse_wellness_fit.delay(file_path, user_id) parse_wellness_fit.delay(file_path, user_id)
@@ -80,6 +108,9 @@ def process_activity_file(self, file_path: str, user_id: int, source_type: str,
if not parsed.get("start_time"): if not parsed.get("start_time"):
return {"status": "skipped", "reason": "no start_time", "file": file_path} return {"status": "skipped", "reason": "no start_time", "file": file_path}
# Prefer Garmin's corrected distance/time (treadmill/indoor wrist-distance fix).
_apply_garmin_summary(parsed, summary)
# Reject activities whose average speed is implausible for the sport (e.g. a # Reject activities whose average speed is implausible for the sport (e.g. a
# car journey accidentally recorded). Surfaced to the upload UI as the reason. # car journey accidentally recorded). Surfaced to the upload UI as the reason.
if parsed.get("rejected_reason"): if parsed.get("rejected_reason"):
@@ -387,6 +418,12 @@ def compute_personal_records(activity_id: int, user_id: int, parsed: dict):
start_time_str = parsed.get("start_time") start_time_str = parsed.get("start_time")
start_time = datetime.fromisoformat(start_time_str) if start_time_str else datetime.now(timezone.utc) start_time = datetime.fromisoformat(start_time_str) if start_time_str else datetime.now(timezone.utc)
# Treadmill/indoor runs have no GPS and an unreliable distance stream, which
# produces bogus fast splits — exclude them from distance records.
has_gps = any(p.get("latitude") is not None and p.get("longitude") is not None for p in data_points)
if sport == "running" and not has_gps:
return {"status": "skipped_indoor", "activity_id": activity_id}
# GPS can over/under-measure relative to the activity's official distance # GPS can over/under-measure relative to the activity's official distance
# (e.g. a 5 km run whose GPS track sums to 5.8 km), which would otherwise # (e.g. a 5 km run whose GPS track sums to 5.8 km), which would otherwise
# produce a bogus "best 5 km" split. Scale the distance stream so its max # produce a bogus "best 5 km" split. Scale the distance stream so its max
@@ -865,3 +902,131 @@ def backfill_moving_time(user_id: int = None):
db.commit() db.commit()
return {"status": "ok", "updated": updated, "skipped": skipped} return {"status": "ok", "updated": updated, "skipped": skipped}
@celery_app.task(name="backfill_indoor_distances")
def backfill_indoor_distances(user_id: int):
    """Re-fetch Garmin's corrected distance/time for historical indoor runs.

    Targets the user's running activities that have no polyline and do have a
    Garmin activity id. For each one the Garmin Connect summary is fetched and
    its distance/moving/elapsed values overwrite the stored ones; when the
    corrected distance differs from the stored total by more than 1%, the
    stored per-point distance stream is scaled by the same ratio. Each activity
    is committed on its own. One-off maintenance.

    Args:
        user_id: Owner of the activities to backfill.

    Returns:
        ``{"status": "no_garmin_config"}`` when the user has no Garmin Connect
        config, otherwise ``{"status": "ok", "updated": n, "skipped": m}``.
    """
    import logging
    import time
    from app.services.garmin_connect_sync import authenticate_garmin
    from app.core.database import SyncSessionLocal
    from app.models.user import GarminConnectConfig, Activity
    from sqlalchemy import select, text

    log = logging.getLogger(__name__)
    pause_s = 0.4  # pause between Garmin requests
    updated, skipped = 0, 0

    with SyncSessionLocal() as db:
        cfg = db.execute(
            select(GarminConnectConfig).where(GarminConnectConfig.user_id == user_id)
        ).scalar_one_or_none()
        if not cfg:
            return {"status": "no_garmin_config"}

        garmin, new_token = authenticate_garmin(cfg.email, cfg.password_enc, cfg.token_store)
        if new_token:
            cfg.token_store = new_token
            db.commit()

        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.sport_type == "running",
                Activity.polyline.is_(None),
                Activity.garmin_activity_id.isnot(None),
            )
        ).scalars().all()

        for a in acts:
            try:
                detail = garmin.get_activity(int(a.garmin_activity_id)) or {}
            except Exception:
                # Best-effort: record the failure, back off, move on.
                log.warning(
                    "backfill_indoor_distances: fetch failed for activity %s",
                    a.id, exc_info=True,
                )
                skipped += 1
                time.sleep(pause_s)
                continue

            # `summaryDTO` may be present but null; `or {}` covers both cases.
            summ = detail.get("summaryDTO") or {}
            corr = summ.get("distance")
            if not corr:
                skipped += 1
                continue

            raw = a.distance_m or 0
            if raw > 0 and abs(corr - raw) / raw > 0.01:
                db.execute(
                    text("UPDATE activity_data_points SET distance_m = distance_m * :s "
                         "WHERE activity_id = :aid AND distance_m IS NOT NULL"),
                    {"s": corr / raw, "aid": a.id},
                )
            a.distance_m = corr

            if summ.get("movingDuration") is not None:
                a.moving_time_s = summ["movingDuration"]

            # Fall back to `duration` when `elapsedDuration` is absent, matching
            # the elapsed-time fallback used by the regular sync path.
            elapsed = summ.get("elapsedDuration")
            if elapsed is None:
                elapsed = summ.get("duration")
            if elapsed is not None:
                a.duration_s = elapsed

            if a.duration_s:
                a.avg_speed_ms = corr / a.duration_s

            db.commit()
            updated += 1
            time.sleep(pause_s)

    return {"status": "ok", "updated": updated, "skipped": skipped}
@celery_app.task(name="recompute_personal_records_all")
def recompute_personal_records_all(user_id: int):
    """Rebuild all of a user's personal records from stored activity data.

    Running activities without a polyline (indoor/treadmill) are excluded
    because their distance is unreliable. The best split per (sport, distance)
    is computed first; the existing records are then deleted and the new ones
    inserted in a single transaction, so a failure part-way through leaves the
    previous records in place instead of an empty table.

    Args:
        user_id: Owner of the records to rebuild.

    Returns:
        ``{"status": "ok", "records": n}`` where ``n`` is the number of
        records written.
    """
    from app.services.route_matcher import compute_best_splits, STANDARD_DISTANCES
    from app.core.database import SyncSessionLocal
    from app.models.user import Activity, ActivityDataPoint, PersonalRecord
    from sqlalchemy import select, delete

    dist_for_label = {label: dist for dist, label in STANDARD_DISTANCES}

    with SyncSessionLocal() as db:
        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.distance_m.isnot(None),
            )
        ).scalars().all()

        best = {}  # (sport, dist_m) -> {dur, aid, at, label}
        for a in acts:
            if a.sport_type == "running" and a.polyline is None:
                continue  # indoor/treadmill — unreliable distance

            rows = db.execute(
                select(ActivityDataPoint.timestamp, ActivityDataPoint.distance_m)
                .where(ActivityDataPoint.activity_id == a.id)
                .order_by(ActivityDataPoint.timestamp)
            ).all()
            # NOTE(review): timestamps are passed through exactly as the DB
            # returns them — confirm compute_best_splits accepts that type.
            dp = [{"timestamp": ts, "distance_m": d} for ts, d in rows if d is not None]
            if len(dp) < 2:
                continue

            # Rescale the stream when its max disagrees with the activity's
            # stored total by more than 2%.
            total = a.distance_m or 0
            gps_max = max(p["distance_m"] for p in dp)
            if total > 0 and gps_max > 0 and abs(gps_max - total) / total > 0.02:
                factor = total / gps_max
                for p in dp:
                    p["distance_m"] *= factor

            for label, dur in compute_best_splits(dp, total).items():
                dist_m = dist_for_label.get(label)
                if dist_m is None:
                    continue
                key = (a.sport_type, dist_m)
                if key not in best or dur < best[key]["dur"]:
                    best[key] = {"dur": dur, "aid": a.id, "at": a.start_time, "label": label}

        # Wipe and re-insert under one commit so the swap is all-or-nothing.
        db.execute(delete(PersonalRecord).where(PersonalRecord.user_id == user_id))
        for (sport, dist_m), b in best.items():
            db.add(PersonalRecord(
                user_id=user_id,
                activity_id=b["aid"],
                sport_type=sport,
                distance_m=dist_m,
                distance_label=b["label"],
                duration_s=b["dur"],
                achieved_at=b["at"],
                is_current_record=True,
            ))
        db.commit()

    return {"status": "ok", "records": len(best)}