Fix treadmill/indoor distance over-measure and clean up polluted PRs
- Garmin Connect sync now applies Garmin's corrected summary distance/moving time, overriding the raw wrist-estimated FIT distance for treadmill/indoor runs - Exclude indoor (no-GPS) runs from distance personal records (bogus fast splits) - backfill_indoor_distances task: re-fetch corrected distance for historical indoor runs - recompute_personal_records_all task: wipe and rebuild PRs from valid activities Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -181,7 +181,14 @@ def sync_activities(garmin, user_id: int, since: Optional[datetime],
|
||||
dest = dest_dir / f"{garmin_id}.fit"
|
||||
dest.write_bytes(fit_data)
|
||||
|
||||
process_activity_file.delay(str(dest), user_id, "fit", garmin_id)
|
||||
# Pass Garmin's corrected summary so treadmill/indoor wrist-distance is
|
||||
# overridden by the authoritative displayed distance/moving time.
|
||||
summary = {
|
||||
"distance": act.get("distance"),
|
||||
"moving": act.get("movingDuration"),
|
||||
"elapsed": act.get("elapsedDuration") or act.get("duration"),
|
||||
}
|
||||
process_activity_file.delay(str(dest), user_id, "fit", garmin_id, summary)
|
||||
queued += 1
|
||||
|
||||
if status_callback and (queued % 5 == 0 or queued == total):
|
||||
|
||||
@@ -52,10 +52,38 @@ def is_wellness_file(file_path: str) -> bool:
|
||||
return any(name.endswith(s.upper()) for s in WELLNESS_SUFFIXES)
|
||||
|
||||
|
||||
def _apply_garmin_summary(parsed: dict, summary: dict) -> None:
    """Override FIT-parsed distance/time with Garmin Connect's corrected summary.

    Treadmill/indoor runs report wrist-estimated distance in the raw FIT that
    Garmin later corrects; the summary is the authoritative displayed value.
    ``parsed`` is mutated in place.

    Args:
        parsed: Parsed activity dict. Reads/writes ``distance_m``,
            ``moving_time_s``, ``duration_s``, ``avg_speed_ms`` and the
            per-point ``distance_m`` inside ``data_points``.
        summary: Optional dict with ``distance``, ``moving`` and ``elapsed``
            keys. A falsy value (``None`` or ``{}``) leaves ``parsed`` untouched.
    """
    if not summary:
        return

    corrected = summary.get("distance")
    raw = parsed.get("distance_m")
    if corrected:
        # Only rescale the per-point stream when the correction exceeds 1 %,
        # so splits/charts stay consistent with the new total.
        if raw and raw > 0 and abs(corrected - raw) / raw > 0.01:
            scale = corrected / raw
            # ``data_points`` may be present but None; treat that as empty
            # instead of failing on iteration.
            for point in parsed.get("data_points") or []:
                if point.get("distance_m") is not None:
                    point["distance_m"] = point["distance_m"] * scale
        parsed["distance_m"] = corrected

    if summary.get("moving") is not None:
        parsed["moving_time_s"] = summary["moving"]
    if summary.get("elapsed") is not None:
        parsed["duration_s"] = summary["elapsed"]

    # Keep the average speed in step with the (possibly) updated totals.
    duration = parsed.get("duration_s")
    if parsed.get("distance_m") and duration:
        parsed["avg_speed_ms"] = parsed["distance_m"] / duration
|
||||
|
||||
|
||||
@celery_app.task(bind=True, name="process_activity_file")
|
||||
def process_activity_file(self, file_path: str, user_id: int, source_type: str,
|
||||
garmin_activity_id: str = None):
|
||||
"""Parse a FIT/GPX file. Routes wellness files to health parser."""
|
||||
garmin_activity_id: str = None, summary: dict = None):
|
||||
"""Parse a FIT/GPX file. Routes wellness files to health parser.
|
||||
|
||||
`summary` (optional, from Garmin Connect sync) carries Garmin's corrected
|
||||
distance/moving/elapsed values which override the raw FIT figures."""
|
||||
|
||||
if is_wellness_file(file_path):
|
||||
parse_wellness_fit.delay(file_path, user_id)
|
||||
@@ -80,6 +108,9 @@ def process_activity_file(self, file_path: str, user_id: int, source_type: str,
|
||||
if not parsed.get("start_time"):
|
||||
return {"status": "skipped", "reason": "no start_time", "file": file_path}
|
||||
|
||||
# Prefer Garmin's corrected distance/time (treadmill/indoor wrist-distance fix).
|
||||
_apply_garmin_summary(parsed, summary)
|
||||
|
||||
# Reject activities whose average speed is implausible for the sport (e.g. a
|
||||
# car journey accidentally recorded). Surfaced to the upload UI as the reason.
|
||||
if parsed.get("rejected_reason"):
|
||||
@@ -387,6 +418,12 @@ def compute_personal_records(activity_id: int, user_id: int, parsed: dict):
|
||||
start_time_str = parsed.get("start_time")
|
||||
start_time = datetime.fromisoformat(start_time_str) if start_time_str else datetime.now(timezone.utc)
|
||||
|
||||
# Treadmill/indoor runs have no GPS and an unreliable distance stream, which
|
||||
# produces bogus fast splits — exclude them from distance records.
|
||||
has_gps = any(p.get("latitude") is not None and p.get("longitude") is not None for p in data_points)
|
||||
if sport == "running" and not has_gps:
|
||||
return {"status": "skipped_indoor", "activity_id": activity_id}
|
||||
|
||||
# GPS can over/under-measure relative to the activity's official distance
|
||||
# (e.g. a 5 km run whose GPS track sums to 5.8 km), which would otherwise
|
||||
# produce a bogus "best 5 km" split. Scale the distance stream so its max
|
||||
@@ -865,3 +902,131 @@ def backfill_moving_time(user_id: int = None):
|
||||
db.commit()
|
||||
|
||||
return {"status": "ok", "updated": updated, "skipped": skipped}
|
||||
|
||||
|
||||
@celery_app.task(name="backfill_indoor_distances")
def backfill_indoor_distances(user_id: int):
    """Re-fetch Garmin's corrected distance/time for historical indoor runs.

    Selects the user's running activities that have a Garmin activity id and
    no stored polyline (used here as the indoor/no-GPS marker), asks Garmin
    Connect for each activity's ``summaryDTO`` and overwrites the stored
    distance, moving time, duration and average speed. When the corrected
    distance differs from the stored one by more than 1 %, the stored
    per-point distance stream is scaled by the same factor. One-off
    maintenance task.

    Args:
        user_id: Owner of the activities to backfill.

    Returns:
        ``{"status": "no_garmin_config"}`` when the user has no Garmin
        Connect configuration, otherwise
        ``{"status": "ok", "updated": int, "skipped": int}``.
    """
    import logging
    import time
    from app.services.garmin_connect_sync import authenticate_garmin
    from app.core.database import SyncSessionLocal
    from app.models.user import GarminConnectConfig, Activity
    from sqlalchemy import select, text

    log = logging.getLogger(__name__)
    request_gap_s = 0.4  # pause between Garmin API requests

    updated, skipped = 0, 0
    with SyncSessionLocal() as db:
        cfg = db.execute(
            select(GarminConnectConfig).where(GarminConnectConfig.user_id == user_id)
        ).scalar_one_or_none()
        if not cfg:
            return {"status": "no_garmin_config"}

        garmin, new_token = authenticate_garmin(cfg.email, cfg.password_enc, cfg.token_store)
        if new_token:
            # Persist the refreshed token immediately.
            cfg.token_store = new_token
            db.commit()

        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.sport_type == "running",
                Activity.polyline.is_(None),
                Activity.garmin_activity_id.isnot(None),
            )
        ).scalars().all()

        for index, a in enumerate(acts):
            # Throttle every request, including those that end up skipped, so
            # a run of failures does not hammer the API.
            if index:
                time.sleep(request_gap_s)

            try:
                detail = garmin.get_activity(int(a.garmin_activity_id)) or {}
            except Exception:
                # Best-effort maintenance: record the failure and move on.
                log.warning(
                    "backfill_indoor_distances: could not fetch Garmin activity %s",
                    a.garmin_activity_id,
                    exc_info=True,
                )
                skipped += 1
                continue

            # ``summaryDTO`` may be missing or null in the response.
            summ = detail.get("summaryDTO") or {}
            corr = summ.get("distance")
            if not corr:
                skipped += 1
                continue

            raw = a.distance_m or 0
            if raw > 0 and abs(corr - raw) / raw > 0.01:
                # Scale relative to the stored total; a re-run sees
                # raw == corr afterwards and therefore does not rescale again.
                db.execute(
                    text("UPDATE activity_data_points SET distance_m = distance_m * :s "
                         "WHERE activity_id = :aid AND distance_m IS NOT NULL"),
                    {"s": corr / raw, "aid": a.id},
                )
            a.distance_m = corr

            if summ.get("movingDuration") is not None:
                a.moving_time_s = summ["movingDuration"]

            # Same fallback as the live sync path: elapsedDuration, else duration.
            elapsed = summ.get("elapsedDuration") or summ.get("duration")
            if elapsed is not None:
                a.duration_s = elapsed

            if a.duration_s:
                a.avg_speed_ms = corr / a.duration_s

            # Commit per activity so progress survives a later failure.
            db.commit()
            updated += 1

    return {"status": "ok", "updated": updated, "skipped": skipped}
|
||||
|
||||
|
||||
@celery_app.task(name="recompute_personal_records_all")
def recompute_personal_records_all(user_id: int):
    """Wipe and rebuild all personal records from stored activity data.

    Running activities without a polyline (treated as indoor/treadmill) are
    excluded because their distance is unreliable. For every remaining
    activity with a distance, the stored per-point distance stream is scaled
    to the activity's total when they differ by more than 2 %, best splits
    are computed, and the fastest split per (sport, distance) is kept.

    The new records are computed first; the existing rows are deleted and the
    replacements inserted in a single commit, so a failure while rebuilding
    leaves the previous records intact.

    Args:
        user_id: Owner of the records to rebuild.

    Returns:
        ``{"status": "ok", "records": int}`` with the number of records written.
    """
    from app.services.route_matcher import compute_best_splits, STANDARD_DISTANCES
    from app.core.database import SyncSessionLocal
    from app.models.user import Activity, ActivityDataPoint, PersonalRecord
    from sqlalchemy import select, delete

    # STANDARD_DISTANCES is iterated as (distance_m, label) pairs.
    dist_for_label = {label: dist for dist, label in STANDARD_DISTANCES}

    with SyncSessionLocal() as db:
        acts = db.execute(
            select(Activity).where(
                Activity.user_id == user_id,
                Activity.distance_m.isnot(None),
            )
        ).scalars().all()

        best = {}  # (sport, dist_m) -> {dur, aid, at, label}
        for a in acts:
            if a.sport_type == "running" and a.polyline is None:
                continue  # indoor/treadmill — unreliable distance

            rows = db.execute(
                select(ActivityDataPoint.timestamp, ActivityDataPoint.distance_m)
                .where(ActivityDataPoint.activity_id == a.id)
                .order_by(ActivityDataPoint.timestamp)
            ).all()
            dp = [{"timestamp": r[0], "distance_m": r[1]} for r in rows if r[1] is not None]
            if len(dp) < 2:
                continue

            total = a.distance_m or 0
            stream_max = max(p["distance_m"] for p in dp)
            # Align the stream with the activity's official total so a
            # mis-measured track cannot produce a bogus best split.
            if total > 0 and stream_max > 0 and abs(stream_max - total) / total > 0.02:
                factor = total / stream_max
                for p in dp:
                    p["distance_m"] *= factor

            for label, dur in compute_best_splits(dp, total).items():
                dist_m = dist_for_label.get(label)
                # Defensive: ignore unknown labels and splits without a duration.
                if dist_m is None or dur is None:
                    continue
                key = (a.sport_type, dist_m)
                if key not in best or dur < best[key]["dur"]:
                    best[key] = {"dur": dur, "aid": a.id, "at": a.start_time, "label": label}

        # Replace the old records atomically: delete + insert share one commit.
        db.execute(delete(PersonalRecord).where(PersonalRecord.user_id == user_id))
        for (sport, dist_m), b in best.items():
            db.add(PersonalRecord(
                user_id=user_id,
                activity_id=b["aid"],
                sport_type=sport,
                distance_m=dist_m,
                distance_label=b["label"],
                duration_s=b["dur"],
                achieved_at=b["at"],
                is_current_record=True,
            ))
        db.commit()

    return {"status": "ok", "records": len(best)}
|
||||
|
||||
Reference in New Issue
Block a user