Files
MileVault/backend/app/services/route_matcher.py
T
owain bc437cce92
Build and push images / validate (push) Successful in 2s
Build and push images / build-backend (push) Successful in 6s
Build and push images / build-worker (push) Successful in 6s
Build and push images / build-frontend (push) Successful in 9s
Batch 1: dashboard, maps, segments rewrite, health, sync UX
Fixes:
- Dashboard: featured most-recent activity card with map + stats
- Maps default to Street; preferCanvas + larger tile buffer for smoother pan/zoom
- Running cadence as colour-banded dots + 165 spm guide line
- Routes: inline row expansion, rename (PATCH /routes/{id}), podium + deltas, tiled map
- Records: remove reversed pace Y-axis
- Profile: remove resting HR; add goal weight
- Health: snapshot weight carry-forward; VO2 trend axis 30-70;
  weight goal line + kg/st-lb toggle + axis max; sleep 8h/avg lines
- Garmin sync progress moved to global store with persistent floating bar

Features:
- Speed-coloured activity route (default) with Speed/Solid toggle
- GPS-geometry segments: draw on map, match across all activities,
  1st/2nd/3rd leaderboard + podium badges (replaces old distance segments)
- Lap bests: best time per lap across a route + delta column
- Body Battery: highlight activity time windows

Schema: users.goal_weight_kg ALTER; new segments/segment_efforts tables.
Removes RouteSegment, the Segments page, and segment-bests endpoints.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 19:59:06 +01:00

218 lines
7.0 KiB
Python

"""
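Route matching: identifies when multiple activities were on the same route.
Uses a bounding-box pre-filter + dynamic time warping (DTW) for GPS track similarity.
Also contains helpers for matching a GPS-geometry segment inside an activity track
and for computing best split times over standard distances.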
"""
import math
from typing import Optional
import polyline as polyline_lib
import numpy as np
def decode_polyline_to_coords(encoded: str) -> list[tuple[float, float]]:
    """Decode an encoded polyline string into a list of coordinate pairs.

    Thin wrapper around ``polyline_lib.decode``; whatever that call returns
    (or raises) is passed straight through to the caller.
    """
    decoded = polyline_lib.decode(encoded)
    return decoded
def bounding_boxes_overlap(bb1: dict, bb2: dict, tolerance_deg: float = 0.005) -> bool:
    """Return True when two bounding boxes overlap once each is padded by a margin.

    Both boxes are dicts with ``min_lat``/``max_lat``/``min_lon``/``max_lon``
    keys. Every edge is pushed outwards by ``tolerance_deg`` before the
    interval-overlap test, so boxes that nearly touch still count as overlapping.
    """
    axes = (("min_lat", "max_lat"), ("min_lon", "max_lon"))
    for low_key, high_key in axes:
        # Padded interval of bb1 must start no later than padded bb2 ends...
        if not (bb1[low_key] - tolerance_deg <= bb2[high_key] + tolerance_deg):
            return False
        # ...and must end no earlier than padded bb2 starts.
        if not (bb1[high_key] + tolerance_deg >= bb2[low_key] - tolerance_deg):
            return False
    return True
def sample_coords(coords: list[tuple], n: int = 100) -> list[tuple]:
    """Downsample a track to at most ``n`` evenly-spaced points for DTW efficiency.

    Args:
        coords: The track points, in order.
        n: Maximum number of points to keep.

    Returns:
        ``coords`` itself (not a copy) when it already has ``n`` or fewer
        points. Otherwise a new list of ``n`` points whose indices are spread
        evenly from the first to the last point, both included. ``n == 1``
        yields just the first point and ``n <= 0`` yields an empty list.
    """
    total = len(coords)
    if total <= n:
        return coords
    if n <= 0:
        return []
    if n == 1:
        # The general formula divides by (n - 1); a single sample is the start.
        return [coords[0]]
    last = total - 1
    # Integer arithmetic keeps the index computation exact.
    return [coords[i * last // (n - 1)] for i in range(n)]
def dtw_distance(track1: list[tuple], track2: list[tuple]) -> float:
    """Compute a normalised dynamic-time-warping distance between two GPS tracks.

    Each point is ``(lat, lon)``. The pairwise cost is the haversine distance
    in metres; the accumulated cost of the cheapest warping path is divided
    by the length of the longer track.

    Args:
        track1: First track, in order.
        track2: Second track, in order.

    Returns:
        The normalised DTW cost in metres as a plain ``float``, or
        ``float("inf")`` when either track is empty (nothing can be aligned).
    """
    n, m = len(track1), len(track2)
    if n == 0 or m == 0:
        # Avoids 0/0 (nan + RuntimeWarning) when both tracks are empty.
        return float("inf")

    # acc[i, j] = cheapest cost of aligning track1[:i] with track2[:j].
    acc = np.full((n + 1, m + 1), np.inf)
    acc[0, 0] = 0.0
    for i in range(1, n + 1):
        point = track1[i - 1]
        for j in range(1, m + 1):
            cost = haversine_m(point, track2[j - 1])
            acc[i, j] = cost + min(acc[i - 1, j], acc[i, j - 1], acc[i - 1, j - 1])
    return float(acc[n, m] / max(n, m))
def haversine_m(p1: tuple, p2: tuple) -> float:
    """Return the great-circle distance in metres between two points.

    Args:
        p1: First point; index 0 is latitude and index 1 is longitude, in degrees.
        p2: Second point, same layout.

    Returns:
        Haversine distance on a sphere of radius 6,371,000 m.
    """
    earth_radius_m = 6371000
    lat1, lon1 = math.radians(p1[0]), math.radians(p1[1])
    lat2, lon2 = math.radians(p2[0]), math.radians(p2[1])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    return 2 * earth_radius_m * math.asin(math.sqrt(a))
def routes_are_similar(
    poly1: str,
    poly2: str,
    bb1: Optional[dict],
    bb2: Optional[dict],
    dtw_threshold_m: float = 80.0,
    dist1: Optional[float] = None,
    dist2: Optional[float] = None,
) -> bool:
    """
    Decide whether two activities follow sufficiently similar routes.

    Checks run cheapest-first: distance comparison, bounding-box overlap, then
    DTW on tracks downsampled to 60 points each.

    When both dist1 and dist2 are given and positive:
    - the pair is rejected if the distances differ by more than 2.5%
    - dtw_threshold_m is replaced by 3% of the longer distance (capped at 300m)

    Returns False if either polyline fails to decode or decodes to no points.
    """
    distances_known = bool(dist1 and dist2 and dist1 > 0 and dist2 > 0)
    if distances_known:
        if abs(dist1 - dist2) / max(dist1, dist2) > 0.025:
            return False
        # Distance-aware threshold overrides whatever the caller passed in.
        dtw_threshold_m = min(max(dist1, dist2) * 0.03, 300.0)

    # The bounding-box pre-filter only applies when both boxes are available.
    if bb1 and bb2 and not bounding_boxes_overlap(bb1, bb2):
        return False

    try:
        track_a = sample_coords(decode_polyline_to_coords(poly1), 60)
        track_b = sample_coords(decode_polyline_to_coords(poly2), 60)
    except Exception:
        # Best-effort: an undecodable polyline simply means "no match".
        return False

    if not (track_a and track_b):
        return False

    return dtw_distance(track_a, track_b) < dtw_threshold_m
def match_segment_in_activity(
    seg_coords: list[tuple],
    act_coords: list[tuple],
    act_times: list,
    tol_m: float = 30.0,
) -> Optional[float]:
    """
    Check whether an activity track passes along a segment's GPS geometry and,
    if it does, return how long that traversal took. Only the overlapping
    stretch is considered, so the rest of the activity may go anywhere.

    seg_coords: [(lat, lon), ...] segment geometry (start → end).
    act_coords: [(lat, lon), ...] activity track, in time order.
    act_times:  parallel list of datetimes for act_coords.
    tol_m:      maximum distance in metres for a track point to count as
                "on" a segment point.

    Approach: pick the activity point closest to the segment start (strictly
    within tol_m), then the closest later point to the segment end, then check
    that the segment points at 25%, 50% and 75% of its index range each have
    some activity point within tol_m between the two anchors.

    Returns the seconds elapsed between the anchors, or None when the track
    does not follow the segment or the elapsed time is not positive.
    """
    point_count = len(act_coords)
    if point_count < 2 or len(seg_coords) < 2:
        return None

    def _closest_within_tol(target, first):
        # Index (>= first) of the activity point nearest to `target`, provided
        # it is strictly closer than tol_m; the earliest index wins ties.
        best_idx, best_gap = None, tol_m
        for idx in range(first, point_count):
            gap = haversine_m(act_coords[idx], target)
            if gap < best_gap:
                best_idx, best_gap = idx, gap
        return best_idx

    start_idx = _closest_within_tol(seg_coords[0], 0)
    if start_idx is None:
        return None

    # The end anchor must come strictly after the start anchor.
    end_idx = _closest_within_tol(seg_coords[-1], start_idx + 1)
    if end_idx is None:
        return None

    # Confirm the track follows the segment's shape between the anchors.
    last_seg_idx = len(seg_coords) - 1
    for frac in (0.25, 0.5, 0.75):
        probe = seg_coords[int(frac * last_seg_idx)]
        passed = any(
            haversine_m(act_coords[idx], probe) <= tol_m
            for idx in range(start_idx, end_idx + 1)
        )
        if not passed:
            return None

    elapsed = (act_times[end_idx] - act_times[start_idx]).total_seconds()
    if elapsed > 0:
        return elapsed
    return None
def find_best_split_time(
    data_points: list[dict],
    target_distance_m: float,
) -> Optional[float]:
    """
    Find the best (fastest) time over any target_distance_m window within an activity.
    E.g. fastest 1km split in a 10km run.

    Args:
        data_points: Samples in recorded order. Only those with non-None
            "distance_m" and "timestamp" entries are used; a timestamp may be
            an ISO-8601 string or an object supporting subtraction that
            yields a timedelta.
        target_distance_m: Window length in metres.

    Returns:
        Duration in seconds of the fastest window, or None when no usable
        samples exist, the target is not positive, or no window covers the
        target distance with a positive duration.
    """
    # Imported locally so the block does not depend on a file-level import.
    from datetime import datetime

    def _as_datetime(ts):
        return datetime.fromisoformat(ts) if isinstance(ts, str) else ts

    if target_distance_m <= 0:
        # A non-positive target would produce zero/negative "windows".
        return None

    points = [
        p for p in data_points
        if p.get("distance_m") is not None and p.get("timestamp") is not None
    ]
    if not points:
        return None

    total = len(points)
    best = None
    end = 0  # Two-pointer window end; only ever moves forward.
    for start_p in points:
        start_dist = start_p["distance_m"]
        # Advance the end pointer until the window covers the target distance.
        while end < total and points[end]["distance_m"] - start_dist < target_distance_m:
            end += 1
        if end >= total:
            # Later starts cover even less distance, so nothing more to find.
            break
        t_start = _as_datetime(start_p["timestamp"])
        t_end = _as_datetime(points[end]["timestamp"])
        duration = (t_end - t_start).total_seconds()
        # Ignore zero/negative durations (duplicate or out-of-order
        # timestamps) so a glitch cannot become the "best" split.
        if duration > 0 and (best is None or duration < best):
            best = duration
    return best
# Distances for which best splits are computed, as (distance in metres, label)
# pairs in ascending order. The label is used as the key in the dict returned
# by compute_best_splits.
STANDARD_DISTANCES = [
    (400, "400m"),
    (800, "800m"),
    (1000, "1k"),
    (1609.34, "1 mile"),
    (3000, "3k"),
    (5000, "5k"),
    (10000, "10k"),
    (21097.5, "Half marathon"),
    (42195, "Marathon"),
    (50000, "50k"),
    (100000, "100k"),
]
def compute_best_splits(data_points: list[dict], total_distance_m: float) -> dict[str, float]:
    """Return best split times keyed by label for each standard distance the activity covers.

    A standard distance is attempted when the activity's total distance is at
    least 95% of it. Distances for which no (truthy) best time is found are
    left out of the result.
    """
    splits = {}
    eligible = (
        (metres, label)
        for metres, label in STANDARD_DISTANCES
        if total_distance_m >= metres * 0.95  # allow 5% tolerance
    )
    for metres, label in eligible:
        fastest = find_best_split_time(data_points, metres)
        if fastest:
            splits[label] = fastest
    return splits