Files
MileVault/backend/app/workers/tasks.py
T
owain c4e5eb91ed
Build and push images / build-backend (push) Successful in 1m52s
Build and push images / build-worker (push) Successful in 44s
Build and push images / build-frontend (push) Successful in 25s
Use sync SQLAlchemy in Celery worker - fixes asyncpg connection issues
2026-06-06 15:29:36 +01:00

240 lines
9.0 KiB
Python

"""
Background tasks: activity ingestion, route matching, PR calculation.
Uses synchronous SQLAlchemy because Celery's prefork model doesn't play
well with asyncio - each worker process needs its own connection pool,
and async pools don't survive process forks.
"""
from celery import Celery
from app.core.config import settings
celery_app = Celery(
"milevault",
broker=settings.redis_url,
backend=settings.redis_url,
)
celery_app.conf.update(
task_serializer="json",
result_serializer="json",
accept_content=["json"],
timezone="UTC",
enable_utc=True,
task_track_started=True,
worker_prefetch_multiplier=1,
)
@celery_app.task(bind=True, name="process_activity_file")
def process_activity_file(self, file_path: str, user_id: int, source_type: str):
"""Parse a FIT/GPX file and insert activity + data points into DB."""
from app.services.fit_parser import parse_fit_file, parse_gpx_file, calculate_hr_zones
from app.core.database import SyncSessionLocal
from app.models.user import Activity, ActivityDataPoint, ActivityLap
from sqlalchemy import select
from datetime import datetime
self.update_state(state="PROGRESS", meta={"step": "parsing"})
try:
if source_type == "fit" or file_path.endswith(".fit"):
parsed = parse_fit_file(file_path)
else:
parsed = parse_gpx_file(file_path)
except Exception as e:
raise self.retry(exc=e, countdown=10, max_retries=3)
with SyncSessionLocal() as db:
# Check for duplicate
if parsed.get("garmin_activity_id"):
existing = db.execute(
select(Activity).where(
Activity.garmin_activity_id == parsed["garmin_activity_id"]
)
).scalar_one_or_none()
if existing:
return {"activity_id": existing.id, "status": "duplicate"}
hr_zones = calculate_hr_zones(
parsed.get("data_points", []),
parsed.get("max_heart_rate") or 190
)
start_time = datetime.fromisoformat(parsed["start_time"]) if parsed.get("start_time") else None
activity = Activity(
user_id=user_id,
name=parsed["name"],
sport_type=parsed["sport_type"],
start_time=start_time,
distance_m=parsed.get("distance_m"),
duration_s=parsed.get("duration_s"),
elevation_gain_m=parsed.get("elevation_gain_m"),
elevation_loss_m=parsed.get("elevation_loss_m"),
avg_heart_rate=parsed.get("avg_heart_rate"),
max_heart_rate=parsed.get("max_heart_rate"),
avg_cadence=parsed.get("avg_cadence"),
avg_power=parsed.get("avg_power"),
normalized_power=parsed.get("normalized_power"),
avg_speed_ms=parsed.get("avg_speed_ms"),
max_speed_ms=parsed.get("max_speed_ms"),
avg_temperature_c=parsed.get("avg_temperature_c"),
calories=parsed.get("calories"),
training_stress_score=parsed.get("training_stress_score"),
polyline=parsed.get("polyline"),
bounding_box=parsed.get("bounding_box"),
source_file=file_path,
source_type=parsed.get("source_type"),
hr_zones=hr_zones,
)
db.add(activity)
db.flush()
# Insert data points in batches - dedupe (activity_id, timestamp) pairs
# since composite PK rejects duplicates and Garmin sometimes has same-second readings
seen = set()
points = parsed.get("data_points", [])
batch = []
for p in points:
if not p.get("timestamp"):
continue
ts = datetime.fromisoformat(p["timestamp"]) if isinstance(p["timestamp"], str) else p["timestamp"]
key = (activity.id, ts)
if key in seen:
continue
seen.add(key)
batch.append(ActivityDataPoint(
activity_id=activity.id,
timestamp=ts,
latitude=p.get("latitude"),
longitude=p.get("longitude"),
altitude_m=p.get("altitude_m"),
heart_rate=p.get("heart_rate"),
cadence=p.get("cadence"),
speed_ms=p.get("speed_ms"),
power=p.get("power"),
temperature_c=p.get("temperature_c"),
distance_m=p.get("distance_m"),
))
if len(batch) >= 500:
db.add_all(batch)
db.flush()
batch = []
if batch:
db.add_all(batch)
db.flush()
# Laps
for lap in parsed.get("laps", []):
ls = datetime.fromisoformat(lap["start_time"]) if lap.get("start_time") else None
db.add(ActivityLap(
activity_id=activity.id,
lap_number=lap["lap_number"],
start_time=ls,
duration_s=lap.get("duration_s"),
distance_m=lap.get("distance_m"),
avg_heart_rate=lap.get("avg_heart_rate"),
avg_cadence=lap.get("avg_cadence"),
avg_speed_ms=lap.get("avg_speed_ms"),
avg_power=lap.get("avg_power"),
))
db.commit()
activity_id = activity.id
# Queue PR calculation
compute_personal_records.delay(activity_id, user_id, parsed)
return {"activity_id": activity_id, "status": "ok"}
@celery_app.task(name="compute_personal_records")
def compute_personal_records(activity_id: int, user_id: int, parsed: dict):
"""Calculate personal records for standard distances from this activity."""
from app.services.route_matcher import compute_best_splits, STANDARD_DISTANCES
from app.core.database import SyncSessionLocal
from app.models.user import PersonalRecord
from sqlalchemy import select
from datetime import datetime, timezone
data_points = parsed.get("data_points", [])
total_dist = parsed.get("distance_m", 0) or 0
sport = parsed.get("sport_type", "running")
start_time_str = parsed.get("start_time")
start_time = datetime.fromisoformat(start_time_str) if start_time_str else datetime.now(timezone.utc)
best_splits = compute_best_splits(data_points, total_dist)
with SyncSessionLocal() as db:
for label, duration_s in best_splits.items():
dist_m = next((d for d, l in STANDARD_DISTANCES if l == label), None)
if dist_m is None:
continue
current = db.execute(
select(PersonalRecord).where(
PersonalRecord.user_id == user_id,
PersonalRecord.sport_type == sport,
PersonalRecord.distance_m == dist_m,
PersonalRecord.is_current_record == True,
)
).scalar_one_or_none()
if current is None or duration_s < current.duration_s:
if current:
current.is_current_record = False
db.add(PersonalRecord(
user_id=user_id,
activity_id=activity_id,
sport_type=sport,
distance_m=dist_m,
distance_label=label,
duration_s=duration_s,
achieved_at=start_time,
is_current_record=True,
))
db.commit()
@celery_app.task(name="process_garmin_health_zip")
def process_garmin_health_zip(zip_path: str, user_id: int):
"""Extract wellness/sleep/HRV data from a Garmin Connect export ZIP."""
import zipfile
import json
from app.core.database import SyncSessionLocal
from app.models.user import HealthMetric
from datetime import datetime, timezone
with SyncSessionLocal() as db:
with zipfile.ZipFile(zip_path) as zf:
for name in zf.namelist():
if "DailyMetrics" not in name or not name.endswith(".json"):
continue
with zf.open(name) as f:
try:
data = json.load(f)
except Exception:
continue
date_str = data.get("calendarDate") or data.get("date")
if not date_str:
continue
try:
date = datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc)
except ValueError:
continue
metric = HealthMetric(
user_id=user_id,
date=date,
resting_hr=data.get("restingHeartRate"),
steps=data.get("totalSteps"),
floors_climbed=data.get("floorsAscended"),
active_calories=data.get("activeKilocalories"),
total_calories=data.get("totalKilocalories"),
avg_stress=data.get("averageStressLevel"),
spo2_avg=data.get("avgSpo2"),
)
db.add(metric)
db.commit()