"""
Background tasks: activity ingestion, route matching, PR calculation.

Uses synchronous SQLAlchemy because Celery's prefork model doesn't play well
with asyncio - each worker process needs its own connection pool, and async
pools don't survive process forks.
"""
import json
import logging
import zipfile
from datetime import datetime, timezone

from celery import Celery

from app.core.config import settings

logger = logging.getLogger(__name__)

celery_app = Celery(
    "milevault",
    broker=settings.redis_url,
    backend=settings.redis_url,
)

celery_app.conf.update(
    task_serializer="json",
    result_serializer="json",
    accept_content=["json"],
    timezone="UTC",
    enable_utc=True,
    task_track_started=True,
    worker_prefetch_multiplier=1,
)

# Number of ActivityDataPoint rows added to the session between flushes.
_POINT_BATCH_SIZE = 500


def _coerce_datetime(value):
    """Return *value* as a ``datetime``, or ``None`` when it is missing/empty.

    Strings are parsed with ``datetime.fromisoformat``; anything else that is
    truthy is passed through unchanged (the parsers may hand us ``datetime``
    objects directly, and the JSON task serializer may or may not turn them
    back into strings depending on the Celery/kombu version).

    Raises:
        ValueError: if *value* is a string that is not valid ISO-8601.
    """
    if not value:
        return None
    if isinstance(value, str):
        # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on;
        # normalising it keeps older interpreters working and is a no-op
        # semantically on newer ones.
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        return datetime.fromisoformat(value)
    return value


@celery_app.task(bind=True, name="process_activity_file")
def process_activity_file(self, file_path: str, user_id: int, source_type: str):
    """Parse a FIT/GPX file and insert activity + data points into DB.

    Args:
        file_path: Path of the uploaded file; also stored as ``source_file``.
        user_id: Owner of the new activity.
        source_type: ``"fit"`` selects the FIT parser. Any other value falls
            back to the file extension, and then to the GPX parser.

    Returns:
        ``{"activity_id": <id>, "status": "duplicate"}`` when an activity with
        the same ``garmin_activity_id`` already exists, otherwise
        ``{"activity_id": <id>, "status": "ok"}``.

    Raises:
        celery.exceptions.Retry: when parsing fails (up to 3 retries, 10 s
            apart).
    """
    # Project imports stay local to the task, as in the original module.
    from app.services.fit_parser import parse_fit_file, parse_gpx_file, calculate_hr_zones
    from app.core.database import SyncSessionLocal
    from app.models.user import Activity, ActivityDataPoint, ActivityLap
    from sqlalchemy import select

    self.update_state(state="PROGRESS", meta={"step": "parsing"})

    # Compare the extension case-insensitively so upper-case ".FIT" files are
    # not routed to the GPX parser.
    is_fit = source_type == "fit" or file_path.lower().endswith(".fit")
    try:
        parsed = parse_fit_file(file_path) if is_fit else parse_gpx_file(file_path)
    except Exception as e:
        raise self.retry(exc=e, countdown=10, max_retries=3)

    # Tolerate keys that are present but None.
    points = parsed.get("data_points") or []
    laps = parsed.get("laps") or []
    garmin_activity_id = parsed.get("garmin_activity_id")

    with SyncSessionLocal() as db:
        # Check for duplicate.
        # NOTE(review): this lookup is not scoped to user_id - confirm that
        # garmin_activity_id is meant to be unique across all users.
        if garmin_activity_id:
            existing = db.execute(
                select(Activity).where(
                    Activity.garmin_activity_id == garmin_activity_id
                )
            ).scalar_one_or_none()
            if existing:
                return {"activity_id": existing.id, "status": "duplicate"}

        hr_zones = calculate_hr_zones(points, parsed.get("max_heart_rate") or 190)

        activity = Activity(
            user_id=user_id,
            # Persist the id the duplicate check above queries on; without it
            # a re-upload of the same file can never be detected.
            garmin_activity_id=garmin_activity_id,
            name=parsed["name"],
            sport_type=parsed["sport_type"],
            start_time=_coerce_datetime(parsed.get("start_time")),
            distance_m=parsed.get("distance_m"),
            duration_s=parsed.get("duration_s"),
            elevation_gain_m=parsed.get("elevation_gain_m"),
            elevation_loss_m=parsed.get("elevation_loss_m"),
            avg_heart_rate=parsed.get("avg_heart_rate"),
            max_heart_rate=parsed.get("max_heart_rate"),
            avg_cadence=parsed.get("avg_cadence"),
            avg_power=parsed.get("avg_power"),
            normalized_power=parsed.get("normalized_power"),
            avg_speed_ms=parsed.get("avg_speed_ms"),
            max_speed_ms=parsed.get("max_speed_ms"),
            avg_temperature_c=parsed.get("avg_temperature_c"),
            calories=parsed.get("calories"),
            training_stress_score=parsed.get("training_stress_score"),
            polyline=parsed.get("polyline"),
            bounding_box=parsed.get("bounding_box"),
            source_file=file_path,
            source_type=parsed.get("source_type"),
            hr_zones=hr_zones,
        )
        db.add(activity)
        db.flush()  # assigns activity.id for the child rows below

        # Insert data points in batches - dedupe on timestamp since the
        # composite PK (activity_id, timestamp) rejects duplicates and Garmin
        # sometimes has same-second readings. activity_id is constant here,
        # so the timestamp alone identifies a row.
        seen_timestamps = set()
        batch = []
        for p in points:
            ts = _coerce_datetime(p.get("timestamp"))
            if ts is None or ts in seen_timestamps:
                continue
            seen_timestamps.add(ts)
            batch.append(ActivityDataPoint(
                activity_id=activity.id,
                timestamp=ts,
                latitude=p.get("latitude"),
                longitude=p.get("longitude"),
                altitude_m=p.get("altitude_m"),
                heart_rate=p.get("heart_rate"),
                cadence=p.get("cadence"),
                speed_ms=p.get("speed_ms"),
                power=p.get("power"),
                temperature_c=p.get("temperature_c"),
                distance_m=p.get("distance_m"),
            ))
            if len(batch) >= _POINT_BATCH_SIZE:
                db.add_all(batch)
                db.flush()
                batch = []
        if batch:
            db.add_all(batch)
            db.flush()

        # Laps
        for lap in laps:
            db.add(ActivityLap(
                activity_id=activity.id,
                lap_number=lap["lap_number"],
                start_time=_coerce_datetime(lap.get("start_time")),
                duration_s=lap.get("duration_s"),
                distance_m=lap.get("distance_m"),
                avg_heart_rate=lap.get("avg_heart_rate"),
                avg_cadence=lap.get("avg_cadence"),
                avg_speed_ms=lap.get("avg_speed_ms"),
                avg_power=lap.get("avg_power"),
            ))

        db.commit()
        # Read the id while the session is still open.
        activity_id = activity.id

    # Queue PR calculation only after the activity has been committed.
    compute_personal_records.delay(activity_id, user_id, parsed)
    return {"activity_id": activity_id, "status": "ok"}


@celery_app.task(name="compute_personal_records")
def compute_personal_records(activity_id: int, user_id: int, parsed: dict):
    """Calculate personal records for standard distances from this activity.

    For every label returned by ``compute_best_splits`` that maps to a
    distance in ``STANDARD_DISTANCES``, a new current ``PersonalRecord`` is
    inserted when no current record exists or the new duration is strictly
    shorter; the previous current record(s) are then marked as not current.

    Args:
        activity_id: Activity the splits were measured in.
        user_id: Owner of the records.
        parsed: Parser output for the activity. Reads ``data_points``,
            ``distance_m``, ``sport_type`` and ``start_time``.
    """
    from app.services.route_matcher import compute_best_splits, STANDARD_DISTANCES
    from app.core.database import SyncSessionLocal
    from app.models.user import PersonalRecord
    from sqlalchemy import select

    data_points = parsed.get("data_points", [])
    total_dist = parsed.get("distance_m", 0) or 0
    sport = parsed.get("sport_type", "running")
    # Fall back to "now" when the activity carries no start time.
    start_time = _coerce_datetime(parsed.get("start_time")) or datetime.now(timezone.utc)

    best_splits = compute_best_splits(data_points, total_dist)

    # Build the label -> distance lookup once; setdefault keeps the first
    # entry for a label, matching a first-match linear scan.
    distance_by_label = {}
    for distance, distance_label in STANDARD_DISTANCES:
        distance_by_label.setdefault(distance_label, distance)

    with SyncSessionLocal() as db:
        for label, duration_s in best_splits.items():
            dist_m = distance_by_label.get(label)
            if dist_m is None or duration_s is None:
                continue

            # Fetch every row flagged current instead of demanding at most
            # one, so stray duplicates are repaired rather than failing the
            # task with MultipleResultsFound.
            current_rows = db.execute(
                select(PersonalRecord).where(
                    PersonalRecord.user_id == user_id,
                    PersonalRecord.sport_type == sport,
                    PersonalRecord.distance_m == dist_m,
                    # SQL expression, not a Python comparison.
                    PersonalRecord.is_current_record == True,  # noqa: E712
                )
            ).scalars().all()

            known_durations = [
                row.duration_s for row in current_rows if row.duration_s is not None
            ]
            if known_durations and duration_s >= min(known_durations):
                continue  # existing record still stands

            for row in current_rows:
                row.is_current_record = False
            db.add(PersonalRecord(
                user_id=user_id,
                activity_id=activity_id,
                sport_type=sport,
                distance_m=dist_m,
                distance_label=label,
                duration_s=duration_s,
                achieved_at=start_time,
                is_current_record=True,
            ))
        db.commit()


@celery_app.task(name="process_garmin_health_zip")
def process_garmin_health_zip(zip_path: str, user_id: int):
    """Extract wellness/sleep/HRV data from a Garmin Connect export ZIP.

    Only archive members whose name contains ``DailyMetrics`` and ends with
    ``.json`` are read. Each JSON object with a parseable ``calendarDate``
    (or ``date``) becomes one ``HealthMetric`` row; a top-level JSON list is
    treated as a list of such objects. Unreadable files and entries without a
    usable date are skipped. Everything is committed in one transaction.

    Args:
        zip_path: Path to the export archive.
        user_id: Owner of the imported metrics.
    """
    from app.core.database import SyncSessionLocal
    from app.models.user import HealthMetric

    with SyncSessionLocal() as db, zipfile.ZipFile(zip_path) as zf:
        for name in zf.namelist():
            if "DailyMetrics" not in name or not name.endswith(".json"):
                continue

            with zf.open(name) as f:
                try:
                    payload = json.load(f)
                except Exception:
                    # Best effort: one corrupt member must not abort the
                    # whole import, but leave a trace of what was skipped.
                    logger.warning("Skipping unreadable health file %s", name, exc_info=True)
                    continue

            if isinstance(payload, dict):
                entries = [payload]
            elif isinstance(payload, list):
                entries = [item for item in payload if isinstance(item, dict)]
            else:
                entries = []

            for data in entries:
                date_str = data.get("calendarDate") or data.get("date")
                if not date_str:
                    continue
                try:
                    # Any parsed value is stamped as UTC (the offset is
                    # replaced, not converted), as before.
                    date = datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc)
                except (TypeError, ValueError):
                    # TypeError covers non-string date values.
                    continue

                db.add(HealthMetric(
                    user_id=user_id,
                    date=date,
                    resting_hr=data.get("restingHeartRate"),
                    steps=data.get("totalSteps"),
                    floors_climbed=data.get("floorsAscended"),
                    active_calories=data.get("activeKilocalories"),
                    total_calories=data.get("totalKilocalories"),
                    avg_stress=data.get("averageStressLevel"),
                    spo2_avg=data.get("avgSpo2"),
                ))
        db.commit()