commit c74790b01425b99beb5474e0cdb6cd74641345fc Author: Kieran Bolt-Biggs Date: Tue Aug 12 01:13:41 2025 +0100 Initial commit (clean, ignores in place) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1cdad15 --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +# Python cache +__pycache__/ +*.pyc +*.pyo +*.pyd + +# Personal Google Timeline data +Timeline.json + +# Output: keep folder, ignore contents +output/** +!output/.gitkeep + +# Envs +.env +.venv/ +*.env + +# OS cruft +.DS_Store +Thumbs.db + diff --git a/README.md b/README.md new file mode 100644 index 0000000..6856b79 --- /dev/null +++ b/README.md @@ -0,0 +1,219 @@ +# Mileage Logger + +Small, reliable mileage-logging tool that ingests **Google Maps Timeline** exports from Android (device-local) and produces **one Excel workbook per month** ready for HR (EveryHR). + +- Detects ordered work trips between whitelisted sites (schools, HQ, Home) +- Computes per-hop miles (route catalog → fallback straight-line) +- Writes `mileage_YYYY-MM.xlsx` with the exact columns HR needs +- Filters by date (e.g., **last month**), skips weekends, and (by default) assumes each workday starts at **Home** + +--- + +## Quick Start + +### 1) Export Timeline on your phone +Android: `Settings → Location → Location Services → Google Timeline → Export` +Copy **`Timeline.json`** to your computer. + +### 2) Put it here +```bash +~/Downloads/Mileage Logger/Timeline.json +``` + +### 3) Install dependencies +```bash +# in your chosen venv/conda env +pip install -r requirements.txt +``` + +### 4) Run (monthly workflow: previous calendar month, weekdays only) +```bash +cd ~/Downloads/Mileage\ Logger +python -m mileage_logger.cli import "Timeline.json" --sites config/sites.yml --routes tests/data/routes_golden.csv --output ~/Downloads/Mileage\ Logger/output --last-month --weekdays-only +``` + +### 5) Find your Excel +Look in `output/` for files like `mileage_YYYY-MM.xlsx`. 
+ +--- + +## One‑word alias (Linux/macOS bash/zsh) +Add this to your `~/.bashrc` or `~/.zshrc`: +```bash +alias mileage='( cd ~/Downloads/Mileage\ Logger && python -m mileage_logger.cli import "Timeline.json" --sites config/sites.yml --routes tests/data/routes_golden.csv --output ~/Downloads/Mileage\ Logger/output --last-month --weekdays-only )' +``` +Then run: +```bash +mileage +``` + +--- + +## Installing + +### Windows (PowerShell) +```powershell +# Run in PowerShell (inside your venv): +pip install -r requirements.txt +function mileage { + Set-Location "$HOME\Downloads\Mileage Logger" + python -m mileage_logger.cli import "Timeline.json" ` + --sites config\sites.yml ` + --routes tests\data\routes_golden.csv ` + --output "$HOME\Downloads\Mileage Logger\output" ` + --last-month ` + --weekdays-only +} +``` + +### macOS / Linux +```bash +pip install -r requirements.txt +# (optional) add the alias from Quick Start to your shell rc +``` + +--- + +## Configuration + +### `config/sites.yml` +Define your whitelisted locations. + +```yaml +sites: + - canonical: "Home" + label: "Home" + lat: 52.65236 + lon: 1.26983 + radius_m: 400 + aliases: ["home", "inferred_home", "INFERRED_HOME"] + + - canonical: "Henderson Green Primary Academy" + label: "Henderson Green Primary Academy" + lat: 52.6565032 + lon: 1.2703586 + radius_m: 300 + aliases: ["Henderson Green", "Henderson Green Primary"] + + - canonical: "Unity SP" + label: "Unity SP (HQ)" + lat: 52.6067 + lon: 1.2886 + radius_m: 350 + aliases: ["WORK", "Unity SP HQ"] + + # ...other schools... +``` + +Tips: +- **radius_m**: start at 300–600 m for schools (device visits often pin to car parks, not building centroids). +- Add aliases you see in Timeline (e.g., `WORK`, `INFERRED_HOME`). + +### `tests/data/routes_golden.csv` +Pre‑approved driving distances (miles) for common pairs. 
+ +``` +origin,destination,miles +Home,Henderson Green Primary Academy,2.9 +Henderson Green Primary Academy,Home,2.9 +Henderson Green Primary Academy,Valley Primary Academy,1.1 +Valley Primary Academy,Heartsease Primary Academy,4.7 +# ...etc +``` +If a pair isn’t found, the tool falls back to straight‑line (haversine) distance and rounds to 0.1 mi. + +--- + +## CLI Usage + +```bash +python -m mileage_logger.cli import TIMELINE.json --sites config/sites.yml --routes tests/data/routes_golden.csv --output output [date filter flags] [behavior flags] +``` + +### Date filter flags (choose one style) +- `--last-month` — previous calendar month (e.g., run on 1 Sep → processes August) +- `--month YYYY-MM` — specific month (e.g., `--month 2025-08`) +- `--since YYYY-MM-DD` / `--until YYYY-MM-DD` — inclusive range +- `--days N` — last N days relative to today (local time) + +### Behavior flags +- `--weekdays-only` — exclude Saturday/Sunday hops +- `--no-assume-home-start` — disable synthetic `Home → FirstSite` at start of day (default is ON) + +--- + +## Output +One workbook per month present in the filtered data: +- File: `mileage_YYYY-MM.xlsx` +- Sheet: `YYYY-MM` +- Columns: + - Date (YYYY-MM-DD) + - Purpose (`Travel from {From} to {To} {Miles}mi`) + - Miles (rounded to 0.1) + - Vehicle + - Job Role + - From + - To + - Notes (empty) + +--- + +## For another user/colleague +- Copy the whole folder. +- Edit `config/sites.yml`: + - Update **Home** lat/lon and radius. + - Keep schools the same unless their coverage differs. +- Update/add `tests/data/routes_golden.csv` lines for Home↔Sites if their home is different. +- Confirm the alias points to their own path. +- Run the same monthly command. 
+ +--- + +## Requirements +- Python 3.10+ (tested on 3.10–3.13) +- Works on Windows, macOS, Linux + +### `requirements.txt` +``` +python-dateutil>=2.8 +pytz>=2023.3 +PyYAML>=6.0 +openpyxl>=3.1 +geopy>=2.4 +# Optional: only needed if you enable external routing APIs later +httpx>=0.27 +``` + +--- + +## Makefile (optional convenience) +```make +install: + pip install -r requirements.txt + +run: + python -m mileage_logger.cli import "Timeline.json" --sites config/sites.yml --routes tests/data/routes_golden.csv --output output --last-month --weekdays-only + +# If you add linting/tests later: +lint: + flake8 mileage_logger + +test: + pytest +``` + +Usage: +```bash +make install +make run +``` + +--- + +## Troubleshooting +- **No rows appear** — Increase `radius_m` (e.g., 400–600 m). Ensure aliases like `INFERRED_HOME`, `WORK` exist for Home/HQ. +- **Trips start mid‑day** — Ensure you didn’t pass `--no-assume-home-start`. +- **Only one workbook created** — Likely only one month had hops in the filtered range (expected with `--last-month`). +- **“Module not found” errors** — Activate the same environment you installed deps into (or reinstall with `pip install -r requirements.txt`). + +--- diff --git a/config/sites.yml b/config/sites.yml new file mode 100644 index 0000000..bed5ae6 --- /dev/null +++ b/config/sites.yml @@ -0,0 +1,83 @@ +--- +# Example site configuration for the mileage logging tool. +# +# Each entry defines a work location by canonical name along with a +# friendly label, geographic coordinates and a geofence radius. The +# aliases list contains alternative names that may be present in the +# Google exports. You should adapt these values to suit your actual +# locations and coordinates. 
+ +sites: + - canonical: "Home" + label: "Home" + lat: 52.651379 + lon: 1.269954 + radius_m: 200 + aliases: + - "Home" + - "134 Bowers Avenue, Norwich, England" + - canonical: "Lingwood Primary Academy" + label: "Lingwood Primary Academy" + lat: 52.619605 + lon: 1.492807 + radius_m: 200 + aliases: + - "Lingwood Primary Academy" + - "Lingwood Primary School" + - canonical: "Valley Primary Academy" + label: "Valley Primary Academy" + lat: 52.642878 + lon: 1.242512 + radius_m: 200 + aliases: + - "Valley Primary Academy" + - "Valley Primary School" + - canonical: "Heartsease Primary Academy" + label: "Heartsease Primary Academy" + lat: 52.641949 + lon: 1.328999 + radius_m: 200 + aliases: + - "Heartsease Primary Academy" + - "Heartsease Primary School" + - canonical: "Henderson Green Primary Academy" + label: "Henderson Green Primary Academy" + lat: 52.635781 + lon: 1.253487 + radius_m: 200 + aliases: + - "Henderson Green Primary Academy" + - "Henderson Green Primary School" + - canonical: "Robert Kett Primary School" + label: "Robert Kett Primary School" + lat: 52.578774 + lon: 1.114982 + radius_m: 200 + aliases: + - "Robert Kett Primary Academy" + - "Robert Kett Primary School" + - "Robert Kett School" + - canonical: "Colman Infant School" + label: "Colman Infant School" + lat: 52.620224 + lon: 1.265285 + radius_m: 200 + aliases: + - "Colman Infant Academy" + - "Colman Infant School" + - canonical: "Colman Junior School" + label: "Colman Junior School" + lat: 52.62041 + lon: 1.262594 + radius_m: 200 + aliases: + - "Colman Junior Academy" + - "Colman Junior School" + - canonical: "Unity SP" + label: "Unity SP" + lat: 52.4610 + lon: 1.3500 + radius_m: 200 + aliases: + - "Unity SP" + - "Unity SP HQ" diff --git a/mileage_logger/__init__.py b/mileage_logger/__init__.py new file mode 100644 index 0000000..dda299e --- /dev/null +++ b/mileage_logger/__init__.py @@ -0,0 +1,38 @@ +"""Top level package for the mileage logging tool. 
+ +This package provides a set of modules used to ingest Google Semantic +Location History data, detect work related travel itineraries based on a +whitelisted set of sites, resolve driving distances between those sites +and export the resulting mileage claims into an Excel workbook ready +for submission to a HR system. + +The project is organised into subpackages: + +* :mod:`mileage_logger.ingest` – parse Google Takeout JSON exports + into structured Python objects. +* :mod:`mileage_logger.logic` – implement the state machine that + identifies ordered hops between recognised locations in a day’s + timeline. +* :mod:`mileage_logger.distance` – resolve distances via a route + catalogue or, optionally, an external API with caching. +* :mod:`mileage_logger.export` – write Excel workbooks or CSV files + containing the final mileage log. +* :mod:`mileage_logger.cli` – command line interface for invoking + common workflows such as importing a new export or rebuilding a + monthly workbook. + +This package requires Python 3.10 or newer. See the README for +installation and usage instructions. 
+""" + +from .ingest import semantic_reader # noqa: F401 +from .logic import detect_itinerary # noqa: F401 +from .distance import resolve # noqa: F401 +from .export import excel_writer # noqa: F401 + +__all__ = [ + "semantic_reader", + "detect_itinerary", + "resolve", + "excel_writer", +] \ No newline at end of file diff --git a/mileage_logger/cli.py b/mileage_logger/cli.py new file mode 100644 index 0000000..cf666a0 --- /dev/null +++ b/mileage_logger/cli.py @@ -0,0 +1,189 @@ +"""Command line interface for the mileage logging tool.""" + +from __future__ import annotations + +import argparse +import os +from datetime import date, datetime, timedelta +from typing import Optional, Tuple + +import pytz + +from .ingest.semantic_reader import load_place_visits +from .logic.detect_itinerary import SiteConfig, detect_itinerary +from .distance.resolve import DistanceResolver +from .export.excel_writer import build_monthly_rows, write_monthly_workbook + + +TZ = pytz.timezone("Europe/London") + + +def _today_local() -> date: + return datetime.now(TZ).date() + + +def _prev_month_bounds(today: Optional[date] = None) -> Tuple[date, date]: + """Return (start_date, end_date) for the previous calendar month in Europe/London.""" + if today is None: + today = _today_local() + first_this_month = today.replace(day=1) + last_prev_month = first_this_month - timedelta(days=1) + start_prev_month = last_prev_month.replace(day=1) + return start_prev_month, last_prev_month + + +def _month_bounds(ym: str) -> Tuple[date, date]: + """Return (start_date, end_date) for the given YYYY-MM.""" + year, month = map(int, ym.split("-")) + start = date(year, month, 1) + if month == 12: + end = date(year + 1, 1, 1) - timedelta(days=1) + else: + end = date(year, month + 1, 1) - timedelta(days=1) + return start, end + + +def _parse_date(s: str) -> date: + y, m, d = map(int, s.split("-")) + return date(y, m, d) + + +def import_file( + json_path: str, + site_config_path: str, + route_csv_path: str, + 
output_dir: str, + assume_home_start: bool, + weekdays_only: bool, + month: Optional[str], + last_month: bool, + since: Optional[str], + until: Optional[str], + days: Optional[int], +) -> None: + """Import a single JSON file and write Excel workbooks (one per month).""" + visits = load_place_visits(json_path) + if not visits: + print(f"No place visits found in {json_path}") + return + + # 1) Determine date range filter + start_date: Optional[date] = None + end_date: Optional[date] = None + + if month: + start_date, end_date = _month_bounds(month) + elif last_month: + start_date, end_date = _prev_month_bounds() + elif since or until: + if since: + start_date = _parse_date(since) + if until: + end_date = _parse_date(until) + elif days: + end_date = _today_local() + start_date = end_date - timedelta(days=days - 1) + + # 2) Apply date filtering to visits (by visit.start_time local date) + if start_date or end_date: + def in_range(v): + d = v.start_time.date() + if start_date and d < start_date: + return False + if end_date and d > end_date: + return False + return True + visits = [v for v in visits if in_range(v)] + if not visits: + label = f"{start_date or ''}..{end_date or ''}" + print(f"No place visits in requested range {label}") + return + + site_config = SiteConfig.from_yaml(site_config_path) + hops = detect_itinerary(visits, site_config, assume_home_start=assume_home_start) + if not hops: + print("No recognised hops detected after filtering.") + return + + # 3) Weekday filter (Sat=5, Sun=6) + if weekdays_only: + hops = [h for h in hops if h.date.weekday() < 5] + if not hops: + print("All hops fell on weekends; nothing to write.") + return + + resolver = DistanceResolver(route_csv_path) + rows_by_month = build_monthly_rows(hops, site_config, resolver) + + # 4) Write one workbook per month present + os.makedirs(output_dir, exist_ok=True) + for month_key, rows in sorted(rows_by_month.items()): + # If a specific month/range was requested, rows_by_month will already 
reflect it. + output_path = os.path.join(output_dir, f"mileage_{month_key}.xlsx") + write_monthly_workbook({month_key: rows}, output_path) + print(f"Wrote {output_path} ({len(rows)} rows)") + + +def main(argv: list[str] | None = None) -> None: + parser = argparse.ArgumentParser(description="Mileage logging tool") + subparsers = parser.add_subparsers(dest="command", required=True) + + import_parser = subparsers.add_parser("import", help="Import a single JSON export") + import_parser.add_argument("json_path", help="Path to the JSON file to import") + import_parser.add_argument( + "--sites", dest="site_config_path", + default=os.path.join(os.path.dirname(__file__), "../config/sites.yml"), + help="Path to the sites.yml configuration", + ) + import_parser.add_argument( + "--routes", dest="route_csv_path", + default=os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv"), + help="Path to the routes CSV catalogue", + ) + import_parser.add_argument( + "--output", dest="output_dir", default=os.getcwd(), + help="Directory to write the Excel workbook(s)", + ) + + # Behavior toggles + import_parser.add_argument( + "--no-assume-home-start", action="store_true", + help="Do not inject a Home→first-site hop when a day doesn't start at Home.", + ) + import_parser.add_argument( + "--weekdays-only", action="store_true", + help="Exclude Saturday/Sunday hops.", + ) + + # Date filters (choose one style) + import_parser.add_argument("--last-month", action="store_true", + help="Process the previous calendar month.") + import_parser.add_argument("--month", metavar="YYYY-MM", + help="Process a specific calendar month, e.g. 
2025-08.") + import_parser.add_argument("--since", metavar="YYYY-MM-DD", + help="Lower bound (inclusive) for visits to process.") + import_parser.add_argument("--until", metavar="YYYY-MM-DD", + help="Upper bound (inclusive) for visits to process.") + import_parser.add_argument("--days", type=int, + help="Process the last N days (relative to today).") + + args = parser.parse_args(argv) + if args.command == "import": + import_file( + args.json_path, + args.site_config_path, + args.route_csv_path, + args.output_dir, + assume_home_start=(not args.no_assume_home_start), + weekdays_only=args.weekdays_only, + month=args.month, + last_month=args.last_month, + since=args.since, + until=args.until, + days=args.days, + ) + + +if __name__ == "__main__": + main() + diff --git a/mileage_logger/distance/__init__.py b/mileage_logger/distance/__init__.py new file mode 100644 index 0000000..2e5252c --- /dev/null +++ b/mileage_logger/distance/__init__.py @@ -0,0 +1,12 @@ +"""Distance resolution utilities. + +This subpackage exposes classes and functions that resolve driving +distances between pairs of recognised sites. The primary +implementation is :class:`DistanceResolver`, which first consults a +pre-defined route catalogue before optionally consulting an external +API and finally falling back to a simple geodesic calculation. +""" + +from .resolve import DistanceResolver, haversine_distance + +__all__ = ["DistanceResolver", "haversine_distance"] \ No newline at end of file diff --git a/mileage_logger/distance/resolve.py b/mileage_logger/distance/resolve.py new file mode 100644 index 0000000..71eebd7 --- /dev/null +++ b/mileage_logger/distance/resolve.py @@ -0,0 +1,210 @@ +"""Resolve driving distances between sites. + +The :class:`DistanceResolver` class provides a simple mechanism to +determine the distance in miles between two points. 
It is designed to +prefer a local route catalogue (CSV) if available, fall back to +external API calls when API keys are configured and, as a last +resort, compute a straight-line distance using the haversine +formula. + +Caching is performed to avoid repeated API calls or calculations. A +time-to-live (TTL) can be specified when constructing the resolver; +cache entries older than the TTL are ignored on lookup and the +distance is resolved again. Distances are rounded to one decimal place as +required by HR mileage claim forms. +""" + +from __future__ import annotations + +import csv +import math +import os +import time +from dataclasses import dataclass +from datetime import datetime +from typing import Dict, Optional, Tuple + +try: + import httpx # type: ignore +except ImportError: # Optional dependency. If unavailable, API calls will be skipped. + httpx = None # type: ignore + +from ..logic.detect_itinerary import haversine_distance + + +@dataclass +class _CacheEntry: + distance: float + timestamp: float + + +class DistanceResolver: + """Resolve driving distances between two locations. + + The resolver consults an in-memory cache, a local route catalogue, + an optional external API and finally falls back to a straight-line + calculation using the haversine formula. Distances are cached for + the lifetime of the object. Rounding to one decimal mile is + applied uniformly. + """ + + def __init__(self, route_csv_path: Optional[str] = None, api_key: Optional[str] = None, + http_client: Optional[object] = None, ttl_seconds: float = 365 * 24 * 3600, + vehicle_label: str = "SH11 DRV (Own 1.6CC Diesel Car/Van)", job_role: str = "ICT Technician"): + """Initialise the distance resolver. + + Parameters + ---------- + route_csv_path : str, optional + Path to a CSV file containing pre-approved route distances. + The file should have at least three columns: origin, + destination and miles. 
The entries are assumed to be + directional; if symmetric distances are desired both + directions must be provided. + api_key : str, optional + API key for the Google Routes API. If omitted, API calls + will be skipped. + http_client : :class:`httpx.Client`, optional + HTTP client instance to use for API requests. If omitted, + no client is created and API calls are skipped. + ttl_seconds : float, optional + Time-to-live for cache entries in seconds. Expired + entries are recomputed on demand. The default is one year. + """ + + self.api_key = api_key + # Only store an HTTP client if provided and httpx is available. + # When httpx is unavailable the client will be ignored and API + # calls will be skipped. + self.http_client = http_client if httpx is not None else None + self.ttl_seconds = ttl_seconds + self.vehicle_label = vehicle_label + self.job_role = job_role + self.cache: Dict[Tuple[str, str], _CacheEntry] = {} + # Load route catalogue + self.route_catalog: Dict[Tuple[str, str], float] = {} + if route_csv_path and os.path.exists(route_csv_path): + with open(route_csv_path, "r", encoding="utf-8") as f: + reader = csv.reader(f) + for row in reader: + if not row or row[0].startswith("#"): + continue + try: + origin, destination, miles_str = row[:3] + miles = float(miles_str) + self.route_catalog[(origin.strip(), destination.strip())] = miles + except Exception: + # Skip malformed entries silently + continue + + def _get_from_cache(self, origin: str, dest: str) -> Optional[float]: + """Retrieve a cached distance if present and unexpired.""" + entry = self.cache.get((origin, dest)) + if entry is None: + return None + if (time.time() - entry.timestamp) > self.ttl_seconds: + # Expired + return None + return entry.distance + + def _set_cache(self, origin: str, dest: str, distance: float) -> None: + """Cache the given distance for the origin/destination pair.""" + self.cache[(origin, dest)] = _CacheEntry(distance=distance, timestamp=time.time()) + + def resolve(self, origin_name: 
str, dest_name: str, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> float: + """Resolve the distance between two sites in miles. + + This method will consult the cache, route catalogue, external API + and finally compute a haversine distance. Once resolved, the + distance is cached and rounded to one decimal place. + + Parameters + ---------- + origin_name : str + Canonical name of the origin site. Used for cache and + catalogue lookups. + dest_name : str + Canonical name of the destination site. + origin_coords : tuple(float, float) + Latitude and longitude of the origin in decimal degrees. + dest_coords : tuple(float, float) + Latitude and longitude of the destination in decimal degrees. + + Returns + ------- + float + The resolved driving distance in miles, rounded to one + decimal place. + """ + + # First check the cache + cached = self._get_from_cache(origin_name, dest_name) + if cached is not None: + return cached + # Second consult the route catalogue + catalogue_key = (origin_name, dest_name) + if catalogue_key in self.route_catalog: + dist = self.route_catalog[catalogue_key] + rounded = round(dist, 1) + self._set_cache(origin_name, dest_name, rounded) + return rounded + # Attempt to call external API if configured + if self.api_key: + try: + dist = self._call_google_routes_api(origin_coords, dest_coords) + if dist is not None: + rounded = round(dist, 1) + self._set_cache(origin_name, dest_name, rounded) + return rounded + except Exception: + # Swallow API errors and fall back + pass + # Fall back to haversine distance + dist = haversine_distance(origin_coords[0], origin_coords[1], dest_coords[0], dest_coords[1]) + rounded = round(dist, 1) + self._set_cache(origin_name, dest_name, rounded) + return rounded + + def _call_google_routes_api(self, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> Optional[float]: + """Call the Google Maps Routes API to compute driving distance. 
+ + Note that this is a blocking call. The caller should ensure that + network access is permitted and that a valid API key has been + configured. If the request fails or the response cannot be + parsed, ``None`` is returned. + """ + + # Construct the API request + # See https://developers.google.com/maps/documentation/routes for details + base_url = "https://routes.googleapis.com/directions/v2:computeRoutes" + # Compose JSON payload + payload = { + "origin": {"location": {"latLng": {"latitude": origin_coords[0], "longitude": origin_coords[1]}}}, + "destination": {"location": {"latLng": {"latitude": dest_coords[0], "longitude": dest_coords[1]}}}, + "travelMode": "DRIVE", + "routingPreference": "TRAFFIC_AWARE", + "computeAlternativeRoutes": False, + "units": "IMPERIAL", + } + headers = { + "Content-Type": "application/json", + "X-Goog-Api-Key": self.api_key, + "X-Goog-FieldMask": "routes.duration,routes.distanceMeters", + } + # If httpx is unavailable, or no HTTP client was provided, skip API call + if httpx is None or self.http_client is None: + return None + resp = self.http_client.post(base_url, json=payload, headers=headers) + if resp.status_code != 200: + return None + try: + data = resp.json() + routes = data.get("routes") or [] + if not routes: + return None + # Distance is returned in meters; convert to miles + meters = routes[0]["distanceMeters"] + miles = meters / 1609.34 + return float(miles) + except Exception: + return None \ No newline at end of file diff --git a/mileage_logger/export/__init__.py b/mileage_logger/export/__init__.py new file mode 100644 index 0000000..4c160f5 --- /dev/null +++ b/mileage_logger/export/__init__.py @@ -0,0 +1,5 @@ +"""Export utilities for writing mileage logs to Excel and CSV.""" + +from .excel_writer import write_monthly_workbook, build_monthly_rows + +__all__ = ["write_monthly_workbook", "build_monthly_rows"] \ No newline at end of file diff --git a/mileage_logger/export/excel_writer.py 
b/mileage_logger/export/excel_writer.py new file mode 100644 index 0000000..0162b92 --- /dev/null +++ b/mileage_logger/export/excel_writer.py @@ -0,0 +1,122 @@ +"""Write mileage itineraries to Excel workbooks. + +This module uses :mod:`openpyxl` to construct a workbook with one sheet +per month. Each row corresponds to a single hop between recognised +sites. Columns follow the specification used by the EveryHR system: + +* ``Date`` – calendar date in ISO format (YYYY-MM-DD). +* ``Purpose`` – free text summarising the journey, e.g. ``"Travel from + Home to Lingwood Primary Academy 13.2mi"``. +* ``Miles`` – numeric value rounded to one decimal place. +* ``Vehicle`` – the vehicle descriptor configured for the user. +* ``Job Role`` – the job role of the user. +* ``From`` – friendly label of the origin site. +* ``To`` – friendly label of the destination site. +* ``Notes`` – blank for manual additions. + +Rows are grouped by month (YYYY-MM). Each sheet is named after the +month and contains a header row followed by one row per hop in +chronological order. +""" + +from __future__ import annotations + +import os +from collections import defaultdict +from datetime import date +from typing import Dict, Iterable, List, Tuple + +from openpyxl import Workbook +from openpyxl.utils import get_column_letter + +from ..logic.detect_itinerary import Hop, SiteConfig + + +def build_monthly_rows(hops: Iterable[Hop], site_config: SiteConfig, distance_resolver) -> Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]]: + """Prepare rows grouped by month for Excel output. + + Parameters + ---------- + hops : iterable of :class:`Hop` + The hops produced by itinerary detection. + site_config : :class:`SiteConfig` + Used to look up friendly labels for canonical site names. + distance_resolver : object + An object with a ``resolve(origin_name, dest_name, origin_coords, dest_coords)`` + method that returns a distance in miles. 
See + :class:`~mileage_logger.distance.resolve.DistanceResolver`. + + Returns + ------- + dict mapping str -> list of tuples + Keys are month strings in the form ``YYYY-MM``. Values are + lists of tuples containing the data for each row: (date_str, + purpose, miles, vehicle, job_role, from_label, to_label, notes). + """ + + rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]] = defaultdict(list) + for hop in hops: + month_key = hop.date.strftime("%Y-%m") + origin_site = site_config.by_canonical.get(hop.origin) + dest_site = site_config.by_canonical.get(hop.destination) + if origin_site is None or dest_site is None: + continue + # Resolve distance + dist = distance_resolver.resolve( + hop.origin, + hop.destination, + (origin_site.lat, origin_site.lon), + (dest_site.lat, dest_site.lon), + ) + # Build purpose string + purpose = f"Travel from {origin_site.label} to {dest_site.label} {dist:.1f}mi" + rows_by_month[month_key].append( + ( + hop.date.isoformat(), + purpose, + dist, + distance_resolver.vehicle_label if hasattr(distance_resolver, "vehicle_label") else "SH11 DRV (Own 1.6CC Diesel Car/Van)", + distance_resolver.job_role if hasattr(distance_resolver, "job_role") else "ICT Technician", + origin_site.label, + dest_site.label, + "", + ) + ) + return rows_by_month + + +def write_monthly_workbook(rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]], output_path: str) -> None: + """Write the grouped rows into an Excel workbook. + + Parameters + ---------- + rows_by_month : dict + Mapping from month strings to lists of row tuples as returned + by :func:`build_monthly_rows`. + output_path : str + Path of the Excel workbook to write. Any existing file will be + overwritten. 
+ """ + + wb = Workbook() + # Remove the default sheet created by openpyxl + default_sheet = wb.active + wb.remove(default_sheet) + for month, rows in sorted(rows_by_month.items()): + ws = wb.create_sheet(title=month) + # Write header + header = ["Date", "Purpose", "Miles", "Vehicle", "Job Role", "From", "To", "Notes"] + ws.append(header) + for row in rows: + ws.append(list(row)) + # Autosize columns (approximate) + for col_idx in range(1, len(header) + 1): + column_letter = get_column_letter(col_idx) + max_length = max( + len(str(ws.cell(row=r + 1, column=col_idx).value)) for r in range(len(rows) + 1) + ) + # Add a little extra padding + ws.column_dimensions[column_letter].width = max_length + 2 + # Ensure directory exists + os.makedirs(os.path.dirname(output_path), exist_ok=True) + wb.save(output_path) \ No newline at end of file diff --git a/mileage_logger/gui.py b/mileage_logger/gui.py new file mode 100644 index 0000000..ecb1a05 --- /dev/null +++ b/mileage_logger/gui.py @@ -0,0 +1,131 @@ +"""Simple web GUI for the mileage logger. + +This module exposes a FastAPI application that wraps the core +functionality of the mileage logger with a minimal HTML front end. It +allows a user to upload a Google Semantic Location History JSON file +and returns an Excel workbook containing their mileage claims. The +application also renders a basic status page showing the detected +itinerary. + +Usage +----- +Run the server using uvicorn: + +``` +uvicorn mileage_logger.gui:app --reload --port 8000 +``` + +Then navigate to ``http://localhost:8000`` in your web browser. Use +the form to upload a JSON export. After processing, the server will +return an Excel file for download. + +Limitations +----------- +This GUI is intentionally lightweight and is not designed for +concurrent multi-user access. It does not persist files on disk and +does not perform any authentication or authorisation. For production +use consider extending it with proper user management and storage. 
+""" + +from __future__ import annotations + +import json +import os +import tempfile +from io import BytesIO +from typing import Dict, List + +from fastapi import FastAPI, File, Form, UploadFile +from fastapi.responses import HTMLResponse, FileResponse, StreamingResponse + +from .ingest.semantic_reader import load_place_visits +from .logic.detect_itinerary import SiteConfig, detect_itinerary +from .distance.resolve import DistanceResolver +from .export.excel_writer import build_monthly_rows, write_monthly_workbook + + +# Load configuration once at startup. You can change the path to +# config/sites.yml if you have customised it. The route catalogue is +# loaded on-demand when handling uploads. +DEFAULT_SITE_CONFIG_PATH = os.path.join(os.path.dirname(__file__), "../config/sites.yml") +DEFAULT_ROUTE_CSV_PATH = os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv") + +site_config: SiteConfig = SiteConfig.from_yaml(DEFAULT_SITE_CONFIG_PATH) + +app = FastAPI(title="Mileage Logger GUI") + + +@app.get("/", response_class=HTMLResponse) +async def index() -> str: + """Render a simple upload form.""" + return """ + + + Mileage Logger + + +

Mileage Logger

+

Select a Google Takeout JSON file to process. The file + should contain the "timelineObjects" array from your Semantic + Location History export.

+
+ +

+ + +

+ + +

+ +
+ + + """ + + +@app.post("/process") +async def process_file( + file: UploadFile = File(...), + vehicle: str = Form("SH11 DRV (Own 1.6CC Diesel Car/Van)"), + job_role: str = Form("ICT Technician"), +) -> StreamingResponse: + """Handle upload and return an Excel workbook. + + The uploaded file is saved to a temporary file on disk and then + passed through the existing CLI pipeline. The resulting workbook + contains one sheet per month and is returned as a streaming + response. + """ + # Persist upload to a temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as tmp_in: + contents = await file.read() + tmp_in.write(contents) + tmp_in.flush() + input_path = tmp_in.name + # Parse visits and detect itinerary + visits = load_place_visits(input_path) + hops = detect_itinerary(visits, site_config) + resolver = DistanceResolver(route_csv_path=DEFAULT_ROUTE_CSV_PATH, vehicle_label=vehicle, job_role=job_role) + rows_by_month = build_monthly_rows(hops, site_config, resolver) + # Write workbook to in-memory buffer + output_stream = BytesIO() + # Use openpyxl to write into BytesIO via our helper + # Since write_monthly_workbook writes to a file, create another temp file + with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp_out: + write_monthly_workbook(rows_by_month, tmp_out.name) + tmp_out.flush() + # Read the file back into memory + tmp_out.seek(0) + data = tmp_out.read() + output_stream.write(data) + # Cleanup temporary files + try: + os.remove(input_path) + except Exception: + pass + # Prepare response + output_stream.seek(0) + filename = "mileage.xlsx" + headers = {"Content-Disposition": f"attachment; filename={filename}"} + return StreamingResponse(output_stream, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", headers=headers) \ No newline at end of file diff --git a/mileage_logger/ingest/__init__.py b/mileage_logger/ingest/__init__.py new file mode 100644 index 0000000..d02a1ef --- /dev/null +++ 
b/mileage_logger/ingest/__init__.py @@ -0,0 +1,18 @@ +"""Subpackage for data ingestion. + +The :mod:`mileage_logger.ingest` package contains utilities for reading +Google Semantic Location History JSON exports. The core entry point is +the :func:`load_place_visits` function which converts raw JSON into +structured :class:`PlaceVisit` objects. These objects expose +timezone-aware start and end timestamps as well as geographic +coordinates and the human readable name of the location. +""" + +from .semantic_reader import Location, PlaceVisit, ActivitySegment, load_place_visits + +__all__ = [ + "Location", + "PlaceVisit", + "ActivitySegment", + "load_place_visits", +] \ No newline at end of file diff --git a/mileage_logger/ingest/semantic_reader.py b/mileage_logger/ingest/semantic_reader.py new file mode 100644 index 0000000..2ef3795 --- /dev/null +++ b/mileage_logger/ingest/semantic_reader.py @@ -0,0 +1,258 @@ +"""Parser for Google Semantic Location History exports. + +Google Takeout and on-device exports of the Timeline API are provided +as JSON files under a ``timelineObjects`` key. Each entry in +``timelineObjects`` is either a ``placeVisit`` or an ``activitySegment``. +This module exposes data classes representing those events and a +convenient loader that normalises timestamps and coordinate formats. + +Timestamps in the source JSON are encoded as millisecond epoch +strings. When loaded these are converted into timezone-aware +:class:`datetime.datetime` objects. Coordinates in the JSON are stored +as integer multiples of 1e-7 degrees; we scale them to floats. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Iterable, List, Optional + +import pytz + + +@dataclass +class Location: + """A simple geographic location. + + Attributes + ---------- + lat : float + Latitude in decimal degrees. + lon : float + Longitude in decimal degrees. 
+ name : str + Human readable name of the location as provided by Google. + """ + + lat: float + lon: float + name: str + + +@dataclass +class PlaceVisit: + """A visit to a single location for a period of time. + + Attributes + ---------- + location : :class:`Location` + The geographic coordinates and name of the place. + start_time : :class:`datetime` + The timezone-aware start timestamp of the visit. + end_time : :class:`datetime` + The timezone-aware end timestamp of the visit. + """ + + location: Location + start_time: datetime + end_time: datetime + + +@dataclass +class ActivitySegment: + """A movement between two locations. + + While not used directly in itinerary detection, activity segments + contain useful timing information that could be used to derive the + start date for a hop between recognised sites. This class is + provided for completeness and potential future use. + """ + + start_location: Location + end_location: Location + start_time: datetime + end_time: datetime + activity_type: str + + +def _ms_to_dt(ms: str, tz: pytz.BaseTzInfo) -> datetime: + """Convert a millisecond epoch string into a timezone-aware datetime. + + Parameters + ---------- + ms : str + Milliseconds since the Unix epoch encoded as a decimal string. + tz : :class:`pytz.tzinfo.BaseTzInfo` + The timezone into which to localise the resulting datetime. + + Returns + ------- + :class:`datetime` + A timezone-aware datetime corresponding to the input. + """ + + # Google exports store times in milliseconds since UTC epoch + ts = int(ms) / 1000.0 + utc_dt = datetime.fromtimestamp(ts, timezone.utc) + return utc_dt.astimezone(tz) + + +def _parse_location(raw: dict) -> Location: + """Parse a location dictionary from the export format. + + The export encodes lat/lon in integer multiples of 1e-7 degrees. + This helper scales the values into decimals and extracts the + ``name`` field. 
+ + Parameters + ---------- + raw : dict + A mapping containing ``latitudeE7``, ``longitudeE7`` and + ``name`` keys. + + Returns + ------- + :class:`Location` + A populated location object. + """ + + lat = raw.get("latitudeE7") + lon = raw.get("longitudeE7") + name = raw.get("name", "") + return Location(lat=float(lat) / 1e7 if lat is not None else 0.0, + lon=float(lon) / 1e7 if lon is not None else 0.0, + name=name) + + +def load_place_visits(path: str, tz_name: str = "Europe/London") -> List[PlaceVisit]: + """Load all place visits from a Location History JSON file. + + This function supports both the legacy "Semantic Location History" + exports (containing a top-level ``timelineObjects`` array) and + newer on-device Timeline exports that expose a ``semanticSegments`` + array. In both cases the goal is to extract "place visits" – + periods of time spent at a single location. + + For legacy files the timestamps are millisecond epoch strings and + coordinates are encoded as integer multiples of 1e-7 degrees. For + device-local exports the timestamps are ISO 8601 strings with + timezone offsets and coordinates are stored in a ``latLng`` string + on the ``visit.topCandidate.placeLocation``. + + Parameters + ---------- + path : str + Path to the JSON file produced by Google Takeout or the + on-device Timeline export. + tz_name : str, optional + The name of the timezone used for localisation, by default + ``Europe/London``. See the ``pytz`` documentation for valid + identifiers. + + Returns + ------- + list of :class:`PlaceVisit` + A chronologically ordered list of place visits. 
+ """ + + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + + tz = pytz.timezone(tz_name) + visits: List[PlaceVisit] = [] + + # Legacy Semantic Location History format + if "timelineObjects" in data and isinstance(data["timelineObjects"], list): + timeline_objects = data.get("timelineObjects", []) + for obj in timeline_objects: + if "placeVisit" in obj: + pv = obj["placeVisit"] + loc = _parse_location(pv.get("location", {})) + dur = pv.get("duration", {}) + start_ms = dur.get("startTimestampMs") + end_ms = dur.get("endTimestampMs") + if start_ms is None or end_ms is None: + # Skip malformed entries + continue + visits.append(PlaceVisit( + location=loc, + start_time=_ms_to_dt(start_ms, tz), + end_time=_ms_to_dt(end_ms, tz), + )) + elif "activitySegment" in obj: + # We ignore activity segments for now; they are parsed here + # only to support potential future features such as deriving + # more accurate hop start times. + seg = obj["activitySegment"] + start_loc = _parse_location(seg.get("startLocation", {})) + end_loc = _parse_location(seg.get("endLocation", {})) + dur = seg.get("duration", {}) + start_ms = dur.get("startTimestampMs") + end_ms = dur.get("endTimestampMs") + if start_ms is None or end_ms is None: + continue + # Create ActivitySegment instance (unused for now) + # The object is not appended to the visits list because + # itinerary detection only relies on place visits. + _ = ActivitySegment( + start_location=start_loc, + end_location=end_loc, + start_time=_ms_to_dt(start_ms, tz), + end_time=_ms_to_dt(end_ms, tz), + activity_type=seg.get("activityType", "UNKNOWN"), + ) + # New device-local Timeline export format + elif "semanticSegments" in data and isinstance(data["semanticSegments"], list): + try: + from dateutil import parser as dateutil_parser # type: ignore + except ImportError: + raise ImportError( + "python-dateutil is required to parse device-local Timeline exports. " + "Install it with 'pip install python-dateutil'." 
+ ) + for segment in data["semanticSegments"]: + # Only interested in visit segments; skip activities and path-only entries + visit = segment.get("visit") + if not visit: + continue + # Extract start and end times (ISO 8601 with timezone offsets) + start_time_iso = segment.get("startTime") + end_time_iso = segment.get("endTime") + if not start_time_iso or not end_time_iso: + continue + try: + start_dt = dateutil_parser.isoparse(start_time_iso).astimezone(tz) + end_dt = dateutil_parser.isoparse(end_time_iso).astimezone(tz) + except (ValueError, OverflowError): + # Skip unparseable times + continue + # Extract coordinates; stored as "latLng": "lat°, lon°" + place_loc = visit.get("topCandidate", {}).get("placeLocation", {}) + latlng_str = place_loc.get("latLng") + if not latlng_str: + continue + # Strip degree symbol and split into lat/lon components + try: + lat_str, lon_str = [c.strip().replace("°", "") for c in latlng_str.split(",")] + lat = float(lat_str) + lon = float(lon_str) + except Exception: + continue + # Use the semantic type or label as the name if available + candidate = visit.get("topCandidate", {}) + name = candidate.get("label") or candidate.get("semanticType") or "" + visits.append(PlaceVisit( + location=Location(lat=lat, lon=lon, name=str(name)), + start_time=start_dt, + end_time=end_dt, + )) + # Ignore any other structures (e.g. rawSignals, userLocationProfile) + else: + # If the file doesn't contain known keys, return empty list + return [] + + # Sort visits chronologically by start time + visits.sort(key=lambda v: v.start_time) + return visits \ No newline at end of file diff --git a/mileage_logger/logic/__init__.py b/mileage_logger/logic/__init__.py new file mode 100644 index 0000000..43cda44 --- /dev/null +++ b/mileage_logger/logic/__init__.py @@ -0,0 +1,13 @@ +"""Business logic for detecting work itineraries. 
+ +This package exposes functions used to interpret a chronologically +ordered list of :class:`PlaceVisit` objects and reduce them into a +sequence of 'hops' between recognised work locations. Recognition is +driven by a site configuration file (YAML) that defines canonical +names, friendly labels, optional aliases and geofences for each +location. +""" + +from .detect_itinerary import SiteConfig, SiteEntry, Hop, detect_itinerary + +__all__ = ["SiteConfig", "SiteEntry", "Hop", "detect_itinerary"] \ No newline at end of file diff --git a/mileage_logger/logic/detect_itinerary.py b/mileage_logger/logic/detect_itinerary.py new file mode 100644 index 0000000..2ba0092 --- /dev/null +++ b/mileage_logger/logic/detect_itinerary.py @@ -0,0 +1,176 @@ +"""Detect ordered hops between whitelisted sites in a day's timeline. + +We process visits per calendar day (Europe/London), resetting state each +day. We also support injecting a synthetic Home→FirstSite hop when the +first recognised site of the day isn't Home (assume_home_start). 
+""" + +from __future__ import annotations + +from dataclasses import dataclass +from datetime import date +from typing import Dict, Iterable, List, Optional, Tuple +from collections import defaultdict +import math +import yaml + +from ..ingest.semantic_reader import Location, PlaceVisit + + +def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """Compute the great-circle distance between two points in miles.""" + R = 3958.8 # Earth radius in miles + phi1 = math.radians(lat1) + phi2 = math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dlambda = math.radians(lon2 - lon1) + a = math.sin(dphi / 2.0) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2.0) ** 2 + c = 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a))) + return R * c + + +@dataclass +class SiteEntry: + """Represents a single recognised site from the configuration.""" + canonical: str + label: str + lat: float + lon: float + radius_m: float + aliases: List[str] + + +class SiteConfig: + """Holds all recognised site definitions keyed by canonical name.""" + + def __init__(self, sites: Iterable[SiteEntry]): + self.by_canonical: Dict[str, SiteEntry] = {s.canonical: s for s in sites} + self.alias_map: Dict[str, str] = {} + for site in sites: + for alias in [site.canonical] + site.aliases: + self.alias_map[alias.lower()] = site.canonical + + @classmethod + def from_yaml(cls, path: str) -> "SiteConfig": + """Load a site configuration from a YAML file.""" + with open(path, "r", encoding="utf-8") as f: + raw = yaml.safe_load(f) + sites_data: List[Dict[str, object]] = [] + if isinstance(raw, list): + sites_data = raw + elif isinstance(raw, dict): + if "sites" in raw and isinstance(raw["sites"], list): + sites_data = raw["sites"] + else: + for canon, entry in raw.items(): + entry = entry or {} + if not isinstance(entry, dict): + raise ValueError("Site entry for %s must be a mapping" % canon) + entry = dict(entry) + entry.setdefault("canonical", canon) + 
sites_data.append(entry) + else: + raise ValueError("Invalid site configuration format") + sites: List[SiteEntry] = [] + for entry in sites_data: + canonical = entry.get("canonical") or entry.get("name") + if not canonical: + raise ValueError("Site entry missing canonical name") + label = entry.get("label", canonical) + lat = float(entry.get("lat", 0.0)) + lon = float(entry.get("lon", 0.0)) + radius_m = float(entry.get("radius_m", 0.0)) + aliases = entry.get("aliases") or [] + sites.append(SiteEntry( + canonical=canonical, + label=label, + lat=lat, + lon=lon, + radius_m=radius_m, + aliases=list(aliases), + )) + return cls(sites) + + def recognise(self, location: Location) -> Optional[str]: + """Return canonical site name if this location matches by name/alias or geofence.""" + name_lower = (location.name or "").lower() + # Pass 1: name/alias substring match + for alias, canonical in self.alias_map.items(): + if alias in name_lower: + return canonical + # Pass 2: geofence match + for canonical, site in self.by_canonical.items(): + if site.radius_m > 0: + max_dist_miles = site.radius_m / 1609.34 + d = haversine_distance(location.lat, location.lon, site.lat, site.lon) + if d <= max_dist_miles: + return canonical + return None + + +@dataclass +class Hop: + """A hop from one recognised site to another, dated by the origin's start date.""" + date: date + origin: str + destination: str + + +def _build_day_hops(day_visits: List[PlaceVisit], site_config: SiteConfig, assume_home_start: bool) -> List[Hop]: + """Build ordered hops for a single day of visits.""" + # Ensure chronological order by *start* time + day_visits = sorted(day_visits, key=lambda v: v.start_time) + + recognised: List[Tuple[str, PlaceVisit]] = [] + last_site: Optional[str] = None + for v in day_visits: + s = site_config.recognise(v.location) + if not s: + continue + if s == last_site: + continue # ignore duplicates back-to-back + recognised.append((s, v)) + last_site = s + + if not recognised: + return [] 
+ + # Inject Home at start if enabled and first site isn't Home + if assume_home_start and recognised[0][0] != "Home": + first_time = recognised[0][1].start_time + synthetic_home = PlaceVisit(location=Location(lat=0.0, lon=0.0, name="Home"), + start_time=first_time, end_time=first_time) + recognised.insert(0, ("Home", synthetic_home)) + + # Walk forward, stop at second Home + hops: List[Hop] = [] + home_hits = 1 if recognised and recognised[0][0] == "Home" else 0 + for i in range(1, len(recognised)): + origin_site, origin_visit = recognised[i - 1] + dest_site, _dest_visit = recognised[i] + hop_date = origin_visit.start_time.date() + if origin_site != dest_site: + hops.append(Hop(date=hop_date, origin=origin_site, destination=dest_site)) + if dest_site == "Home": + home_hits += 1 + if home_hits >= 2: + break + return hops + + +def detect_itinerary(visits: List[PlaceVisit], site_config: SiteConfig, *, assume_home_start: bool = True) -> List[Hop]: + """Reduce all visits into ordered hops per day, concatenated across the file.""" + if not visits: + return [] + + # Group by the local date from each visit's start_time + by_day: Dict[date, List[PlaceVisit]] = defaultdict(list) + for v in visits: + by_day[v.start_time.date()].append(v) + + hops_all: List[Hop] = [] + for day in sorted(by_day.keys()): + day_hops = _build_day_hops(by_day[day], site_config, assume_home_start=assume_home_start) + hops_all.extend(day_hops) + return hops_all + diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ef3214a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +python-dateutil>=2.8 +pytz>=2023.3 +PyYAML>=6.0 +openpyxl>=3.1 +geopy>=2.4 +# Optional: only needed if you enable external routing APIs later +httpx>=0.27 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..789957a --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,8 @@ 
+"""Test suite for the mileage logging tool. + +The tests in this package exercise the core components of the mileage +logger. They use simple JSON fixtures to simulate a user's Google +Semantic Location History exports and assert that the itinerary +detection, distance resolution and Excel export modules behave as +expected. +""" \ No newline at end of file diff --git a/tests/data/routes_golden.csv b/tests/data/routes_golden.csv new file mode 100644 index 0000000..dfe8d98 --- /dev/null +++ b/tests/data/routes_golden.csv @@ -0,0 +1,73 @@ +# origin,destination,miles +Home,Lingwood Primary Academy,12.9 +Home,Henderson Green Primary Academy,2.9 +Home,Valley Primary Academy,2.1 +Home,Heartsease Primary Academy,3.4 +Home,Colman Junior School,3.1 +Home,Colman Infant School,3.1 +Home,Robert Kett Primary School,11.2 +Home,Unity HQ,64.0 +Lingwood Primary Academy,Home,12.9 +Lingwood Primary Academy,Henderson Green Primary Academy,12.8 +Lingwood Primary Academy,Valley Primary Academy,13.4 +Lingwood Primary Academy,Heartsease Primary Academy,9.5999999999999996 +Lingwood Primary Academy,Colman Junior School,11.300000000000001 +Lingwood Primary Academy,Colman Infant School,11.1 +Lingwood Primary Academy,Robert Kett Primary School,18.699999999999999 +Lingwood Primary Academy,Unity HQ,63.100000000000001 +Henderson Green Primary Academy,Home,2.8999999999999999 +Henderson Green Primary Academy,Lingwood Primary Academy,12.800000000000001 +Henderson Green Primary Academy,Valley Primary Academy,1.1000000000000001 +Henderson Green Primary Academy,Heartsease Primary Academy,4.5 +Henderson Green Primary Academy,Colman Junior School,1.8 +Henderson Green Primary Academy,Colman Infant School,1.8 +Henderson Green Primary Academy,Robert Kett Primary School,8 +Henderson Green Primary Academy,Unity HQ,63.100000000000001 +Valley Primary Academy,Home,2.1000000000000001 +Valley Primary Academy,Lingwood Primary Academy,13.4 +Valley Primary Academy,Henderson Green Primary Academy,1.1000000000000001 
+Valley Primary Academy,Heartsease Primary Academy,4.7000000000000002 +Valley Primary Academy,Colman Junior School,2.2999999999999998 +Valley Primary Academy,Colman Infant School,2.2999999999999998 +Valley Primary Academy,Robert Kett Primary School,8 +Valley Primary Academy,Unity HQ,64.700000000000003 +Heartsease Primary Academy,Home,3.3999999999999999 +Heartsease Primary Academy,Lingwood Primary Academy,9.5999999999999996 +Heartsease Primary Academy,Henderson Green Primary Academy,4.5 +Heartsease Primary Academy,Valley Primary Academy,4.7000000000000002 +Heartsease Primary Academy,Colman Junior School,4.0999999999999996 +Heartsease Primary Academy,Colman Infant School,3.8999999999999999 +Heartsease Primary Academy,Robert Kett Primary School,14 +Heartsease Primary Academy,Unity HQ,68.299999999999997 +Colman Junior School,Home,3.1000000000000001 +Colman Junior School,Lingwood Primary Academy,11.300000000000001 +Colman Junior School,Henderson Green Primary Academy,1.8 +Colman Junior School,Valley Primary Academy,2.2999999999999998 +Colman Junior School,Heartsease Primary Academy,4.0999999999999996 +Colman Junior School,Colman Infant School,0.20000000000000001 +Colman Junior School,Robert Kett Primary School,8.4000000000000004 +Colman Junior School,Unity HQ,62.600000000000001 +Colman Infant School,Home,3.1000000000000001 +Colman Infant School,Lingwood Primary Academy,11.1 +Colman Infant School,Henderson Green Primary Academy,1.8 +Colman Infant School,Valley Primary Academy,2.2999999999999998 +Colman Infant School,Heartsease Primary Academy,3.8999999999999999 +Colman Infant School,Colman Junior School,0.20000000000000001 +Colman Infant School,Robert Kett Primary School,8.0999999999999996 +Colman Infant School,Unity HQ,62.399999999999999 +Robert Kett Primary School,Home,11.199999999999999 +Robert Kett Primary School,Lingwood Primary Academy,18.699999999999999 +Robert Kett Primary School,Henderson Green Primary Academy,8 +Robert Kett Primary School,Valley Primary 
Academy,8 +Robert Kett Primary School,Heartsease Primary Academy,14 +Robert Kett Primary School,Colman Junior School,8.4000000000000004 +Robert Kett Primary School,Colman Infant School,8.0999999999999996 +Robert Kett Primary School,Unity HQ,57.299999999999997 +Unity HQ,Home,64 +Unity HQ,Lingwood Primary Academy,63.100000000000001 +Unity HQ,Henderson Green Primary Academy,72.400000000000006 +Unity HQ,Valley Primary Academy,64.700000000000003 +Unity HQ,Heartsease Primary Academy,68.299999999999997 +Unity HQ,Colman Junior School,62.600000000000001 +Unity HQ,Colman Infant School,62.399999999999999 +Unity HQ,Robert Kett Primary School,66.299999999999997 diff --git a/tests/fixtures/semantic/2025-08-08.one_day_simple.json b/tests/fixtures/semantic/2025-08-08.one_day_simple.json new file mode 100644 index 0000000..e379b76 --- /dev/null +++ b/tests/fixtures/semantic/2025-08-08.one_day_simple.json @@ -0,0 +1,107 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1723090800000", + "endTimestampMs": "1723092600000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "endLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "duration": { + "startTimestampMs": "1723092600000", + "endTimestampMs": "1723093200000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524706000, + "longitudeE7": 13538000, + "name": "Lingwood Primary Academy" + }, + "duration": { + "startTimestampMs": "1723093200000", + "endTimestampMs": "1723111200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "endLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "duration": { + "startTimestampMs": "1723111200000", + "endTimestampMs": 
"1723112400000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524634000, + "longitudeE7": 13627000, + "name": "Heartsease Primary Academy" + }, + "duration": { + "startTimestampMs": "1723112400000", + "endTimestampMs": "1723123200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "endLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "duration": { + "startTimestampMs": "1723123200000", + "endTimestampMs": "1723125600000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1723125600000", + "endTimestampMs": "1723168800000" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/semantic/2025-08-12.looping.json b/tests/fixtures/semantic/2025-08-12.looping.json new file mode 100644 index 0000000..1486932 --- /dev/null +++ b/tests/fixtures/semantic/2025-08-12.looping.json @@ -0,0 +1,137 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1754980200000", + "endTimestampMs": "1754982000000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "endLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "duration": { + "startTimestampMs": "1754982000000", + "endTimestampMs": "1754982900000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524706000, + "longitudeE7": 13538000, + "name": "Lingwood Primary Academy" + }, + "duration": { + "startTimestampMs": "1754982900000", + "endTimestampMs": "1754989200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + 
"latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "endLocation": { + "latitudeE7": 524710000, + "longitudeE7": 13590000 + }, + "duration": { + "startTimestampMs": "1754989200000", + "endTimestampMs": "1754990100000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524710000, + "longitudeE7": 13590000, + "name": "Valley Primary Academy" + }, + "duration": { + "startTimestampMs": "1754990100000", + "endTimestampMs": "1754996400000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524710000, + "longitudeE7": 13590000 + }, + "endLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "duration": { + "startTimestampMs": "1754996400000", + "endTimestampMs": "1754997300000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524706000, + "longitudeE7": 13538000, + "name": "Lingwood Primary Academy" + }, + "duration": { + "startTimestampMs": "1754997300000", + "endTimestampMs": "1755003600000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "endLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "duration": { + "startTimestampMs": "1755003600000", + "endTimestampMs": "1755004500000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1755004500000", + "endTimestampMs": "1755014400000" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/semantic/cross_midnight.json b/tests/fixtures/semantic/cross_midnight.json new file mode 100644 index 0000000..e1e127f --- /dev/null +++ b/tests/fixtures/semantic/cross_midnight.json @@ -0,0 +1,77 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 
13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1754683200000", + "endTimestampMs": "1754692200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "endLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "duration": { + "startTimestampMs": "1754692200000", + "endTimestampMs": "1754695800000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524634000, + "longitudeE7": 13627000, + "name": "Heartsease Primary Academy" + }, + "duration": { + "startTimestampMs": "1754695800000", + "endTimestampMs": "1754701200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "endLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "duration": { + "startTimestampMs": "1754701200000", + "endTimestampMs": "1754703000000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1754703000000", + "endTimestampMs": "1754712000000" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/semantic/day_with_detours.json b/tests/fixtures/semantic/day_with_detours.json new file mode 100644 index 0000000..5fbe5da --- /dev/null +++ b/tests/fixtures/semantic/day_with_detours.json @@ -0,0 +1,137 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1755154800000", + "endTimestampMs": "1755156600000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "endLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "duration": { + "startTimestampMs": 
"1755156600000", + "endTimestampMs": "1755157500000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524706000, + "longitudeE7": 13538000, + "name": "Lingwood Primary Academy" + }, + "duration": { + "startTimestampMs": "1755157500000", + "endTimestampMs": "1755162000000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524706000, + "longitudeE7": 13538000 + }, + "endLocation": { + "latitudeE7": 524670000, + "longitudeE7": 13550000 + }, + "duration": { + "startTimestampMs": "1755162000000", + "endTimestampMs": "1755162900000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524670000, + "longitudeE7": 13550000, + "name": "Nice Cafe" + }, + "duration": { + "startTimestampMs": "1755162900000", + "endTimestampMs": "1755165600000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524670000, + "longitudeE7": 13550000 + }, + "endLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "duration": { + "startTimestampMs": "1755165600000", + "endTimestampMs": "1755166500000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524634000, + "longitudeE7": 13627000, + "name": "Heartsease Primary Academy" + }, + "duration": { + "startTimestampMs": "1755166500000", + "endTimestampMs": "1755172800000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "endLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "duration": { + "startTimestampMs": "1755172800000", + "endTimestampMs": "1755173700000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1755173700000", + "endTimestampMs": 
"1755190800000" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/semantic/no_home_start.json b/tests/fixtures/semantic/no_home_start.json new file mode 100644 index 0000000..2a8ece0 --- /dev/null +++ b/tests/fixtures/semantic/no_home_start.json @@ -0,0 +1,77 @@ +{ + "timelineObjects": [ + { + "placeVisit": { + "location": { + "latitudeE7": 524610000, + "longitudeE7": 13500000, + "name": "Unity SP" + }, + "duration": { + "startTimestampMs": "1755244800000", + "endTimestampMs": "1755248400000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524610000, + "longitudeE7": 13500000 + }, + "endLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "duration": { + "startTimestampMs": "1755248400000", + "endTimestampMs": "1755250200000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524634000, + "longitudeE7": 13627000, + "name": "Heartsease Primary Academy" + }, + "duration": { + "startTimestampMs": "1755250200000", + "endTimestampMs": "1755259200000" + } + } + }, + { + "activitySegment": { + "startLocation": { + "latitudeE7": 524634000, + "longitudeE7": 13627000 + }, + "endLocation": { + "latitudeE7": 524649000, + "longitudeE7": 13460000 + }, + "duration": { + "startTimestampMs": "1755259200000", + "endTimestampMs": "1755261000000" + }, + "activityType": "IN_PASSENGER_VEHICLE" + } + }, + { + "placeVisit": { + "location": { + "latitudeE7": 524649000, + "longitudeE7": 13460000, + "name": "Home" + }, + "duration": { + "startTimestampMs": "1755261000000", + "endTimestampMs": "1755277200000" + } + } + } + ] +} \ No newline at end of file diff --git a/tests/test_distance_resolver.py b/tests/test_distance_resolver.py new file mode 100644 index 0000000..297b926 --- /dev/null +++ b/tests/test_distance_resolver.py @@ -0,0 +1,42 @@ +import os +import unittest + +from mileage_logger.logic.detect_itinerary import SiteConfig, haversine_distance +from 
mileage_logger.distance.resolve import DistanceResolver + + +CONFIG_PATH = os.path.join(os.path.dirname(__file__), "..", "config", "sites.yml") +ROUTES_PATH = os.path.join(os.path.dirname(__file__), "data", "routes_golden.csv") + + +class TestDistanceResolver(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.site_config = SiteConfig.from_yaml(CONFIG_PATH) + cls.resolver = DistanceResolver(route_csv_path=ROUTES_PATH) + + def test_route_lookup(self): + origin = "Home" + dest = "Lingwood Primary Academy" + origin_site = self.site_config.by_canonical[origin] + dest_site = self.site_config.by_canonical[dest] + dist = self.resolver.resolve(origin, dest, (origin_site.lat, origin_site.lon), (dest_site.lat, dest_site.lon)) + self.assertAlmostEqual(dist, 13.0, places=1) + # Second call should hit cache and return same + dist2 = self.resolver.resolve(origin, dest, (origin_site.lat, origin_site.lon), (dest_site.lat, dest_site.lon)) + self.assertEqual(dist2, dist) + + def test_fallback_haversine(self): + # Choose a pair not in the route catalogue + origin = "Lingwood Primary Academy" + dest = "Unity SP" + origin_site = self.site_config.by_canonical[origin] + dest_site = self.site_config.by_canonical[dest] + dist = self.resolver.resolve(origin, dest, (origin_site.lat, origin_site.lon), (dest_site.lat, dest_site.lon)) + # Compute haversine expected + expected = haversine_distance(origin_site.lat, origin_site.lon, dest_site.lat, dest_site.lon) + self.assertAlmostEqual(dist, expected, places=1) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_excel_writer.py b/tests/test_excel_writer.py new file mode 100644 index 0000000..9991444 --- /dev/null +++ b/tests/test_excel_writer.py @@ -0,0 +1,91 @@ +import os +import tempfile +import unittest + +from openpyxl import load_workbook + +from mileage_logger.ingest.semantic_reader import load_place_visits +from mileage_logger.logic.detect_itinerary import SiteConfig, 
detect_itinerary +from mileage_logger.distance.resolve import DistanceResolver +from mileage_logger.export.excel_writer import build_monthly_rows, write_monthly_workbook + + +FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "semantic") +CONFIG_PATH = os.path.join(os.path.dirname(__file__), "..", "config", "sites.yml") +ROUTES_PATH = os.path.join(os.path.dirname(__file__), "data", "routes_golden.csv") + + +class TestExcelWriter(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.site_config = SiteConfig.from_yaml(CONFIG_PATH) + + def _build_workbook(self, fixture_file: str) -> str: + visits = load_place_visits(os.path.join(FIXTURES_DIR, fixture_file)) + hops = detect_itinerary(visits, self.site_config) + resolver = DistanceResolver(route_csv_path=ROUTES_PATH) + rows_by_month = build_monthly_rows(hops, self.site_config, resolver) + # Write to a temporary file + fd, path = tempfile.mkstemp(suffix=".xlsx") + os.close(fd) + write_monthly_workbook(rows_by_month, path) + return path + + def test_excel_layout_simple(self): + # Build workbook for the simple fixture + path = self._build_workbook("2025-08-08.one_day_simple.json") + try: + wb = load_workbook(path) + # Determine expected month from the first hop date + visits = load_place_visits(os.path.join(FIXTURES_DIR, "2025-08-08.one_day_simple.json")) + hops = detect_itinerary(visits, self.site_config) + if not hops: + self.fail("No hops detected for simple fixture") + expected_month = hops[0].date.strftime("%Y-%m") + self.assertIn(expected_month, wb.sheetnames) + ws = wb[expected_month] + rows = list(ws.iter_rows(values_only=True)) + # number of hops + header + self.assertEqual(len(rows), len(hops) + 1) + header = rows[0] + expected_header = ("Date", "Purpose", "Miles", "Vehicle", "Job Role", "From", "To", "Notes") + self.assertEqual(header, expected_header) + # Validate each hop row + resolver = DistanceResolver(route_csv_path=ROUTES_PATH) + for i, hop in enumerate(hops, start=1): + row = 
rows[i] + origin_site = self.site_config.by_canonical[hop.origin] + dest_site = self.site_config.by_canonical[hop.destination] + dist = resolver.resolve(hop.origin, hop.destination, (origin_site.lat, origin_site.lon), (dest_site.lat, dest_site.lon)) + self.assertEqual(row[0], hop.date.isoformat()) + expected_purpose = f"Travel from {origin_site.label} to {dest_site.label} {dist:.1f}mi" + self.assertEqual(row[1], expected_purpose) + self.assertAlmostEqual(float(row[2]), dist, places=1) + self.assertEqual(row[3], resolver.vehicle_label) + self.assertEqual(row[4], resolver.job_role) + self.assertEqual(row[5], origin_site.label) + self.assertEqual(row[6], dest_site.label) + # Notes may be returned as None when reading from Excel + self.assertIn(row[7] or "", ["", None]) + finally: + os.unlink(path) + + def test_cross_midnight_sheet_rows(self): + path = self._build_workbook("cross_midnight.json") + try: + wb = load_workbook(path) + # Should still be month 2025-08 + self.assertIn("2025-08", wb.sheetnames) + ws = wb["2025-08"] + rows = list(ws.iter_rows(values_only=True)) + # two hops -> 3 rows including header + self.assertEqual(len(rows), 3) + # Dates should span two days + dates = [r[0] for r in rows[1:]] + self.assertEqual(dates, ["2025-08-08", "2025-08-09"]) + finally: + os.unlink(path) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_itinerary.py b/tests/test_itinerary.py new file mode 100644 index 0000000..e52949b --- /dev/null +++ b/tests/test_itinerary.py @@ -0,0 +1,79 @@ +import os +import unittest +from datetime import date + +from mileage_logger.ingest.semantic_reader import load_place_visits +from mileage_logger.logic.detect_itinerary import SiteConfig, detect_itinerary + + +FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "semantic") +CONFIG_PATH = os.path.join(os.path.dirname(__file__), "..", "config", "sites.yml") + + +class TestItineraryDetection(unittest.TestCase): + @classmethod + 
def setUpClass(cls): + cls.site_config = SiteConfig.from_yaml(CONFIG_PATH) + + def _load_visits(self, filename: str): + path = os.path.join(FIXTURES_DIR, filename) + return load_place_visits(path) + + def test_one_day_simple(self): + visits = self._load_visits("2025-08-08.one_day_simple.json") + hops = detect_itinerary(visits, self.site_config) + canonical_pairs = [(h.origin, h.destination) for h in hops] + expected = [ + ("Home", "Lingwood Primary Academy"), + ("Lingwood Primary Academy", "Heartsease Primary Academy"), + ("Heartsease Primary Academy", "Home"), + ] + self.assertEqual(canonical_pairs, expected) + + def test_looping_multi_visit(self): + visits = self._load_visits("2025-08-12.looping.json") + hops = detect_itinerary(visits, self.site_config) + canonical_pairs = [(h.origin, h.destination) for h in hops] + expected = [ + ("Home", "Lingwood Primary Academy"), + ("Lingwood Primary Academy", "Valley Primary Academy"), + ("Valley Primary Academy", "Lingwood Primary Academy"), + ("Lingwood Primary Academy", "Home"), + ] + self.assertEqual(canonical_pairs, expected) + + def test_detours_ignored(self): + visits = self._load_visits("day_with_detours.json") + hops = detect_itinerary(visits, self.site_config) + canonical_pairs = [(h.origin, h.destination) for h in hops] + expected = [ + ("Home", "Lingwood Primary Academy"), + ("Lingwood Primary Academy", "Heartsease Primary Academy"), + ("Heartsease Primary Academy", "Home"), + ] + self.assertEqual(canonical_pairs, expected) + + def test_no_home_start(self): + visits = self._load_visits("no_home_start.json") + hops = detect_itinerary(visits, self.site_config) + canonical_pairs = [(h.origin, h.destination) for h in hops] + expected = [ + ("Unity SP", "Heartsease Primary Academy"), + ("Heartsease Primary Academy", "Home"), + ] + self.assertEqual(canonical_pairs, expected) + + def test_cross_midnight_dates(self): + visits = self._load_visits("cross_midnight.json") + hops = detect_itinerary(visits, self.site_config) 
+ self.assertEqual(len(hops), 2) + # Ensure canonical names + self.assertEqual((hops[0].origin, hops[0].destination), ("Home", "Heartsease Primary Academy")) + self.assertEqual((hops[1].origin, hops[1].destination), ("Heartsease Primary Academy", "Home")) + # Check dates (local) across midnight + self.assertEqual(hops[0].date, date(2025, 8, 8)) + self.assertEqual(hops[1].date, date(2025, 8, 9)) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file