Initial commit (clean, ignores in place)

commit c74790b014
2025-08-12 01:13:41 +01:00
26 changed files with 2331 additions and 0 deletions

mileage_logger/__init__.py

@@ -0,0 +1,38 @@
"""Top level package for the mileage logging tool.
This package provides a set of modules used to ingest Google Semantic
Location History data, detect work related travel itineraries based on a
whitelisted set of sites, resolve driving distances between those sites
and export the resulting mileage claims into an Excel workbook ready
for submission to a HR system.
The project is organised into subpackages:
* :mod:`mileage_logger.ingest` parse Google Takeout JSON exports
into structured Python objects.
* :mod:`mileage_logger.logic` implement the state machine that
identifies ordered hops between recognised locations in a days
timeline.
* :mod:`mileage_logger.distance` resolve distances via a route
catalogue or, optionally, an external API with caching.
* :mod:`mileage_logger.export` write Excel workbooks or CSV files
containing the final mileage log.
* :mod:`mileage_logger.cli` command line interface for invoking
common workflows such as importing a new export or rebuilding a
monthly workbook.
This package requires Python 3.11 or newer. See the README for
installation and usage instructions.
"""
from .ingest import semantic_reader # noqa: F401
from .logic import detect_itinerary # noqa: F401
from .distance import resolve # noqa: F401
from .export import excel_writer # noqa: F401
__all__ = [
"semantic_reader",
"detect_itinerary",
"resolve",
"excel_writer",
]

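Taken together, these re-exports describe the whole pipeline. A minimal sketch of driving it programmatically, assuming a Takeout export at `takeout.json` and the bundled site/route files (all paths here are placeholders):

```python
# Hypothetical end-to-end run; every file path is a placeholder.
from mileage_logger.ingest.semantic_reader import load_place_visits
from mileage_logger.logic.detect_itinerary import SiteConfig, detect_itinerary
from mileage_logger.distance.resolve import DistanceResolver
from mileage_logger.export.excel_writer import build_monthly_rows, write_monthly_workbook

visits = load_place_visits("takeout.json")            # timezone-aware PlaceVisit objects
sites = SiteConfig.from_yaml("config/sites.yml")      # whitelisted sites and geofences
hops = detect_itinerary(visits, sites)                # ordered hops, grouped per day
resolver = DistanceResolver("tests/data/routes_golden.csv")
rows = build_monthly_rows(hops, sites, resolver)      # {"YYYY-MM": [row tuples]}
write_monthly_workbook(rows, "out/mileage_2025-08.xlsx")  # one sheet per month
```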
mileage_logger/cli.py

@@ -0,0 +1,189 @@
"""Command line interface for the mileage logging tool."""
from __future__ import annotations
import argparse
import os
from datetime import date, datetime, timedelta
from typing import Optional, Tuple
import pytz
from .ingest.semantic_reader import load_place_visits
from .logic.detect_itinerary import SiteConfig, detect_itinerary
from .distance.resolve import DistanceResolver
from .export.excel_writer import build_monthly_rows, write_monthly_workbook
TZ = pytz.timezone("Europe/London")
def _today_local() -> date:
return datetime.now(TZ).date()
def _prev_month_bounds(today: Optional[date] = None) -> Tuple[date, date]:
"""Return (start_date, end_date) for the previous calendar month in Europe/London."""
if today is None:
today = _today_local()
first_this_month = today.replace(day=1)
last_prev_month = first_this_month - timedelta(days=1)
start_prev_month = last_prev_month.replace(day=1)
return start_prev_month, last_prev_month
def _month_bounds(ym: str) -> Tuple[date, date]:
"""Return (start_date, end_date) for the given YYYY-MM."""
year, month = map(int, ym.split("-"))
start = date(year, month, 1)
if month == 12:
end = date(year + 1, 1, 1) - timedelta(days=1)
else:
end = date(year, month + 1, 1) - timedelta(days=1)
return start, end
def _parse_date(s: str) -> date:
y, m, d = map(int, s.split("-"))
return date(y, m, d)
def import_file(
json_path: str,
site_config_path: str,
route_csv_path: str,
output_dir: str,
assume_home_start: bool,
weekdays_only: bool,
month: Optional[str],
last_month: bool,
since: Optional[str],
until: Optional[str],
days: Optional[int],
) -> None:
"""Import a single JSON file and write Excel workbooks (one per month)."""
visits = load_place_visits(json_path)
if not visits:
print(f"No place visits found in {json_path}")
return
# 1) Determine date range filter
start_date: Optional[date] = None
end_date: Optional[date] = None
if month:
start_date, end_date = _month_bounds(month)
elif last_month:
start_date, end_date = _prev_month_bounds()
elif since or until:
if since:
start_date = _parse_date(since)
if until:
end_date = _parse_date(until)
elif days:
end_date = _today_local()
start_date = end_date - timedelta(days=days - 1)
# 2) Apply date filtering to visits (by visit.start_time local date)
if start_date or end_date:
def in_range(v):
d = v.start_time.date()
if start_date and d < start_date:
return False
if end_date and d > end_date:
return False
return True
visits = [v for v in visits if in_range(v)]
if not visits:
label = f"{start_date or ''}..{end_date or ''}"
print(f"No place visits in requested range {label}")
return
site_config = SiteConfig.from_yaml(site_config_path)
hops = detect_itinerary(visits, site_config, assume_home_start=assume_home_start)
if not hops:
print("No recognised hops detected after filtering.")
return
# 3) Weekday filter (Sat=5, Sun=6)
if weekdays_only:
hops = [h for h in hops if h.date.weekday() < 5]
if not hops:
print("All hops fell on weekends; nothing to write.")
return
resolver = DistanceResolver(route_csv_path)
rows_by_month = build_monthly_rows(hops, site_config, resolver)
# 4) Write one workbook per month present
os.makedirs(output_dir, exist_ok=True)
for month_key, rows in sorted(rows_by_month.items()):
# If a specific month/range was requested, rows_by_month will already reflect it.
output_path = os.path.join(output_dir, f"mileage_{month_key}.xlsx")
write_monthly_workbook({month_key: rows}, output_path)
print(f"Wrote {output_path} ({len(rows)} rows)")
def main(argv: list[str] | None = None) -> None:
parser = argparse.ArgumentParser(description="Mileage logging tool")
subparsers = parser.add_subparsers(dest="command", required=True)
import_parser = subparsers.add_parser("import", help="Import a single JSON export")
import_parser.add_argument("json_path", help="Path to the JSON file to import")
import_parser.add_argument(
"--sites", dest="site_config_path",
default=os.path.join(os.path.dirname(__file__), "../config/sites.yml"),
help="Path to the sites.yml configuration",
)
import_parser.add_argument(
"--routes", dest="route_csv_path",
default=os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv"),
help="Path to the routes CSV catalogue",
)
import_parser.add_argument(
"--output", dest="output_dir", default=os.getcwd(),
help="Directory to write the Excel workbook(s)",
)
# Behavior toggles
import_parser.add_argument(
"--no-assume-home-start", action="store_true",
help="Do not inject a Home→first-site hop when a day doesn't start at Home.",
)
import_parser.add_argument(
"--weekdays-only", action="store_true",
help="Exclude Saturday/Sunday hops.",
)
# Date filters (choose one style)
import_parser.add_argument("--last-month", action="store_true",
help="Process the previous calendar month.")
import_parser.add_argument("--month", metavar="YYYY-MM",
help="Process a specific calendar month, e.g. 2025-08.")
import_parser.add_argument("--since", metavar="YYYY-MM-DD",
help="Lower bound (inclusive) for visits to process.")
import_parser.add_argument("--until", metavar="YYYY-MM-DD",
help="Upper bound (inclusive) for visits to process.")
import_parser.add_argument("--days", type=int,
help="Process the last N days (relative to today).")
args = parser.parse_args(argv)
if args.command == "import":
import_file(
args.json_path,
args.site_config_path,
args.route_csv_path,
args.output_dir,
assume_home_start=(not args.no_assume_home_start),
weekdays_only=args.weekdays_only,
month=args.month,
last_month=args.last_month,
since=args.since,
until=args.until,
days=args.days,
)
if __name__ == "__main__":
main()

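For reference, two sketched invocations of the CLI above, driven through `main` so they stay testable; the flags are exactly those registered on the `import` subparser, while the file paths are placeholders:

```python
from mileage_logger.cli import main

# Previous calendar month, weekdays only, workbooks written to ./out
main(["import", "takeout.json", "--last-month", "--weekdays-only", "--output", "out"])

# A specific month with explicit site and route catalogues
main(["import", "takeout.json", "--month", "2025-08",
      "--sites", "config/sites.yml",
      "--routes", "tests/data/routes_golden.csv"])
```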
mileage_logger/distance/__init__.py

@@ -0,0 +1,12 @@
"""Distance resolution utilities.
This subpackage exposes classes and functions that resolve driving
distances between pairs of recognised sites. The primary
implementation is :class:`DistanceResolver`, which first consults a
pre-defined route catalogue before optionally consulting an external
API and finally falling back to a simple geodesic calculation.
"""
from .resolve import DistanceResolver, haversine_distance
__all__ = ["DistanceResolver", "haversine_distance"]

mileage_logger/distance/resolve.py

@@ -0,0 +1,210 @@
"""Resolve driving distances between sites.
The :class:`DistanceResolver` class provides a simple mechanism to
determine the distance in miles between two points. It is designed to
prefer a local route catalogue (CSV) if available, fall back to
external API calls when API keys are configured and, as a last
resort, compute a straight-line distance using the haversine
formula.
Caching is performed to avoid repeated API calls or calculations. A
time-to-live (TTL) can be specified when constructing the resolver;
entries older than the TTL are treated as missing and recomputed on
demand. Distances are rounded to one decimal place as
required by HR mileage claim forms.
"""
from __future__ import annotations
import csv
import os
import time
from dataclasses import dataclass
from typing import Dict, Optional, Tuple
try:
import httpx # type: ignore
except ImportError: # Optional dependency. If unavailable, API calls will be skipped.
httpx = None # type: ignore
from ..logic.detect_itinerary import haversine_distance
@dataclass
class _CacheEntry:
distance: float
timestamp: float
class DistanceResolver:
"""Resolve driving distances between two locations.
The resolver consults an in-memory cache, a local route catalogue,
an optional external API and finally falls back to a straight-line
calculation using the haversine formula. Distances are cached for
the lifetime of the object. Rounding to one decimal mile is
applied uniformly.
"""
def __init__(self, route_csv_path: Optional[str] = None, api_key: Optional[str] = None,
http_client: Optional[object] = None, ttl_seconds: float = 365 * 24 * 3600,
vehicle_label: str = "SH11 DRV (Own 1.6CC Diesel Car/Van)", job_role: str = "ICT Technician"):
"""Initialise the distance resolver.
Parameters
----------
route_csv_path : str, optional
Path to a CSV file containing pre-approved route distances.
The file should have at least three columns: origin,
destination and miles. The entries are assumed to be
directional; if symmetric distances are desired both
directions must be provided.
api_key : str, optional
API key for the Google Routes API. If omitted, API calls
will be skipped.
        http_client : :class:`httpx.Client`, optional
            HTTP client instance to use for API requests. If omitted, a
            client is created automatically when an API key is
            configured and httpx is available.
ttl_seconds : float, optional
Time-to-live for cache entries in seconds. Expired
entries are recomputed on demand. The default is one year.
"""
        self.api_key = api_key
        # Use the supplied HTTP client, or create one when an API key is
        # configured and httpx is available; otherwise leave it unset so
        # API calls are skipped entirely.
        if httpx is not None and (http_client is not None or api_key):
            self.http_client = http_client or httpx.Client()
        else:
            self.http_client = None
self.ttl_seconds = ttl_seconds
self.vehicle_label = vehicle_label
self.job_role = job_role
self.cache: Dict[Tuple[str, str], _CacheEntry] = {}
# Load route catalogue
self.route_catalog: Dict[Tuple[str, str], float] = {}
if route_csv_path and os.path.exists(route_csv_path):
with open(route_csv_path, "r", encoding="utf-8") as f:
reader = csv.reader(f)
for row in reader:
if not row or row[0].startswith("#"):
continue
try:
origin, destination, miles_str = row[:3]
miles = float(miles_str)
self.route_catalog[(origin.strip(), destination.strip())] = miles
except Exception:
# Skip malformed entries silently
continue
def _get_from_cache(self, origin: str, dest: str) -> Optional[float]:
"""Retrieve a cached distance if present and unexpired."""
entry = self.cache.get((origin, dest))
if entry is None:
return None
if (time.time() - entry.timestamp) > self.ttl_seconds:
# Expired
return None
return entry.distance
def _set_cache(self, origin: str, dest: str, distance: float) -> None:
"""Cache the given distance for the origin/destination pair."""
self.cache[(origin, dest)] = _CacheEntry(distance=distance, timestamp=time.time())
def resolve(self, origin_name: str, dest_name: str, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> float:
"""Resolve the distance between two sites in miles.
This method will consult the cache, route catalogue, external API
and finally compute a haversine distance. Once resolved, the
distance is cached and rounded to one decimal place.
Parameters
----------
origin_name : str
Canonical name of the origin site. Used for cache and
catalogue lookups.
dest_name : str
Canonical name of the destination site.
origin_coords : tuple(float, float)
Latitude and longitude of the origin in decimal degrees.
dest_coords : tuple(float, float)
Latitude and longitude of the destination in decimal degrees.
Returns
-------
float
The resolved driving distance in miles, rounded to one
decimal place.
"""
# First check the cache
cached = self._get_from_cache(origin_name, dest_name)
if cached is not None:
return cached
# Second consult the route catalogue
catalogue_key = (origin_name, dest_name)
if catalogue_key in self.route_catalog:
dist = self.route_catalog[catalogue_key]
rounded = round(dist, 1)
self._set_cache(origin_name, dest_name, rounded)
return rounded
# Attempt to call external API if configured
if self.api_key:
try:
dist = self._call_google_routes_api(origin_coords, dest_coords)
if dist is not None:
rounded = round(dist, 1)
self._set_cache(origin_name, dest_name, rounded)
return rounded
except Exception:
# Swallow API errors and fall back
pass
# Fall back to haversine distance
dist = haversine_distance(origin_coords[0], origin_coords[1], dest_coords[0], dest_coords[1])
rounded = round(dist, 1)
self._set_cache(origin_name, dest_name, rounded)
return rounded
def _call_google_routes_api(self, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> Optional[float]:
"""Call the Google Maps Routes API to compute driving distance.
Note that this is a blocking call. The caller should ensure that
network access is permitted and that a valid API key has been
configured. If the request fails or the response cannot be
parsed, ``None`` is returned.
"""
# Construct the API request
# See https://developers.google.com/maps/documentation/routes for details
base_url = "https://routes.googleapis.com/directions/v2:computeRoutes"
# Compose JSON payload
payload = {
"origin": {"location": {"latLng": {"latitude": origin_coords[0], "longitude": origin_coords[1]}}},
"destination": {"location": {"latLng": {"latitude": dest_coords[0], "longitude": dest_coords[1]}}},
"travelMode": "DRIVE",
"routingPreference": "TRAFFIC_AWARE",
"computeAlternativeRoutes": False,
"units": "IMPERIAL",
}
headers = {
"Content-Type": "application/json",
"X-Goog-Api-Key": self.api_key,
"X-Goog-FieldMask": "routes.duration,routes.distanceMeters",
}
        # If httpx or a configured client is unavailable, skip the API call
        if httpx is None or self.http_client is None:
            return None
resp = self.http_client.post(base_url, json=payload, headers=headers)
if resp.status_code != 200:
return None
try:
data = resp.json()
routes = data.get("routes") or []
if not routes:
return None
# Distance is returned in meters; convert to miles
meters = routes[0]["distanceMeters"]
miles = meters / 1609.34
return float(miles)
except Exception:
return None

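A short sketch of the resolver's fallback order. The site names, coordinates and CSV row are invented; with no API key configured, an uncatalogued pair drops straight through to the haversine estimate:

```python
from mileage_logger.distance.resolve import DistanceResolver

resolver = DistanceResolver(route_csv_path="tests/data/routes_golden.csv")

# Catalogue hit: returns the pre-approved mileage rounded to 0.1 mi
# (assuming the CSV contains a "Home,Lingwood,13.2"-style row).
miles = resolver.resolve("Home", "Lingwood", (52.63, 1.30), (52.62, 1.48))

# Unknown pair and no API key: falls back to the straight-line distance.
estimate = resolver.resolve("Home", "SomewhereNew", (52.63, 1.30), (52.70, 1.10))
```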
mileage_logger/export/__init__.py

@@ -0,0 +1,5 @@
"""Export utilities for writing mileage logs to Excel and CSV."""
from .excel_writer import write_monthly_workbook, build_monthly_rows
__all__ = ["write_monthly_workbook", "build_monthly_rows"]

mileage_logger/export/excel_writer.py

@@ -0,0 +1,122 @@
"""Write mileage itineraries to Excel workbooks.
This module uses :mod:`openpyxl` to construct a workbook with one sheet
per month. Each row corresponds to a single hop between recognised
sites. Columns follow the specification used by the EveryHR system:
* ``Date``: calendar date in ISO format (YYYY-MM-DD).
* ``Purpose``: free text summarising the journey, e.g. ``"Travel from
  Home to Lingwood Primary Academy 13.2mi"``.
* ``Miles``: numeric value rounded to one decimal place.
* ``Vehicle``: the vehicle descriptor configured for the user.
* ``Job Role``: the job role of the user.
* ``From``: friendly label of the origin site.
* ``To``: friendly label of the destination site.
* ``Notes``: left blank for manual additions.
Rows are grouped by month (YYYY-MM). Each sheet is named after the
month and contains a header row followed by one row per hop in
chronological order.
"""
from __future__ import annotations
import os
from collections import defaultdict
from datetime import date
from typing import Dict, Iterable, List, Tuple
from openpyxl import Workbook
from openpyxl.utils import get_column_letter
from ..logic.detect_itinerary import Hop, SiteConfig
def build_monthly_rows(hops: Iterable[Hop], site_config: SiteConfig, distance_resolver) -> Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]]:
"""Prepare rows grouped by month for Excel output.
Parameters
----------
hops : iterable of :class:`Hop`
The hops produced by itinerary detection.
site_config : :class:`SiteConfig`
Used to look up friendly labels for canonical site names.
distance_resolver : object
An object with a ``resolve(origin_name, dest_name, origin_coords, dest_coords)``
method that returns a distance in miles. See
:class:`~mileage_logger.distance.resolve.DistanceResolver`.
Returns
-------
dict mapping str -> list of tuples
Keys are month strings in the form ``YYYY-MM``. Values are
lists of tuples containing the data for each row: (date_str,
purpose, miles, vehicle, job_role, from_label, to_label, notes).
"""
rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]] = defaultdict(list)
for hop in hops:
month_key = hop.date.strftime("%Y-%m")
origin_site = site_config.by_canonical.get(hop.origin)
dest_site = site_config.by_canonical.get(hop.destination)
if origin_site is None or dest_site is None:
continue
# Resolve distance
dist = distance_resolver.resolve(
hop.origin,
hop.destination,
(origin_site.lat, origin_site.lon),
(dest_site.lat, dest_site.lon),
)
# Build purpose string
purpose = f"Travel from {origin_site.label} to {dest_site.label} {dist:.1f}mi"
rows_by_month[month_key].append(
(
hop.date.isoformat(),
purpose,
dist,
                getattr(distance_resolver, "vehicle_label", "SH11 DRV (Own 1.6CC Diesel Car/Van)"),
                getattr(distance_resolver, "job_role", "ICT Technician"),
origin_site.label,
dest_site.label,
"",
)
)
return rows_by_month
def write_monthly_workbook(rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]], output_path: str) -> None:
"""Write the grouped rows into an Excel workbook.
Parameters
----------
rows_by_month : dict
Mapping from month strings to lists of row tuples as returned
by :func:`build_monthly_rows`.
output_path : str
Path of the Excel workbook to write. Any existing file will be
overwritten.
"""
wb = Workbook()
# Remove the default sheet created by openpyxl
default_sheet = wb.active
wb.remove(default_sheet)
for month, rows in sorted(rows_by_month.items()):
ws = wb.create_sheet(title=month)
# Write header
header = ["Date", "Purpose", "Miles", "Vehicle", "Job Role", "From", "To", "Notes"]
ws.append(header)
for row in rows:
ws.append(list(row))
# Autosize columns (approximate)
for col_idx in range(1, len(header) + 1):
column_letter = get_column_letter(col_idx)
max_length = max(
len(str(ws.cell(row=r + 1, column=col_idx).value)) for r in range(len(rows) + 1)
)
# Add a little extra padding
ws.column_dimensions[column_letter].width = max_length + 2
    # Ensure the parent directory exists; output_path may be a bare
    # filename, in which case dirname is empty and makedirs would fail.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
wb.save(output_path)

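Because `build_monthly_rows` only duck-types its resolver, any object with a `resolve` method (plus the optional `vehicle_label` and `job_role` attributes) will do. A minimal sketch with an invented fixed-distance stub:

```python
from datetime import date
from mileage_logger.logic.detect_itinerary import Hop, SiteConfig, SiteEntry
from mileage_logger.export.excel_writer import build_monthly_rows, write_monthly_workbook

class FixedResolver:
    """Stub resolver: every hop is reported as 13.2 miles."""
    vehicle_label = "SH11 DRV (Own 1.6CC Diesel Car/Van)"
    job_role = "ICT Technician"
    def resolve(self, origin, dest, origin_coords, dest_coords):
        return 13.2

sites = SiteConfig([
    SiteEntry("Home", "Home", 52.63, 1.30, 0.0, []),
    SiteEntry("Lingwood", "Lingwood Primary Academy", 52.62, 1.48, 0.0, []),
])
hops = [Hop(date(2025, 8, 12), "Home", "Lingwood")]
rows = build_monthly_rows(hops, sites, FixedResolver())   # {"2025-08": [...]}
write_monthly_workbook(rows, "out/mileage_2025-08.xlsx")
```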
mileage_logger/gui.py

@@ -0,0 +1,131 @@
"""Simple web GUI for the mileage logger.
This module exposes a FastAPI application that wraps the core
functionality of the mileage logger with a minimal HTML front end. It
allows a user to upload a Google Semantic Location History JSON file
and returns an Excel workbook containing their mileage claims. The
application also renders a basic status page showing the detected
itinerary.
Usage
-----
Run the server using uvicorn:
```
uvicorn mileage_logger.gui:app --reload --port 8000
```
Then navigate to ``http://localhost:8000`` in your web browser. Use
the form to upload a JSON export. After processing, the server will
return an Excel file for download.
Limitations
-----------
This GUI is intentionally lightweight and is not designed for
concurrent multi-user access. It does not persist files on disk and
does not perform any authentication or authorisation. For production
use consider extending it with proper user management and storage.
"""
from __future__ import annotations
import os
import tempfile
from io import BytesIO
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse, StreamingResponse
from .ingest.semantic_reader import load_place_visits
from .logic.detect_itinerary import SiteConfig, detect_itinerary
from .distance.resolve import DistanceResolver
from .export.excel_writer import build_monthly_rows, write_monthly_workbook
# Load configuration once at startup. You can change the path to
# config/sites.yml if you have customised it. The route catalogue is
# loaded on-demand when handling uploads.
DEFAULT_SITE_CONFIG_PATH = os.path.join(os.path.dirname(__file__), "../config/sites.yml")
DEFAULT_ROUTE_CSV_PATH = os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv")
site_config: SiteConfig = SiteConfig.from_yaml(DEFAULT_SITE_CONFIG_PATH)
app = FastAPI(title="Mileage Logger GUI")
@app.get("/", response_class=HTMLResponse)
async def index() -> str:
"""Render a simple upload form."""
return """
<html>
<head>
<title>Mileage Logger</title>
</head>
<body>
<h1>Mileage Logger</h1>
<p>Select a Google Takeout JSON file to process. The file
should contain the "timelineObjects" array from your Semantic
Location History export.</p>
<form action="/process" method="post" enctype="multipart/form-data">
<input type="file" name="file" accept="application/json" required />
<br/><br/>
<label for="vehicle">Vehicle description:</label>
<input type="text" id="vehicle" name="vehicle" value="SH11 DRV (Own 1.6CC Diesel Car/Van)" />
<br/><br/>
<label for="job_role">Job role:</label>
<input type="text" id="job_role" name="job_role" value="ICT Technician" />
<br/><br/>
<input type="submit" value="Process" />
</form>
</body>
</html>
"""
@app.post("/process")
async def process_file(
file: UploadFile = File(...),
vehicle: str = Form("SH11 DRV (Own 1.6CC Diesel Car/Van)"),
job_role: str = Form("ICT Technician"),
) -> StreamingResponse:
"""Handle upload and return an Excel workbook.
The uploaded file is saved to a temporary file on disk and then
passed through the existing CLI pipeline. The resulting workbook
contains one sheet per month and is returned as a streaming
response.
"""
# Persist upload to a temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as tmp_in:
contents = await file.read()
tmp_in.write(contents)
tmp_in.flush()
input_path = tmp_in.name
# Parse visits and detect itinerary
visits = load_place_visits(input_path)
hops = detect_itinerary(visits, site_config)
resolver = DistanceResolver(route_csv_path=DEFAULT_ROUTE_CSV_PATH, vehicle_label=vehicle, job_role=job_role)
rows_by_month = build_monthly_rows(hops, site_config, resolver)
# Write workbook to in-memory buffer
output_stream = BytesIO()
    # write_monthly_workbook writes to a path, so round-trip the workbook
    # through a second temporary file and stream its bytes back.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp_out:
        workbook_path = tmp_out.name
    write_monthly_workbook(rows_by_month, workbook_path)
    with open(workbook_path, "rb") as f:
        output_stream.write(f.read())
    # Clean up both temporary files
    for path in (input_path, workbook_path):
        try:
            os.remove(path)
        except OSError:
            pass
# Prepare response
output_stream.seek(0)
filename = "mileage.xlsx"
headers = {"Content-Disposition": f"attachment; filename={filename}"}
return StreamingResponse(output_stream, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", headers=headers)

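On the client side, one way to exercise the endpoint above, assuming the server is running locally and httpx is installed; paths and form values are placeholders:

```python
import httpx

with open("takeout.json", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/process",
        files={"file": ("takeout.json", f, "application/json")},
        data={"vehicle": "SH11 DRV (Own 1.6CC Diesel Car/Van)",
              "job_role": "ICT Technician"},
        timeout=60.0,
    )
resp.raise_for_status()
with open("mileage.xlsx", "wb") as out:
    out.write(resp.content)  # the returned Excel workbook
```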
mileage_logger/ingest/__init__.py

@@ -0,0 +1,18 @@
"""Subpackage for data ingestion.
The :mod:`mileage_logger.ingest` package contains utilities for reading
Google Semantic Location History JSON exports. The core entry point is
the :func:`load_place_visits` function which converts raw JSON into
structured :class:`PlaceVisit` objects. These objects expose
timezone-aware start and end timestamps as well as geographic
coordinates and the human readable name of the location.
"""
from .semantic_reader import Location, PlaceVisit, ActivitySegment, load_place_visits
__all__ = [
"Location",
"PlaceVisit",
"ActivitySegment",
"load_place_visits",
]

mileage_logger/ingest/semantic_reader.py

@@ -0,0 +1,258 @@
"""Parser for Google Semantic Location History exports.
Google Takeout and on-device exports of the Timeline API are provided
as JSON files under a ``timelineObjects`` key. Each entry in
``timelineObjects`` is either a ``placeVisit`` or an ``activitySegment``.
This module exposes data classes representing those events and a
convenient loader that normalises timestamps and coordinate formats.
Timestamps in the source JSON are encoded as millisecond epoch
strings. When loaded these are converted into timezone-aware
:class:`datetime.datetime` objects. Coordinates in the JSON are stored
as integer multiples of 1e-7 degrees; we scale them to floats.
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List
import pytz
@dataclass
class Location:
"""A simple geographic location.
Attributes
----------
lat : float
Latitude in decimal degrees.
lon : float
Longitude in decimal degrees.
name : str
Human readable name of the location as provided by Google.
"""
lat: float
lon: float
name: str
@dataclass
class PlaceVisit:
"""A visit to a single location for a period of time.
Attributes
----------
location : :class:`Location`
The geographic coordinates and name of the place.
start_time : :class:`datetime`
The timezone-aware start timestamp of the visit.
end_time : :class:`datetime`
The timezone-aware end timestamp of the visit.
"""
location: Location
start_time: datetime
end_time: datetime
@dataclass
class ActivitySegment:
"""A movement between two locations.
While not used directly in itinerary detection, activity segments
contain useful timing information that could be used to derive the
start date for a hop between recognised sites. This class is
provided for completeness and potential future use.
"""
start_location: Location
end_location: Location
start_time: datetime
end_time: datetime
activity_type: str
def _ms_to_dt(ms: str, tz: pytz.BaseTzInfo) -> datetime:
"""Convert a millisecond epoch string into a timezone-aware datetime.
Parameters
----------
ms : str
Milliseconds since the Unix epoch encoded as a decimal string.
tz : :class:`pytz.tzinfo.BaseTzInfo`
The timezone into which to localise the resulting datetime.
Returns
-------
:class:`datetime`
A timezone-aware datetime corresponding to the input.
"""
# Google exports store times in milliseconds since UTC epoch
ts = int(ms) / 1000.0
utc_dt = datetime.fromtimestamp(ts, timezone.utc)
return utc_dt.astimezone(tz)
def _parse_location(raw: dict) -> Location:
"""Parse a location dictionary from the export format.
The export encodes lat/lon in integer multiples of 1e-7 degrees.
This helper scales the values into decimals and extracts the
``name`` field.
Parameters
----------
raw : dict
A mapping containing ``latitudeE7``, ``longitudeE7`` and
``name`` keys.
Returns
-------
:class:`Location`
A populated location object.
"""
lat = raw.get("latitudeE7")
lon = raw.get("longitudeE7")
name = raw.get("name", "")
return Location(lat=float(lat) / 1e7 if lat is not None else 0.0,
lon=float(lon) / 1e7 if lon is not None else 0.0,
name=name)
def load_place_visits(path: str, tz_name: str = "Europe/London") -> List[PlaceVisit]:
"""Load all place visits from a Location History JSON file.
This function supports both the legacy "Semantic Location History"
exports (containing a top-level ``timelineObjects`` array) and
newer on-device Timeline exports that expose a ``semanticSegments``
    array. In both cases the goal is to extract "place visits":
    periods of time spent at a single location.
For legacy files the timestamps are millisecond epoch strings and
coordinates are encoded as integer multiples of 1e-7 degrees. For
device-local exports the timestamps are ISO 8601 strings with
timezone offsets and coordinates are stored in a ``latLng`` string
on the ``visit.topCandidate.placeLocation``.
Parameters
----------
path : str
Path to the JSON file produced by Google Takeout or the
on-device Timeline export.
tz_name : str, optional
The name of the timezone used for localisation, by default
``Europe/London``. See the ``pytz`` documentation for valid
identifiers.
Returns
-------
list of :class:`PlaceVisit`
A chronologically ordered list of place visits.
"""
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
tz = pytz.timezone(tz_name)
visits: List[PlaceVisit] = []
# Legacy Semantic Location History format
if "timelineObjects" in data and isinstance(data["timelineObjects"], list):
timeline_objects = data.get("timelineObjects", [])
for obj in timeline_objects:
if "placeVisit" in obj:
pv = obj["placeVisit"]
loc = _parse_location(pv.get("location", {}))
dur = pv.get("duration", {})
start_ms = dur.get("startTimestampMs")
end_ms = dur.get("endTimestampMs")
if start_ms is None or end_ms is None:
# Skip malformed entries
continue
visits.append(PlaceVisit(
location=loc,
start_time=_ms_to_dt(start_ms, tz),
end_time=_ms_to_dt(end_ms, tz),
))
elif "activitySegment" in obj:
# We ignore activity segments for now; they are parsed here
# only to support potential future features such as deriving
# more accurate hop start times.
seg = obj["activitySegment"]
start_loc = _parse_location(seg.get("startLocation", {}))
end_loc = _parse_location(seg.get("endLocation", {}))
dur = seg.get("duration", {})
start_ms = dur.get("startTimestampMs")
end_ms = dur.get("endTimestampMs")
if start_ms is None or end_ms is None:
continue
# Create ActivitySegment instance (unused for now)
# The object is not appended to the visits list because
# itinerary detection only relies on place visits.
_ = ActivitySegment(
start_location=start_loc,
end_location=end_loc,
start_time=_ms_to_dt(start_ms, tz),
end_time=_ms_to_dt(end_ms, tz),
activity_type=seg.get("activityType", "UNKNOWN"),
)
# New device-local Timeline export format
elif "semanticSegments" in data and isinstance(data["semanticSegments"], list):
try:
from dateutil import parser as dateutil_parser # type: ignore
except ImportError:
raise ImportError(
"python-dateutil is required to parse device-local Timeline exports. "
"Install it with 'pip install python-dateutil'."
)
for segment in data["semanticSegments"]:
# Only interested in visit segments; skip activities and path-only entries
visit = segment.get("visit")
if not visit:
continue
# Extract start and end times (ISO 8601 with timezone offsets)
start_time_iso = segment.get("startTime")
end_time_iso = segment.get("endTime")
if not start_time_iso or not end_time_iso:
continue
try:
start_dt = dateutil_parser.isoparse(start_time_iso).astimezone(tz)
end_dt = dateutil_parser.isoparse(end_time_iso).astimezone(tz)
except (ValueError, OverflowError):
# Skip unparseable times
continue
# Extract coordinates; stored as "latLng": "lat°, lon°"
place_loc = visit.get("topCandidate", {}).get("placeLocation", {})
latlng_str = place_loc.get("latLng")
if not latlng_str:
continue
# Strip degree symbol and split into lat/lon components
try:
lat_str, lon_str = [c.strip().replace("°", "") for c in latlng_str.split(",")]
lat = float(lat_str)
lon = float(lon_str)
except Exception:
continue
# Use the semantic type or label as the name if available
candidate = visit.get("topCandidate", {})
name = candidate.get("label") or candidate.get("semanticType") or ""
visits.append(PlaceVisit(
location=Location(lat=lat, lon=lon, name=str(name)),
start_time=start_dt,
end_time=end_dt,
))
# Ignore any other structures (e.g. rawSignals, userLocationProfile)
else:
# If the file doesn't contain known keys, return empty list
return []
# Sort visits chronologically by start time
visits.sort(key=lambda v: v.start_time)
return visits

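To make the legacy shape concrete, a tiny synthetic export round-tripped through the loader. The timestamps are millisecond epoch strings corresponding to 2025-08-12 in Europe/London, and the E7 coordinates are invented:

```python
import json
import tempfile
from mileage_logger.ingest.semantic_reader import load_place_visits

sample = {"timelineObjects": [{"placeVisit": {
    "location": {"latitudeE7": 526300000, "longitudeE7": 13000000, "name": "Home"},
    "duration": {"startTimestampMs": "1754956800000",   # 2025-08-12 00:00:00 UTC
                 "endTimestampMs": "1754960400000"},    # one hour later
}}]}
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(sample, f)

visits = load_place_visits(f.name)
print(visits[0].location.name, visits[0].start_time.isoformat())
# Home 2025-08-12T01:00:00+01:00  (localised to BST)
```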
mileage_logger/logic/__init__.py

@@ -0,0 +1,13 @@
"""Business logic for detecting work itineraries.
This package exposes functions used to interpret a chronologically
ordered list of :class:`PlaceVisit` objects and reduce them into a
sequence of 'hops' between recognised work locations. Recognition is
driven by a site configuration file (YAML) that defines canonical
names, friendly labels, optional aliases and geofences for each
location.
"""
from .detect_itinerary import SiteConfig, SiteEntry, Hop, detect_itinerary
__all__ = ["SiteConfig", "SiteEntry", "Hop", "detect_itinerary"]

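Recognition is driven entirely by that YAML file. A minimal sketch of a configuration accepted by `SiteConfig.from_yaml` in the next file; the loader also accepts a bare list of entries or a mapping keyed by canonical name, and all values below are invented:

```python
import textwrap
from mileage_logger.logic.detect_itinerary import SiteConfig

yaml_text = textwrap.dedent("""\
    sites:
      - canonical: Home
        label: Home
        lat: 52.63
        lon: 1.30
        radius_m: 150
        aliases: ["my house"]
      - canonical: Lingwood
        label: Lingwood Primary Academy
        lat: 52.62
        lon: 1.48
        radius_m: 200
""")
with open("sites_example.yml", "w", encoding="utf-8") as f:
    f.write(yaml_text)

config = SiteConfig.from_yaml("sites_example.yml")
assert config.alias_map["my house"] == "Home"
```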
mileage_logger/logic/detect_itinerary.py

@@ -0,0 +1,176 @@
"""Detect ordered hops between whitelisted sites in a day's timeline.
We process visits per calendar day (Europe/London), resetting state each
day. We also support injecting a synthetic Home→FirstSite hop when the
first recognised site of the day isn't Home (assume_home_start).
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import date
from typing import Dict, Iterable, List, Optional, Tuple
from collections import defaultdict
import math
import yaml
from ..ingest.semantic_reader import Location, PlaceVisit
def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Compute the great-circle distance between two points in miles."""
R = 3958.8 # Earth radius in miles
phi1 = math.radians(lat1)
phi2 = math.radians(lat2)
dphi = math.radians(lat2 - lat1)
dlambda = math.radians(lon2 - lon1)
a = math.sin(dphi / 2.0) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2.0) ** 2
c = 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a)))
return R * c
@dataclass
class SiteEntry:
"""Represents a single recognised site from the configuration."""
canonical: str
label: str
lat: float
lon: float
radius_m: float
aliases: List[str]
class SiteConfig:
"""Holds all recognised site definitions keyed by canonical name."""
def __init__(self, sites: Iterable[SiteEntry]):
self.by_canonical: Dict[str, SiteEntry] = {s.canonical: s for s in sites}
self.alias_map: Dict[str, str] = {}
for site in sites:
for alias in [site.canonical] + site.aliases:
self.alias_map[alias.lower()] = site.canonical
@classmethod
def from_yaml(cls, path: str) -> "SiteConfig":
"""Load a site configuration from a YAML file."""
with open(path, "r", encoding="utf-8") as f:
raw = yaml.safe_load(f)
sites_data: List[Dict[str, object]] = []
if isinstance(raw, list):
sites_data = raw
elif isinstance(raw, dict):
if "sites" in raw and isinstance(raw["sites"], list):
sites_data = raw["sites"]
else:
for canon, entry in raw.items():
entry = entry or {}
if not isinstance(entry, dict):
raise ValueError("Site entry for %s must be a mapping" % canon)
entry = dict(entry)
entry.setdefault("canonical", canon)
sites_data.append(entry)
else:
raise ValueError("Invalid site configuration format")
sites: List[SiteEntry] = []
for entry in sites_data:
canonical = entry.get("canonical") or entry.get("name")
if not canonical:
raise ValueError("Site entry missing canonical name")
label = entry.get("label", canonical)
lat = float(entry.get("lat", 0.0))
lon = float(entry.get("lon", 0.0))
radius_m = float(entry.get("radius_m", 0.0))
aliases = entry.get("aliases") or []
sites.append(SiteEntry(
canonical=canonical,
label=label,
lat=lat,
lon=lon,
radius_m=radius_m,
aliases=list(aliases),
))
return cls(sites)
def recognise(self, location: Location) -> Optional[str]:
"""Return canonical site name if this location matches by name/alias or geofence."""
name_lower = (location.name or "").lower()
# Pass 1: name/alias substring match
for alias, canonical in self.alias_map.items():
if alias in name_lower:
return canonical
# Pass 2: geofence match
for canonical, site in self.by_canonical.items():
if site.radius_m > 0:
max_dist_miles = site.radius_m / 1609.34
d = haversine_distance(location.lat, location.lon, site.lat, site.lon)
if d <= max_dist_miles:
return canonical
return None
@dataclass
class Hop:
"""A hop from one recognised site to another, dated by the origin's start date."""
date: date
origin: str
destination: str
def _build_day_hops(day_visits: List[PlaceVisit], site_config: SiteConfig, assume_home_start: bool) -> List[Hop]:
"""Build ordered hops for a single day of visits."""
# Ensure chronological order by *start* time
day_visits = sorted(day_visits, key=lambda v: v.start_time)
recognised: List[Tuple[str, PlaceVisit]] = []
last_site: Optional[str] = None
for v in day_visits:
s = site_config.recognise(v.location)
if not s:
continue
if s == last_site:
continue # ignore duplicates back-to-back
recognised.append((s, v))
last_site = s
if not recognised:
return []
# Inject Home at start if enabled and first site isn't Home
if assume_home_start and recognised[0][0] != "Home":
first_time = recognised[0][1].start_time
synthetic_home = PlaceVisit(location=Location(lat=0.0, lon=0.0, name="Home"),
start_time=first_time, end_time=first_time)
recognised.insert(0, ("Home", synthetic_home))
# Walk forward, stop at second Home
hops: List[Hop] = []
home_hits = 1 if recognised and recognised[0][0] == "Home" else 0
for i in range(1, len(recognised)):
origin_site, origin_visit = recognised[i - 1]
dest_site, _dest_visit = recognised[i]
hop_date = origin_visit.start_time.date()
if origin_site != dest_site:
hops.append(Hop(date=hop_date, origin=origin_site, destination=dest_site))
if dest_site == "Home":
home_hits += 1
if home_hits >= 2:
break
return hops
def detect_itinerary(visits: List[PlaceVisit], site_config: SiteConfig, *, assume_home_start: bool = True) -> List[Hop]:
"""Reduce all visits into ordered hops per day, concatenated across the file."""
if not visits:
return []
# Group by the local date from each visit's start_time
by_day: Dict[date, List[PlaceVisit]] = defaultdict(list)
for v in visits:
by_day[v.start_time.date()].append(v)
hops_all: List[Hop] = []
for day in sorted(by_day.keys()):
day_hops = _build_day_hops(by_day[day], site_config, assume_home_start=assume_home_start)
hops_all.extend(day_hops)
return hops_all
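
Finally, a compact worked example of the day-level state machine above, using name-based recognition; all names, coordinates and times are invented:

```python
from datetime import datetime
import pytz
from mileage_logger.ingest.semantic_reader import Location, PlaceVisit
from mileage_logger.logic.detect_itinerary import SiteConfig, SiteEntry, detect_itinerary

tz = pytz.timezone("Europe/London")
sites = SiteConfig([
    SiteEntry("Home", "Home", 52.63, 1.30, 0.0, []),
    SiteEntry("SchoolA", "School A", 52.62, 1.48, 0.0, ["school a"]),
    SiteEntry("SchoolB", "School B", 52.70, 1.10, 0.0, ["school b"]),
])

def visit(name, lat, lon, start_h, end_h):
    """Build a one-day PlaceVisit localised to Europe/London."""
    return PlaceVisit(Location(lat, lon, name),
                      tz.localize(datetime(2025, 8, 12, start_h)),
                      tz.localize(datetime(2025, 8, 12, end_h)))

visits = [
    visit("Home", 52.63, 1.30, 7, 8),
    visit("School A", 52.62, 1.48, 9, 12),
    visit("School B", 52.70, 1.10, 13, 16),
    visit("Home", 52.63, 1.30, 17, 23),
]
for hop in detect_itinerary(visits, sites):
    print(hop.date, hop.origin, "->", hop.destination)
# 2025-08-12 Home -> SchoolA
# 2025-08-12 SchoolA -> SchoolB
# 2025-08-12 SchoolB -> Home
```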