Initial commit (clean, ignores in place)
mileage_logger/__init__.py (new file, 38 lines)
@@ -0,0 +1,38 @@
"""Top level package for the mileage logging tool.

This package provides a set of modules used to ingest Google Semantic
Location History data, detect work-related travel itineraries based on a
whitelisted set of sites, resolve driving distances between those sites
and export the resulting mileage claims into an Excel workbook ready
for submission to an HR system.

The project is organised into subpackages:

* :mod:`mileage_logger.ingest` – parse Google Takeout JSON exports
  into structured Python objects.
* :mod:`mileage_logger.logic` – implement the state machine that
  identifies ordered hops between recognised locations in a day’s
  timeline.
* :mod:`mileage_logger.distance` – resolve distances via a route
  catalogue or, optionally, an external API with caching.
* :mod:`mileage_logger.export` – write Excel workbooks or CSV files
  containing the final mileage log.
* :mod:`mileage_logger.cli` – command line interface for invoking
  common workflows such as importing a new export or rebuilding a
  monthly workbook.

This package requires Python 3.11 or newer. See the README for
installation and usage instructions.
"""

from .ingest import semantic_reader  # noqa: F401
from .logic import detect_itinerary  # noqa: F401
from .distance import resolve  # noqa: F401
from .export import excel_writer  # noqa: F401

__all__ = [
    "semantic_reader",
    "detect_itinerary",
    "resolve",
    "excel_writer",
]
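Taken together, the re-exports above support a short end-to-end pipeline. A minimal sketch, assuming the input and config files exist at the paths shown (all paths illustrative):

```python
from mileage_logger.ingest.semantic_reader import load_place_visits
from mileage_logger.logic.detect_itinerary import SiteConfig, detect_itinerary
from mileage_logger.distance.resolve import DistanceResolver
from mileage_logger.export.excel_writer import build_monthly_rows, write_monthly_workbook

visits = load_place_visits("takeout.json")        # illustrative input path
sites = SiteConfig.from_yaml("config/sites.yml")  # recognised-site whitelist
hops = detect_itinerary(visits, sites)            # ordered hops per day
rows = build_monthly_rows(hops, sites, DistanceResolver("routes.csv"))
write_monthly_workbook(rows, "mileage.xlsx")      # one sheet per month
```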
mileage_logger/cli.py (new file, 189 lines)
@@ -0,0 +1,189 @@
"""Command line interface for the mileage logging tool."""

from __future__ import annotations

import argparse
import os
from datetime import date, datetime, timedelta
from typing import Optional, Tuple

import pytz

from .ingest.semantic_reader import load_place_visits
from .logic.detect_itinerary import SiteConfig, detect_itinerary
from .distance.resolve import DistanceResolver
from .export.excel_writer import build_monthly_rows, write_monthly_workbook


TZ = pytz.timezone("Europe/London")


def _today_local() -> date:
    return datetime.now(TZ).date()


def _prev_month_bounds(today: Optional[date] = None) -> Tuple[date, date]:
    """Return (start_date, end_date) for the previous calendar month in Europe/London."""
    if today is None:
        today = _today_local()
    first_this_month = today.replace(day=1)
    last_prev_month = first_this_month - timedelta(days=1)
    start_prev_month = last_prev_month.replace(day=1)
    return start_prev_month, last_prev_month


def _month_bounds(ym: str) -> Tuple[date, date]:
    """Return (start_date, end_date) for the given YYYY-MM."""
    year, month = map(int, ym.split("-"))
    start = date(year, month, 1)
    if month == 12:
        end = date(year + 1, 1, 1) - timedelta(days=1)
    else:
        end = date(year, month + 1, 1) - timedelta(days=1)
    return start, end


def _parse_date(s: str) -> date:
    y, m, d = map(int, s.split("-"))
    return date(y, m, d)
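A quick sanity check of the month-bound helpers above (dates illustrative):

```python
from datetime import date

# Previous calendar month relative to an illustrative "today".
assert _prev_month_bounds(date(2025, 9, 15)) == (date(2025, 8, 1), date(2025, 8, 31))
# Explicit month, including the December year rollover.
assert _month_bounds("2025-12") == (date(2025, 12, 1), date(2025, 12, 31))
```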
def import_file(
    json_path: str,
    site_config_path: str,
    route_csv_path: str,
    output_dir: str,
    assume_home_start: bool,
    weekdays_only: bool,
    month: Optional[str],
    last_month: bool,
    since: Optional[str],
    until: Optional[str],
    days: Optional[int],
) -> None:
    """Import a single JSON file and write Excel workbooks (one per month)."""
    visits = load_place_visits(json_path)
    if not visits:
        print(f"No place visits found in {json_path}")
        return

    # 1) Determine date range filter
    start_date: Optional[date] = None
    end_date: Optional[date] = None

    if month:
        start_date, end_date = _month_bounds(month)
    elif last_month:
        start_date, end_date = _prev_month_bounds()
    elif since or until:
        if since:
            start_date = _parse_date(since)
        if until:
            end_date = _parse_date(until)
    elif days:
        end_date = _today_local()
        start_date = end_date - timedelta(days=days - 1)

    # 2) Apply date filtering to visits (by visit.start_time local date)
    if start_date or end_date:
        def in_range(v):
            d = v.start_time.date()
            if start_date and d < start_date:
                return False
            if end_date and d > end_date:
                return False
            return True
        visits = [v for v in visits if in_range(v)]
        if not visits:
            label = f"{start_date or ''}..{end_date or ''}"
            print(f"No place visits in requested range {label}")
            return

    site_config = SiteConfig.from_yaml(site_config_path)
    hops = detect_itinerary(visits, site_config, assume_home_start=assume_home_start)
    if not hops:
        print("No recognised hops detected after filtering.")
        return

    # 3) Weekday filter (Sat=5, Sun=6)
    if weekdays_only:
        hops = [h for h in hops if h.date.weekday() < 5]
        if not hops:
            print("All hops fell on weekends; nothing to write.")
            return

    resolver = DistanceResolver(route_csv_path)
    rows_by_month = build_monthly_rows(hops, site_config, resolver)

    # 4) Write one workbook per month present
    os.makedirs(output_dir, exist_ok=True)
    for month_key, rows in sorted(rows_by_month.items()):
        # If a specific month/range was requested, rows_by_month will already reflect it.
        output_path = os.path.join(output_dir, f"mileage_{month_key}.xlsx")
        write_monthly_workbook({month_key: rows}, output_path)
        print(f"Wrote {output_path} ({len(rows)} rows)")


def main(argv: list[str] | None = None) -> None:
    parser = argparse.ArgumentParser(description="Mileage logging tool")
    subparsers = parser.add_subparsers(dest="command", required=True)

    import_parser = subparsers.add_parser("import", help="Import a single JSON export")
    import_parser.add_argument("json_path", help="Path to the JSON file to import")
    import_parser.add_argument(
        "--sites", dest="site_config_path",
        default=os.path.join(os.path.dirname(__file__), "../config/sites.yml"),
        help="Path to the sites.yml configuration",
    )
    import_parser.add_argument(
        "--routes", dest="route_csv_path",
        default=os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv"),
        help="Path to the routes CSV catalogue",
    )
    import_parser.add_argument(
        "--output", dest="output_dir", default=os.getcwd(),
        help="Directory to write the Excel workbook(s)",
    )

    # Behaviour toggles
    import_parser.add_argument(
        "--no-assume-home-start", action="store_true",
        help="Do not inject a Home→first-site hop when a day doesn't start at Home.",
    )
    import_parser.add_argument(
        "--weekdays-only", action="store_true",
        help="Exclude Saturday/Sunday hops.",
    )

    # Date filters (choose one style)
    import_parser.add_argument("--last-month", action="store_true",
                               help="Process the previous calendar month.")
    import_parser.add_argument("--month", metavar="YYYY-MM",
                               help="Process a specific calendar month, e.g. 2025-08.")
    import_parser.add_argument("--since", metavar="YYYY-MM-DD",
                               help="Lower bound (inclusive) for visits to process.")
    import_parser.add_argument("--until", metavar="YYYY-MM-DD",
                               help="Upper bound (inclusive) for visits to process.")
    import_parser.add_argument("--days", type=int,
                               help="Process the last N days (relative to today).")

    args = parser.parse_args(argv)
    if args.command == "import":
        import_file(
            args.json_path,
            args.site_config_path,
            args.route_csv_path,
            args.output_dir,
            assume_home_start=(not args.no_assume_home_start),
            weekdays_only=args.weekdays_only,
            month=args.month,
            last_month=args.last_month,
            since=args.since,
            until=args.until,
            days=args.days,
        )


if __name__ == "__main__":
    main()
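The import subcommand can also be driven programmatically via `main()`; a sketch with illustrative paths:

```python
# Equivalent to: python -m mileage_logger.cli import takeout.json --last-month --weekdays-only
# (file and directory names here are illustrative)
main([
    "import", "takeout.json",
    "--sites", "config/sites.yml",
    "--routes", "tests/data/routes_golden.csv",
    "--output", "out",
    "--last-month",
    "--weekdays-only",
])
```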
mileage_logger/distance/__init__.py (new file, 12 lines)
@@ -0,0 +1,12 @@
"""Distance resolution utilities.

This subpackage exposes classes and functions that resolve driving
distances between pairs of recognised sites. The primary
implementation is :class:`DistanceResolver`, which first consults a
pre-defined route catalogue before optionally consulting an external
API and finally falling back to a simple geodesic calculation.
"""

from .resolve import DistanceResolver, haversine_distance

__all__ = ["DistanceResolver", "haversine_distance"]
mileage_logger/distance/resolve.py (new file, 210 lines)
@@ -0,0 +1,210 @@
"""Resolve driving distances between sites.

The :class:`DistanceResolver` class provides a simple mechanism to
determine the distance in miles between two points. It is designed to
prefer a local route catalogue (CSV) if available, fall back to
external API calls when an API key is configured and, as a last
resort, compute a straight-line distance using the haversine formula.

Results are cached to avoid repeated API calls or calculations. A
time-to-live (TTL) can be specified when constructing the resolver;
expired entries are recomputed on demand. Distances are rounded to
one decimal place as required by HR mileage claim forms.
"""

from __future__ import annotations

import csv
import os
import time
from dataclasses import dataclass
from typing import Dict, Optional, Tuple

try:
    import httpx  # type: ignore
except ImportError:  # Optional dependency. If unavailable, API calls will be skipped.
    httpx = None  # type: ignore

from ..logic.detect_itinerary import haversine_distance


@dataclass
class _CacheEntry:
    distance: float
    timestamp: float


class DistanceResolver:
    """Resolve driving distances between two locations.

    The resolver consults an in-memory cache, a local route catalogue,
    an optional external API and finally falls back to a straight-line
    calculation using the haversine formula. Distances are cached for
    the lifetime of the object. Rounding to one decimal mile is
    applied uniformly.
    """

    def __init__(self, route_csv_path: Optional[str] = None, api_key: Optional[str] = None,
                 http_client: Optional[object] = None, ttl_seconds: float = 365 * 24 * 3600,
                 vehicle_label: str = "SH11 DRV (Own 1.6CC Diesel Car/Van)", job_role: str = "ICT Technician"):
        """Initialise the distance resolver.

        Parameters
        ----------
        route_csv_path : str, optional
            Path to a CSV file containing pre-approved route distances.
            The file should have at least three columns: origin,
            destination and miles. The entries are assumed to be
            directional; if symmetric distances are desired both
            directions must be provided.
        api_key : str, optional
            API key for the Google Routes API. If omitted, API calls
            will be skipped.
        http_client : :class:`httpx.Client`, optional
            HTTP client instance to use for API requests. A new client
            will be created if an API key is configured and no client
            is provided.
        ttl_seconds : float, optional
            Time-to-live for cache entries in seconds. Expired
            entries are recomputed on demand. The default is one year.
        """

        self.api_key = api_key
        # Only store an HTTP client if httpx is available. When it is,
        # create a default client so that a configured API key works
        # without the caller supplying one. When httpx is unavailable
        # the client is ignored and API calls are skipped.
        if httpx is not None:
            self.http_client = http_client or (httpx.Client() if api_key else None)
        else:
            self.http_client = None
        self.ttl_seconds = ttl_seconds
        self.vehicle_label = vehicle_label
        self.job_role = job_role
        self.cache: Dict[Tuple[str, str], _CacheEntry] = {}
        # Load route catalogue
        self.route_catalog: Dict[Tuple[str, str], float] = {}
        if route_csv_path and os.path.exists(route_csv_path):
            with open(route_csv_path, "r", encoding="utf-8") as f:
                reader = csv.reader(f)
                for row in reader:
                    if not row or row[0].startswith("#"):
                        continue
                    try:
                        origin, destination, miles_str = row[:3]
                        miles = float(miles_str)
                        self.route_catalog[(origin.strip(), destination.strip())] = miles
                    except Exception:
                        # Skip malformed entries silently
                        continue

    def _get_from_cache(self, origin: str, dest: str) -> Optional[float]:
        """Retrieve a cached distance if present and unexpired."""
        entry = self.cache.get((origin, dest))
        if entry is None:
            return None
        if (time.time() - entry.timestamp) > self.ttl_seconds:
            # Expired
            return None
        return entry.distance

    def _set_cache(self, origin: str, dest: str, distance: float) -> None:
        """Cache the given distance for the origin/destination pair."""
        self.cache[(origin, dest)] = _CacheEntry(distance=distance, timestamp=time.time())

    def resolve(self, origin_name: str, dest_name: str, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> float:
        """Resolve the distance between two sites in miles.

        This method will consult the cache, route catalogue, external API
        and finally compute a haversine distance. Once resolved, the
        distance is cached and rounded to one decimal place.

        Parameters
        ----------
        origin_name : str
            Canonical name of the origin site. Used for cache and
            catalogue lookups.
        dest_name : str
            Canonical name of the destination site.
        origin_coords : tuple(float, float)
            Latitude and longitude of the origin in decimal degrees.
        dest_coords : tuple(float, float)
            Latitude and longitude of the destination in decimal degrees.

        Returns
        -------
        float
            The resolved driving distance in miles, rounded to one
            decimal place.
        """

        # First check the cache
        cached = self._get_from_cache(origin_name, dest_name)
        if cached is not None:
            return cached
        # Second, consult the route catalogue
        catalogue_key = (origin_name, dest_name)
        if catalogue_key in self.route_catalog:
            dist = self.route_catalog[catalogue_key]
            rounded = round(dist, 1)
            self._set_cache(origin_name, dest_name, rounded)
            return rounded
        # Attempt to call the external API if configured
        if self.api_key:
            try:
                dist = self._call_google_routes_api(origin_coords, dest_coords)
                if dist is not None:
                    rounded = round(dist, 1)
                    self._set_cache(origin_name, dest_name, rounded)
                    return rounded
            except Exception:
                # Swallow API errors and fall back
                pass
        # Fall back to haversine distance
        dist = haversine_distance(origin_coords[0], origin_coords[1], dest_coords[0], dest_coords[1])
        rounded = round(dist, 1)
        self._set_cache(origin_name, dest_name, rounded)
        return rounded

    def _call_google_routes_api(self, origin_coords: Tuple[float, float], dest_coords: Tuple[float, float]) -> Optional[float]:
        """Call the Google Maps Routes API to compute driving distance.

        Note that this is a blocking call. The caller should ensure that
        network access is permitted and that a valid API key has been
        configured. If the request fails or the response cannot be
        parsed, ``None`` is returned.
        """

        # If httpx is unavailable, or no client is configured, skip the API call
        if httpx is None or self.http_client is None:
            return None
        # Construct the API request.
        # See https://developers.google.com/maps/documentation/routes for details
        base_url = "https://routes.googleapis.com/directions/v2:computeRoutes"
        # Compose the JSON payload
        payload = {
            "origin": {"location": {"latLng": {"latitude": origin_coords[0], "longitude": origin_coords[1]}}},
            "destination": {"location": {"latLng": {"latitude": dest_coords[0], "longitude": dest_coords[1]}}},
            "travelMode": "DRIVE",
            "routingPreference": "TRAFFIC_AWARE",
            "computeAlternativeRoutes": False,
            "units": "IMPERIAL",
        }
        headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": self.api_key,
            "X-Goog-FieldMask": "routes.duration,routes.distanceMeters",
        }
        resp = self.http_client.post(base_url, json=payload, headers=headers)
        if resp.status_code != 200:
            return None
        try:
            data = resp.json()
            routes = data.get("routes") or []
            if not routes:
                return None
            # Distance is returned in metres; convert to miles
            meters = routes[0]["distanceMeters"]
            miles = meters / 1609.34
            return float(miles)
        except Exception:
            return None
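A minimal offline usage sketch for the resolver: no API key, so only the catalogue path is exercised (route data and coordinates illustrative):

```python
import csv
import tempfile

# Write a tiny one-route catalogue (illustrative data).
with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False, newline="") as f:
    csv.writer(f).writerow(["Home", "Lingwood Primary Academy", "13.2"])
    routes_path = f.name

resolver = DistanceResolver(route_csv_path=routes_path)
# Catalogue hit: returns 13.2. Coordinates only matter for API/haversine fallbacks.
print(resolver.resolve("Home", "Lingwood Primary Academy", (52.63, 1.30), (52.62, 1.48)))
```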
mileage_logger/export/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
"""Export utilities for writing mileage logs to Excel and CSV."""

from .excel_writer import write_monthly_workbook, build_monthly_rows

__all__ = ["write_monthly_workbook", "build_monthly_rows"]
mileage_logger/export/excel_writer.py (new file, 122 lines)
@@ -0,0 +1,122 @@
"""Write mileage itineraries to Excel workbooks.

This module uses :mod:`openpyxl` to construct a workbook with one sheet
per month. Each row corresponds to a single hop between recognised
sites. Columns follow the specification used by the EveryHR system:

* ``Date`` – calendar date in ISO format (YYYY-MM-DD).
* ``Purpose`` – free text summarising the journey, e.g. ``"Travel from
  Home to Lingwood Primary Academy 13.2mi"``.
* ``Miles`` – numeric value rounded to one decimal place.
* ``Vehicle`` – the vehicle descriptor configured for the user.
* ``Job Role`` – the job role of the user.
* ``From`` – friendly label of the origin site.
* ``To`` – friendly label of the destination site.
* ``Notes`` – blank for manual additions.

Rows are grouped by month (YYYY-MM). Each sheet is named after the
month and contains a header row followed by one row per hop in
chronological order.
"""

from __future__ import annotations

import os
from collections import defaultdict
from typing import Dict, Iterable, List, Tuple

from openpyxl import Workbook
from openpyxl.utils import get_column_letter

from ..logic.detect_itinerary import Hop, SiteConfig


def build_monthly_rows(hops: Iterable[Hop], site_config: SiteConfig, distance_resolver) -> Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]]:
    """Prepare rows grouped by month for Excel output.

    Parameters
    ----------
    hops : iterable of :class:`Hop`
        The hops produced by itinerary detection.
    site_config : :class:`SiteConfig`
        Used to look up friendly labels for canonical site names.
    distance_resolver : object
        An object with a ``resolve(origin_name, dest_name, origin_coords, dest_coords)``
        method that returns a distance in miles. See
        :class:`~mileage_logger.distance.resolve.DistanceResolver`.

    Returns
    -------
    dict mapping str -> list of tuples
        Keys are month strings in the form ``YYYY-MM``. Values are
        lists of tuples containing the data for each row: (date_str,
        purpose, miles, vehicle, job_role, from_label, to_label, notes).
    """

    rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]] = defaultdict(list)
    for hop in hops:
        month_key = hop.date.strftime("%Y-%m")
        origin_site = site_config.by_canonical.get(hop.origin)
        dest_site = site_config.by_canonical.get(hop.destination)
        if origin_site is None or dest_site is None:
            continue
        # Resolve distance
        dist = distance_resolver.resolve(
            hop.origin,
            hop.destination,
            (origin_site.lat, origin_site.lon),
            (dest_site.lat, dest_site.lon),
        )
        # Build purpose string
        purpose = f"Travel from {origin_site.label} to {dest_site.label} {dist:.1f}mi"
        rows_by_month[month_key].append(
            (
                hop.date.isoformat(),
                purpose,
                dist,
                getattr(distance_resolver, "vehicle_label", "SH11 DRV (Own 1.6CC Diesel Car/Van)"),
                getattr(distance_resolver, "job_role", "ICT Technician"),
                origin_site.label,
                dest_site.label,
                "",
            )
        )
    return rows_by_month


def write_monthly_workbook(rows_by_month: Dict[str, List[Tuple[str, str, float, str, str, str, str, str]]], output_path: str) -> None:
    """Write the grouped rows into an Excel workbook.

    Parameters
    ----------
    rows_by_month : dict
        Mapping from month strings to lists of row tuples as returned
        by :func:`build_monthly_rows`.
    output_path : str
        Path of the Excel workbook to write. Any existing file will be
        overwritten.
    """

    wb = Workbook()
    # Remove the default sheet created by openpyxl
    default_sheet = wb.active
    wb.remove(default_sheet)
    for month, rows in sorted(rows_by_month.items()):
        ws = wb.create_sheet(title=month)
        # Write header
        header = ["Date", "Purpose", "Miles", "Vehicle", "Job Role", "From", "To", "Notes"]
        ws.append(header)
        for row in rows:
            ws.append(list(row))
        # Autosize columns (approximate)
        for col_idx in range(1, len(header) + 1):
            column_letter = get_column_letter(col_idx)
            max_length = max(
                len(str(ws.cell(row=r + 1, column=col_idx).value)) for r in range(len(rows) + 1)
            )
            # Add a little extra padding
            ws.column_dimensions[column_letter].width = max_length + 2
    # Ensure the target directory exists (output_path may be a bare filename)
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    wb.save(output_path)
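A short sketch of the export helpers using a stub resolver, so no catalogue, API, or real coordinates are needed (all site names and values illustrative):

```python
from datetime import date
from mileage_logger.logic.detect_itinerary import Hop, SiteConfig, SiteEntry

class StubResolver:
    vehicle_label = "SH11 DRV (Own 1.6CC Diesel Car/Van)"
    job_role = "ICT Technician"
    def resolve(self, origin, dest, o_coords, d_coords):
        return 13.2  # fixed distance for the sketch

sites = SiteConfig([
    SiteEntry("Home", "Home", 52.63, 1.30, 100.0, []),
    SiteEntry("SiteA", "Example Site A", 52.62, 1.48, 150.0, []),
])
rows = build_monthly_rows([Hop(date(2025, 8, 4), "Home", "SiteA")], sites, StubResolver())
write_monthly_workbook(rows, "mileage_2025-08.xlsx")  # one "2025-08" sheet, one row
```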
mileage_logger/gui.py (new file, 131 lines)
@@ -0,0 +1,131 @@
"""Simple web GUI for the mileage logger.

This module exposes a FastAPI application that wraps the core
functionality of the mileage logger with a minimal HTML front end. It
allows a user to upload a Google Semantic Location History JSON file
and returns an Excel workbook containing their mileage claims. The
application also renders a basic status page showing the detected
itinerary.

Usage
-----
Run the server using uvicorn:

```
uvicorn mileage_logger.gui:app --reload --port 8000
```

Then navigate to ``http://localhost:8000`` in your web browser. Use
the form to upload a JSON export. After processing, the server will
return an Excel file for download.

Limitations
-----------
This GUI is intentionally lightweight and is not designed for
concurrent multi-user access. It stores uploads only in short-lived
temporary files and does not perform any authentication or
authorisation. For production use consider extending it with proper
user management and storage.
"""

from __future__ import annotations

import os
import tempfile
from io import BytesIO

from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import HTMLResponse, StreamingResponse

from .ingest.semantic_reader import load_place_visits
from .logic.detect_itinerary import SiteConfig, detect_itinerary
from .distance.resolve import DistanceResolver
from .export.excel_writer import build_monthly_rows, write_monthly_workbook


# Load configuration once at startup. You can change the path to
# config/sites.yml if you have customised it. The route catalogue is
# loaded on demand when handling uploads.
DEFAULT_SITE_CONFIG_PATH = os.path.join(os.path.dirname(__file__), "../config/sites.yml")
DEFAULT_ROUTE_CSV_PATH = os.path.join(os.path.dirname(__file__), "../tests/data/routes_golden.csv")

site_config: SiteConfig = SiteConfig.from_yaml(DEFAULT_SITE_CONFIG_PATH)

app = FastAPI(title="Mileage Logger GUI")


@app.get("/", response_class=HTMLResponse)
async def index() -> str:
    """Render a simple upload form."""
    return """
    <html>
      <head>
        <title>Mileage Logger</title>
      </head>
      <body>
        <h1>Mileage Logger</h1>
        <p>Select a Google Takeout JSON file to process. The file
        should contain the "timelineObjects" array from your Semantic
        Location History export.</p>
        <form action="/process" method="post" enctype="multipart/form-data">
          <input type="file" name="file" accept="application/json" required />
          <br/><br/>
          <label for="vehicle">Vehicle description:</label>
          <input type="text" id="vehicle" name="vehicle" value="SH11 DRV (Own 1.6CC Diesel Car/Van)" />
          <br/><br/>
          <label for="job_role">Job role:</label>
          <input type="text" id="job_role" name="job_role" value="ICT Technician" />
          <br/><br/>
          <input type="submit" value="Process" />
        </form>
      </body>
    </html>
    """


@app.post("/process")
async def process_file(
    file: UploadFile = File(...),
    vehicle: str = Form("SH11 DRV (Own 1.6CC Diesel Car/Van)"),
    job_role: str = Form("ICT Technician"),
) -> StreamingResponse:
    """Handle an upload and return an Excel workbook.

    The uploaded file is saved to a temporary file on disk and then
    passed through the same pipeline as the CLI. The resulting
    workbook contains one sheet per month and is returned as a
    streaming response.
    """
    # Persist the upload to a temporary file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as tmp_in:
        contents = await file.read()
        tmp_in.write(contents)
        tmp_in.flush()
        input_path = tmp_in.name
    # Parse visits and detect the itinerary
    visits = load_place_visits(input_path)
    hops = detect_itinerary(visits, site_config)
    resolver = DistanceResolver(route_csv_path=DEFAULT_ROUTE_CSV_PATH, vehicle_label=vehicle, job_role=job_role)
    rows_by_month = build_monthly_rows(hops, site_config, resolver)
    # write_monthly_workbook writes to a path, so round-trip through a
    # second temporary file and load the result into memory.
    output_stream = BytesIO()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp_out:
        output_path = tmp_out.name
    write_monthly_workbook(rows_by_month, output_path)
    with open(output_path, "rb") as f:
        output_stream.write(f.read())
    # Clean up both temporary files
    for path in (input_path, output_path):
        try:
            os.remove(path)
        except OSError:
            pass
    # Prepare the response
    output_stream.seek(0)
    filename = "mileage.xlsx"
    headers = {"Content-Disposition": f"attachment; filename={filename}"}
    return StreamingResponse(output_stream, media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", headers=headers)
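The endpoint can be smoke-tested without a running server via FastAPI's TestClient; a sketch assuming an export file exists at the illustrative path and yields at least one recognised hop:

```python
from fastapi.testclient import TestClient

client = TestClient(app)
# "export.json" is an illustrative file name.
with open("export.json", "rb") as f:
    resp = client.post("/process", files={"file": ("export.json", f, "application/json")})
assert resp.status_code == 200
with open("mileage.xlsx", "wb") as out:
    out.write(resp.content)
```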
mileage_logger/ingest/__init__.py (new file, 18 lines)
@@ -0,0 +1,18 @@
"""Subpackage for data ingestion.

The :mod:`mileage_logger.ingest` package contains utilities for reading
Google Semantic Location History JSON exports. The core entry point is
the :func:`load_place_visits` function which converts raw JSON into
structured :class:`PlaceVisit` objects. These objects expose
timezone-aware start and end timestamps as well as geographic
coordinates and the human readable name of the location.
"""

from .semantic_reader import Location, PlaceVisit, ActivitySegment, load_place_visits

__all__ = [
    "Location",
    "PlaceVisit",
    "ActivitySegment",
    "load_place_visits",
]
mileage_logger/ingest/semantic_reader.py (new file, 258 lines)
@@ -0,0 +1,258 @@
"""Parser for Google Semantic Location History exports.

Google Takeout and on-device exports of the Timeline API are provided
as JSON files under a ``timelineObjects`` key. Each entry in
``timelineObjects`` is either a ``placeVisit`` or an ``activitySegment``.
This module exposes data classes representing those events and a
convenient loader that normalises timestamps and coordinate formats.

Timestamps in the source JSON are encoded as millisecond epoch
strings. When loaded these are converted into timezone-aware
:class:`datetime.datetime` objects. Coordinates in the JSON are stored
as integer multiples of 1e-7 degrees; we scale them to floats.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import List

import pytz


@dataclass
class Location:
    """A simple geographic location.

    Attributes
    ----------
    lat : float
        Latitude in decimal degrees.
    lon : float
        Longitude in decimal degrees.
    name : str
        Human readable name of the location as provided by Google.
    """

    lat: float
    lon: float
    name: str


@dataclass
class PlaceVisit:
    """A visit to a single location for a period of time.

    Attributes
    ----------
    location : :class:`Location`
        The geographic coordinates and name of the place.
    start_time : :class:`datetime`
        The timezone-aware start timestamp of the visit.
    end_time : :class:`datetime`
        The timezone-aware end timestamp of the visit.
    """

    location: Location
    start_time: datetime
    end_time: datetime


@dataclass
class ActivitySegment:
    """A movement between two locations.

    While not used directly in itinerary detection, activity segments
    contain useful timing information that could be used to derive the
    start date for a hop between recognised sites. This class is
    provided for completeness and potential future use.
    """

    start_location: Location
    end_location: Location
    start_time: datetime
    end_time: datetime
    activity_type: str


def _ms_to_dt(ms: str, tz: pytz.BaseTzInfo) -> datetime:
    """Convert a millisecond epoch string into a timezone-aware datetime.

    Parameters
    ----------
    ms : str
        Milliseconds since the Unix epoch encoded as a decimal string.
    tz : :class:`pytz.tzinfo.BaseTzInfo`
        The timezone into which to localise the resulting datetime.

    Returns
    -------
    :class:`datetime`
        A timezone-aware datetime corresponding to the input.
    """

    # Google exports store times in milliseconds since the UTC epoch
    ts = int(ms) / 1000.0
    utc_dt = datetime.fromtimestamp(ts, timezone.utc)
    return utc_dt.astimezone(tz)


def _parse_location(raw: dict) -> Location:
    """Parse a location dictionary from the export format.

    The export encodes lat/lon in integer multiples of 1e-7 degrees.
    This helper scales the values into decimals and extracts the
    ``name`` field.

    Parameters
    ----------
    raw : dict
        A mapping containing ``latitudeE7``, ``longitudeE7`` and
        ``name`` keys.

    Returns
    -------
    :class:`Location`
        A populated location object.
    """

    lat = raw.get("latitudeE7")
    lon = raw.get("longitudeE7")
    name = raw.get("name", "")
    return Location(lat=float(lat) / 1e7 if lat is not None else 0.0,
                    lon=float(lon) / 1e7 if lon is not None else 0.0,
                    name=name)


def load_place_visits(path: str, tz_name: str = "Europe/London") -> List[PlaceVisit]:
    """Load all place visits from a Location History JSON file.

    This function supports both the legacy "Semantic Location History"
    exports (containing a top-level ``timelineObjects`` array) and
    newer on-device Timeline exports that expose a ``semanticSegments``
    array. In both cases the goal is to extract "place visits" –
    periods of time spent at a single location.

    For legacy files the timestamps are millisecond epoch strings and
    coordinates are encoded as integer multiples of 1e-7 degrees. For
    device-local exports the timestamps are ISO 8601 strings with
    timezone offsets and coordinates are stored in a ``latLng`` string
    on the ``visit.topCandidate.placeLocation``.

    Parameters
    ----------
    path : str
        Path to the JSON file produced by Google Takeout or the
        on-device Timeline export.
    tz_name : str, optional
        The name of the timezone used for localisation, by default
        ``Europe/London``. See the ``pytz`` documentation for valid
        identifiers.

    Returns
    -------
    list of :class:`PlaceVisit`
        A chronologically ordered list of place visits.
    """

    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    tz = pytz.timezone(tz_name)
    visits: List[PlaceVisit] = []

    # Legacy Semantic Location History format
    if "timelineObjects" in data and isinstance(data["timelineObjects"], list):
        timeline_objects = data.get("timelineObjects", [])
        for obj in timeline_objects:
            if "placeVisit" in obj:
                pv = obj["placeVisit"]
                loc = _parse_location(pv.get("location", {}))
                dur = pv.get("duration", {})
                start_ms = dur.get("startTimestampMs")
                end_ms = dur.get("endTimestampMs")
                if start_ms is None or end_ms is None:
                    # Skip malformed entries
                    continue
                visits.append(PlaceVisit(
                    location=loc,
                    start_time=_ms_to_dt(start_ms, tz),
                    end_time=_ms_to_dt(end_ms, tz),
                ))
            elif "activitySegment" in obj:
                # We ignore activity segments for now; they are parsed here
                # only to support potential future features such as deriving
                # more accurate hop start times.
                seg = obj["activitySegment"]
                start_loc = _parse_location(seg.get("startLocation", {}))
                end_loc = _parse_location(seg.get("endLocation", {}))
                dur = seg.get("duration", {})
                start_ms = dur.get("startTimestampMs")
                end_ms = dur.get("endTimestampMs")
                if start_ms is None or end_ms is None:
                    continue
                # Create an ActivitySegment instance (unused for now).
                # It is not appended to the visits list because itinerary
                # detection only relies on place visits.
                _ = ActivitySegment(
                    start_location=start_loc,
                    end_location=end_loc,
                    start_time=_ms_to_dt(start_ms, tz),
                    end_time=_ms_to_dt(end_ms, tz),
                    activity_type=seg.get("activityType", "UNKNOWN"),
                )
    # New device-local Timeline export format
    elif "semanticSegments" in data and isinstance(data["semanticSegments"], list):
        try:
            from dateutil import parser as dateutil_parser  # type: ignore
        except ImportError:
            raise ImportError(
                "python-dateutil is required to parse device-local Timeline exports. "
                "Install it with 'pip install python-dateutil'."
            )
        for segment in data["semanticSegments"]:
            # Only interested in visit segments; skip activities and path-only entries
            visit = segment.get("visit")
            if not visit:
                continue
            # Extract start and end times (ISO 8601 with timezone offsets)
            start_time_iso = segment.get("startTime")
            end_time_iso = segment.get("endTime")
            if not start_time_iso or not end_time_iso:
                continue
            try:
                start_dt = dateutil_parser.isoparse(start_time_iso).astimezone(tz)
                end_dt = dateutil_parser.isoparse(end_time_iso).astimezone(tz)
            except (ValueError, OverflowError):
                # Skip unparseable times
                continue
            # Extract coordinates; stored as "latLng": "lat°, lon°"
            place_loc = visit.get("topCandidate", {}).get("placeLocation", {})
            latlng_str = place_loc.get("latLng")
            if not latlng_str:
                continue
            # Strip the degree symbol and split into lat/lon components
            try:
                lat_str, lon_str = [c.strip().replace("°", "") for c in latlng_str.split(",")]
                lat = float(lat_str)
                lon = float(lon_str)
            except Exception:
                continue
            # Use the semantic type or label as the name if available
            candidate = visit.get("topCandidate", {})
            name = candidate.get("label") or candidate.get("semanticType") or ""
            visits.append(PlaceVisit(
                location=Location(lat=lat, lon=lon, name=str(name)),
                start_time=start_dt,
                end_time=end_dt,
            ))
    # Ignore any other structures (e.g. rawSignals, userLocationProfile)
    else:
        # If the file doesn't contain known keys, return an empty list
        return []

    # Sort visits chronologically by start time
    visits.sort(key=lambda v: v.start_time)
    return visits
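A minimal legacy-format round trip through the loader (coordinates and timestamps illustrative):

```python
import json
import tempfile

export = {"timelineObjects": [{"placeVisit": {
    "location": {"latitudeE7": 526300000, "longitudeE7": 13000000, "name": "Home"},
    "duration": {"startTimestampMs": "1754895600000", "endTimestampMs": "1754899200000"},
}}]}
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(export, f)
visits = load_place_visits(f.name)
# E7 coordinates are scaled to 52.63, 1.30; timestamps localised to Europe/London.
print(visits[0].location.name, visits[0].start_time.isoformat())
```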
mileage_logger/logic/__init__.py (new file, 13 lines)
@@ -0,0 +1,13 @@
"""Business logic for detecting work itineraries.

This package exposes functions used to interpret a chronologically
ordered list of :class:`PlaceVisit` objects and reduce them into a
sequence of 'hops' between recognised work locations. Recognition is
driven by a site configuration file (YAML) that defines canonical
names, friendly labels, optional aliases and geofences for each
location.
"""

from .detect_itinerary import SiteConfig, SiteEntry, Hop, detect_itinerary

__all__ = ["SiteConfig", "SiteEntry", "Hop", "detect_itinerary"]
mileage_logger/logic/detect_itinerary.py (new file, 176 lines)
@@ -0,0 +1,176 @@
"""Detect ordered hops between whitelisted sites in a day's timeline.

We process visits per calendar day (Europe/London), resetting state each
day. We also support injecting a synthetic Home→FirstSite hop when the
first recognised site of the day isn't Home (assume_home_start).
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import date
from typing import Dict, Iterable, List, Optional, Tuple
from collections import defaultdict
import math

import yaml

from ..ingest.semantic_reader import Location, PlaceVisit


def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Compute the great-circle distance between two points in miles."""
    R = 3958.8  # Earth radius in miles
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2.0) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2.0) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a)))
    return R * c


@dataclass
class SiteEntry:
    """Represents a single recognised site from the configuration."""

    canonical: str
    label: str
    lat: float
    lon: float
    radius_m: float
    aliases: List[str]


class SiteConfig:
    """Holds all recognised site definitions keyed by canonical name."""

    def __init__(self, sites: Iterable[SiteEntry]):
        # Materialise the iterable first: it is consumed twice below.
        sites = list(sites)
        self.by_canonical: Dict[str, SiteEntry] = {s.canonical: s for s in sites}
        self.alias_map: Dict[str, str] = {}
        for site in sites:
            for alias in [site.canonical] + site.aliases:
                self.alias_map[alias.lower()] = site.canonical

    @classmethod
    def from_yaml(cls, path: str) -> "SiteConfig":
        """Load a site configuration from a YAML file."""
        with open(path, "r", encoding="utf-8") as f:
            raw = yaml.safe_load(f)
        sites_data: List[Dict[str, object]] = []
        if isinstance(raw, list):
            sites_data = raw
        elif isinstance(raw, dict):
            if "sites" in raw and isinstance(raw["sites"], list):
                sites_data = raw["sites"]
            else:
                for canon, entry in raw.items():
                    entry = entry or {}
                    if not isinstance(entry, dict):
                        raise ValueError(f"Site entry for {canon} must be a mapping")
                    entry = dict(entry)
                    entry.setdefault("canonical", canon)
                    sites_data.append(entry)
        else:
            raise ValueError("Invalid site configuration format")
        sites: List[SiteEntry] = []
        for entry in sites_data:
            canonical = entry.get("canonical") or entry.get("name")
            if not canonical:
                raise ValueError("Site entry missing canonical name")
            label = entry.get("label", canonical)
            lat = float(entry.get("lat", 0.0))
            lon = float(entry.get("lon", 0.0))
            radius_m = float(entry.get("radius_m", 0.0))
            aliases = entry.get("aliases") or []
            sites.append(SiteEntry(
                canonical=canonical,
                label=label,
                lat=lat,
                lon=lon,
                radius_m=radius_m,
                aliases=list(aliases),
            ))
        return cls(sites)

    def recognise(self, location: Location) -> Optional[str]:
        """Return the canonical site name if this location matches by name/alias or geofence."""
        name_lower = (location.name or "").lower()
        # Pass 1: name/alias substring match
        for alias, canonical in self.alias_map.items():
            if alias in name_lower:
                return canonical
        # Pass 2: geofence match
        for canonical, site in self.by_canonical.items():
            if site.radius_m > 0:
                max_dist_miles = site.radius_m / 1609.34
                d = haversine_distance(location.lat, location.lon, site.lat, site.lon)
                if d <= max_dist_miles:
                    return canonical
        return None


@dataclass
class Hop:
    """A hop from one recognised site to another, dated by the origin's start date."""

    date: date
    origin: str
    destination: str


def _build_day_hops(day_visits: List[PlaceVisit], site_config: SiteConfig, assume_home_start: bool) -> List[Hop]:
    """Build ordered hops for a single day of visits."""
    # Ensure chronological order by *start* time
    day_visits = sorted(day_visits, key=lambda v: v.start_time)

    recognised: List[Tuple[str, PlaceVisit]] = []
    last_site: Optional[str] = None
    for v in day_visits:
        s = site_config.recognise(v.location)
        if not s:
            continue
        if s == last_site:
            continue  # ignore back-to-back duplicates
        recognised.append((s, v))
        last_site = s

    if not recognised:
        return []

    # Inject Home at the start if enabled and the first site isn't Home
    if assume_home_start and recognised[0][0] != "Home":
        first_time = recognised[0][1].start_time
        synthetic_home = PlaceVisit(location=Location(lat=0.0, lon=0.0, name="Home"),
                                    start_time=first_time, end_time=first_time)
        recognised.insert(0, ("Home", synthetic_home))

    # Walk forward, stopping at the second Home
    hops: List[Hop] = []
    home_hits = 1 if recognised and recognised[0][0] == "Home" else 0
    for i in range(1, len(recognised)):
        origin_site, origin_visit = recognised[i - 1]
        dest_site, _dest_visit = recognised[i]
        hop_date = origin_visit.start_time.date()
        if origin_site != dest_site:
            hops.append(Hop(date=hop_date, origin=origin_site, destination=dest_site))
        if dest_site == "Home":
            home_hits += 1
            if home_hits >= 2:
                break
    return hops


def detect_itinerary(visits: List[PlaceVisit], site_config: SiteConfig, *, assume_home_start: bool = True) -> List[Hop]:
    """Reduce all visits into ordered hops per day, concatenated across the file."""
    if not visits:
        return []

    # Group by the local date of each visit's start_time
    by_day: Dict[date, List[PlaceVisit]] = defaultdict(list)
    for v in visits:
        by_day[v.start_time.date()].append(v)

    hops_all: List[Hop] = []
    for day in sorted(by_day.keys()):
        day_hops = _build_day_hops(by_day[day], site_config, assume_home_start=assume_home_start)
        hops_all.extend(day_hops)
    return hops_all
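A small in-code walkthrough of recognition and hop detection, with two illustrative sites and a synthetic day of visits:

```python
from datetime import datetime
import pytz

tz = pytz.timezone("Europe/London")
sites = SiteConfig([
    SiteEntry("Home", "Home", 52.6300, 1.3000, 150.0, []),
    SiteEntry("SiteA", "Example Site A", 52.6200, 1.4800, 150.0, ["example site a"]),
])
visits = [
    PlaceVisit(Location(52.6300, 1.3000, "Home"),
               tz.localize(datetime(2025, 8, 4, 8, 0)), tz.localize(datetime(2025, 8, 4, 8, 30))),
    PlaceVisit(Location(52.6200, 1.4800, "Example Site A"),
               tz.localize(datetime(2025, 8, 4, 9, 0)), tz.localize(datetime(2025, 8, 4, 16, 0))),
    PlaceVisit(Location(52.6300, 1.3000, "Home"),
               tz.localize(datetime(2025, 8, 4, 17, 0)), tz.localize(datetime(2025, 8, 4, 23, 0))),
]
# Two hops: Home -> SiteA and SiteA -> Home, both dated 2025-08-04;
# the walk stops at the second Home visit.
print(detect_itinerary(visits, sites))
```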