"""Parser for Google Semantic Location History exports. Google Takeout and on-device exports of the Timeline API are provided as JSON files under a ``timelineObjects`` key. Each entry in ``timelineObjects`` is either a ``placeVisit`` or an ``activitySegment``. This module exposes data classes representing those events and a convenient loader that normalises timestamps and coordinate formats. Timestamps in the source JSON are encoded as millisecond epoch strings. When loaded these are converted into timezone-aware :class:`datetime.datetime` objects. Coordinates in the JSON are stored as integer multiples of 1e-7 degrees; we scale them to floats. """ from __future__ import annotations import json from dataclasses import dataclass from datetime import datetime, timezone from typing import Iterable, List, Optional import pytz @dataclass class Location: """A simple geographic location. Attributes ---------- lat : float Latitude in decimal degrees. lon : float Longitude in decimal degrees. name : str Human readable name of the location as provided by Google. """ lat: float lon: float name: str @dataclass class PlaceVisit: """A visit to a single location for a period of time. Attributes ---------- location : :class:`Location` The geographic coordinates and name of the place. start_time : :class:`datetime` The timezone-aware start timestamp of the visit. end_time : :class:`datetime` The timezone-aware end timestamp of the visit. """ location: Location start_time: datetime end_time: datetime @dataclass class ActivitySegment: """A movement between two locations. While not used directly in itinerary detection, activity segments contain useful timing information that could be used to derive the start date for a hop between recognised sites. This class is provided for completeness and potential future use. """ start_location: Location end_location: Location start_time: datetime end_time: datetime activity_type: str def _ms_to_dt(ms: str, tz: pytz.BaseTzInfo) -> datetime: """Convert a millisecond epoch string into a timezone-aware datetime. Parameters ---------- ms : str Milliseconds since the Unix epoch encoded as a decimal string. tz : :class:`pytz.tzinfo.BaseTzInfo` The timezone into which to localise the resulting datetime. Returns ------- :class:`datetime` A timezone-aware datetime corresponding to the input. """ # Google exports store times in milliseconds since UTC epoch ts = int(ms) / 1000.0 utc_dt = datetime.fromtimestamp(ts, timezone.utc) return utc_dt.astimezone(tz) def _parse_location(raw: dict) -> Location: """Parse a location dictionary from the export format. The export encodes lat/lon in integer multiples of 1e-7 degrees. This helper scales the values into decimals and extracts the ``name`` field. Parameters ---------- raw : dict A mapping containing ``latitudeE7``, ``longitudeE7`` and ``name`` keys. Returns ------- :class:`Location` A populated location object. """ lat = raw.get("latitudeE7") lon = raw.get("longitudeE7") name = raw.get("name", "") return Location(lat=float(lat) / 1e7 if lat is not None else 0.0, lon=float(lon) / 1e7 if lon is not None else 0.0, name=name) def load_place_visits(path: str, tz_name: str = "Europe/London") -> List[PlaceVisit]: """Load all place visits from a Location History JSON file. This function supports both the legacy "Semantic Location History" exports (containing a top-level ``timelineObjects`` array) and newer on-device Timeline exports that expose a ``semanticSegments`` array. In both cases the goal is to extract "place visits" – periods of time spent at a single location. For legacy files the timestamps are millisecond epoch strings and coordinates are encoded as integer multiples of 1e-7 degrees. For device-local exports the timestamps are ISO 8601 strings with timezone offsets and coordinates are stored in a ``latLng`` string on the ``visit.topCandidate.placeLocation``. Parameters ---------- path : str Path to the JSON file produced by Google Takeout or the on-device Timeline export. tz_name : str, optional The name of the timezone used for localisation, by default ``Europe/London``. See the ``pytz`` documentation for valid identifiers. Returns ------- list of :class:`PlaceVisit` A chronologically ordered list of place visits. """ with open(path, "r", encoding="utf-8") as f: data = json.load(f) tz = pytz.timezone(tz_name) visits: List[PlaceVisit] = [] # Legacy Semantic Location History format if "timelineObjects" in data and isinstance(data["timelineObjects"], list): timeline_objects = data.get("timelineObjects", []) for obj in timeline_objects: if "placeVisit" in obj: pv = obj["placeVisit"] loc = _parse_location(pv.get("location", {})) dur = pv.get("duration", {}) start_ms = dur.get("startTimestampMs") end_ms = dur.get("endTimestampMs") if start_ms is None or end_ms is None: # Skip malformed entries continue visits.append(PlaceVisit( location=loc, start_time=_ms_to_dt(start_ms, tz), end_time=_ms_to_dt(end_ms, tz), )) elif "activitySegment" in obj: # We ignore activity segments for now; they are parsed here # only to support potential future features such as deriving # more accurate hop start times. seg = obj["activitySegment"] start_loc = _parse_location(seg.get("startLocation", {})) end_loc = _parse_location(seg.get("endLocation", {})) dur = seg.get("duration", {}) start_ms = dur.get("startTimestampMs") end_ms = dur.get("endTimestampMs") if start_ms is None or end_ms is None: continue # Create ActivitySegment instance (unused for now) # The object is not appended to the visits list because # itinerary detection only relies on place visits. _ = ActivitySegment( start_location=start_loc, end_location=end_loc, start_time=_ms_to_dt(start_ms, tz), end_time=_ms_to_dt(end_ms, tz), activity_type=seg.get("activityType", "UNKNOWN"), ) # New device-local Timeline export format elif "semanticSegments" in data and isinstance(data["semanticSegments"], list): try: from dateutil import parser as dateutil_parser # type: ignore except ImportError: raise ImportError( "python-dateutil is required to parse device-local Timeline exports. " "Install it with 'pip install python-dateutil'." ) for segment in data["semanticSegments"]: # Only interested in visit segments; skip activities and path-only entries visit = segment.get("visit") if not visit: continue # Extract start and end times (ISO 8601 with timezone offsets) start_time_iso = segment.get("startTime") end_time_iso = segment.get("endTime") if not start_time_iso or not end_time_iso: continue try: start_dt = dateutil_parser.isoparse(start_time_iso).astimezone(tz) end_dt = dateutil_parser.isoparse(end_time_iso).astimezone(tz) except (ValueError, OverflowError): # Skip unparseable times continue # Extract coordinates; stored as "latLng": "lat°, lon°" place_loc = visit.get("topCandidate", {}).get("placeLocation", {}) latlng_str = place_loc.get("latLng") if not latlng_str: continue # Strip degree symbol and split into lat/lon components try: lat_str, lon_str = [c.strip().replace("°", "") for c in latlng_str.split(",")] lat = float(lat_str) lon = float(lon_str) except Exception: continue # Use the semantic type or label as the name if available candidate = visit.get("topCandidate", {}) name = candidate.get("label") or candidate.get("semanticType") or "" visits.append(PlaceVisit( location=Location(lat=lat, lon=lon, name=str(name)), start_time=start_dt, end_time=end_dt, )) # Ignore any other structures (e.g. rawSignals, userLocationProfile) else: # If the file doesn't contain known keys, return empty list return [] # Sort visits chronologically by start time visits.sort(key=lambda v: v.start_time) return visits