258 lines
9.2 KiB
Python
258 lines
9.2 KiB
Python
"""Parser for Google Semantic Location History exports.
|
||
|
||
Google Takeout and on-device exports of the Timeline API are provided
|
||
as JSON files under a ``timelineObjects`` key. Each entry in
|
||
``timelineObjects`` is either a ``placeVisit`` or an ``activitySegment``.
|
||
This module exposes data classes representing those events and a
|
||
convenient loader that normalises timestamps and coordinate formats.
|
||
|
||
Timestamps in the source JSON are encoded as millisecond epoch
|
||
strings. When loaded these are converted into timezone-aware
|
||
:class:`datetime.datetime` objects. Coordinates in the JSON are stored
|
||
as integer multiples of 1e-7 degrees; we scale them to floats.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from dataclasses import dataclass
|
||
from datetime import datetime, timezone
|
||
from typing import Iterable, List, Optional
|
||
|
||
import pytz
|
||
|
||
|
||
@dataclass
class Location:
    """A named point on the globe.

    Attributes
    ----------
    lat : float
        Latitude, expressed in decimal degrees.
    lon : float
        Longitude, expressed in decimal degrees.
    name : str
        The human-readable place name supplied by Google.
    """

    # Decimal degrees (already scaled from the export's E7 integers).
    lat: float
    lon: float
    # Display name of the place; may be an empty string.
    name: str
|
||
|
||
|
||
@dataclass
class PlaceVisit:
    """A stay at one location over a span of time.

    Attributes
    ----------
    location : :class:`Location`
        Where the visit took place (coordinates plus place name).
    start_time : :class:`datetime`
        Timezone-aware instant at which the visit began.
    end_time : :class:`datetime`
        Timezone-aware instant at which the visit ended.
    """

    # The visited place.
    location: Location
    # Visit boundaries as timezone-aware datetimes.
    start_time: datetime
    end_time: datetime
|
||
|
||
|
||
@dataclass
class ActivitySegment:
    """A journey from one location to another.

    Itinerary detection does not consume activity segments directly.
    Their timing information could, however, be used to work out when a
    hop between recognised sites began, so the class is kept for
    completeness and possible future use.
    """

    # Endpoints of the movement.
    start_location: Location
    end_location: Location
    # Segment boundaries as datetimes.
    start_time: datetime
    end_time: datetime
    # Activity label taken from the export.
    activity_type: str
|
||
|
||
|
||
def _ms_to_dt(ms: str, tz: pytz.BaseTzInfo) -> datetime:
|
||
"""Convert a millisecond epoch string into a timezone-aware datetime.
|
||
|
||
Parameters
|
||
----------
|
||
ms : str
|
||
Milliseconds since the Unix epoch encoded as a decimal string.
|
||
tz : :class:`pytz.tzinfo.BaseTzInfo`
|
||
The timezone into which to localise the resulting datetime.
|
||
|
||
Returns
|
||
-------
|
||
:class:`datetime`
|
||
A timezone-aware datetime corresponding to the input.
|
||
"""
|
||
|
||
# Google exports store times in milliseconds since UTC epoch
|
||
ts = int(ms) / 1000.0
|
||
utc_dt = datetime.fromtimestamp(ts, timezone.utc)
|
||
return utc_dt.astimezone(tz)
|
||
|
||
|
||
def _parse_location(raw: dict) -> Location:
    """Build a :class:`Location` from an export-format location mapping.

    Latitude and longitude are read from the ``latitudeE7`` and
    ``longitudeE7`` keys, which hold integer multiples of 1e-7 degrees,
    and are scaled to decimal degrees. The ``name`` key is copied across.

    Parameters
    ----------
    raw : dict
        Mapping that may contain ``latitudeE7``, ``longitudeE7`` and
        ``name`` keys.

    Returns
    -------
    :class:`Location`
        The populated location. A missing coordinate becomes ``0.0`` and
        a missing name becomes the empty string.
    """

    def _degrees(key: str) -> float:
        # Absent (or null) coordinates fall back to 0.0 rather than failing.
        scaled = raw.get(key)
        if scaled is None:
            return 0.0
        return float(scaled) / 1e7

    return Location(
        lat=_degrees("latitudeE7"),
        lon=_degrees("longitudeE7"),
        name=raw.get("name", ""),
    )
|
||
|
||
|
||
def _legacy_place_visits(timeline_objects: list, tz: pytz.BaseTzInfo) -> List[PlaceVisit]:
    """Extract place visits from a legacy ``timelineObjects`` array."""
    visits: List[PlaceVisit] = []
    for obj in timeline_objects:
        # Only ``placeVisit`` entries are relevant. Activity segments (and
        # any other entry kind) are skipped outright: building objects that
        # are immediately discarded would only give malformed activity data
        # a chance to abort the whole load.
        if "placeVisit" not in obj:
            continue
        pv = obj["placeVisit"]
        dur = pv.get("duration", {})
        start_ms = dur.get("startTimestampMs")
        end_ms = dur.get("endTimestampMs")
        if start_ms is None or end_ms is None:
            # Skip malformed entries
            continue
        try:
            start_dt = _ms_to_dt(start_ms, tz)
            end_dt = _ms_to_dt(end_ms, tz)
        except (TypeError, ValueError, OverflowError, OSError):
            # Non-numeric or out-of-range timestamps are skipped, in line
            # with how unparseable ISO timestamps are treated for
            # device-local exports.
            continue
        visits.append(PlaceVisit(
            location=_parse_location(pv.get("location", {})),
            start_time=start_dt,
            end_time=end_dt,
        ))
    return visits


def _device_place_visits(segments: list, tz: pytz.BaseTzInfo) -> List[PlaceVisit]:
    """Extract place visits from a device-local ``semanticSegments`` array."""
    # Imported lazily so that legacy files can be parsed without dateutil.
    try:
        from dateutil import parser as dateutil_parser  # type: ignore
    except ImportError as exc:
        raise ImportError(
            "python-dateutil is required to parse device-local Timeline exports. "
            "Install it with 'pip install python-dateutil'."
        ) from exc

    visits: List[PlaceVisit] = []
    for segment in segments:
        # Only interested in visit segments; skip activities and path-only entries
        visit = segment.get("visit")
        if not visit:
            continue

        # Start and end times are ISO 8601 strings with timezone offsets
        start_time_iso = segment.get("startTime")
        end_time_iso = segment.get("endTime")
        if not start_time_iso or not end_time_iso:
            continue
        try:
            start_dt = dateutil_parser.isoparse(start_time_iso).astimezone(tz)
            end_dt = dateutil_parser.isoparse(end_time_iso).astimezone(tz)
        except (ValueError, OverflowError):
            # Skip unparseable times
            continue

        # Coordinates are stored as "latLng": "lat°, lon°"
        candidate = visit.get("topCandidate", {})
        latlng_str = candidate.get("placeLocation", {}).get("latLng")
        if not latlng_str:
            continue
        try:
            # Strip the degree symbol and split into lat/lon components.
            lat_str, lon_str = [
                part.strip().replace("°", "") for part in latlng_str.split(",")
            ]
            lat = float(lat_str)
            lon = float(lon_str)
        except (AttributeError, TypeError, ValueError):
            # Not a string, wrong number of components, or non-numeric text.
            continue

        # Use the label or semantic type as the name if available
        name = candidate.get("label") or candidate.get("semanticType") or ""
        visits.append(PlaceVisit(
            location=Location(lat=lat, lon=lon, name=str(name)),
            start_time=start_dt,
            end_time=end_dt,
        ))
    return visits


def load_place_visits(path: str, tz_name: str = "Europe/London") -> List[PlaceVisit]:
    """Load all place visits from a Location History JSON file.

    This function supports both the legacy "Semantic Location History"
    exports (containing a top-level ``timelineObjects`` array) and
    newer on-device Timeline exports that expose a ``semanticSegments``
    array. In both cases the goal is to extract "place visits" –
    periods of time spent at a single location.

    For legacy files the timestamps are millisecond epoch strings and
    coordinates are encoded as integer multiples of 1e-7 degrees. For
    device-local exports the timestamps are ISO 8601 strings with
    timezone offsets and coordinates are stored in a ``latLng`` string
    on the ``visit.topCandidate.placeLocation``.

    Entries with missing or unparseable timestamps, and device-local
    entries with missing or unparseable coordinates, are skipped rather
    than aborting the load.

    Parameters
    ----------
    path : str
        Path to the JSON file produced by Google Takeout or the
        on-device Timeline export.
    tz_name : str, optional
        The name of the timezone used for localisation, by default
        ``Europe/London``. See the ``pytz`` documentation for valid
        identifiers.

    Returns
    -------
    list of :class:`PlaceVisit`
        A chronologically ordered list of place visits. The list is
        empty when the file contains neither known top-level key.

    Raises
    ------
    ImportError
        If the file is a device-local export and ``python-dateutil`` is
        not installed.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    tz = pytz.timezone(tz_name)

    # A JSON document whose root is not an object cannot hold either format.
    if not isinstance(data, dict):
        return []

    timeline_objects = data.get("timelineObjects")
    semantic_segments = data.get("semanticSegments")
    if isinstance(timeline_objects, list):
        # Legacy Semantic Location History format
        visits = _legacy_place_visits(timeline_objects, tz)
    elif isinstance(semantic_segments, list):
        # New device-local Timeline export format
        visits = _device_place_visits(semantic_segments, tz)
    else:
        # Ignore any other structures (e.g. rawSignals, userLocationProfile)
        return []

    # Sort visits chronologically by start time
    visits.sort(key=lambda v: v.start_time)
    return visits