#!/usr/bin/env python3
from __future__ import annotations

import csv
import json

from pathlib import Path


# Third ancestor directory of this file's resolved location.
REPO_ROOT = Path(__file__).resolve().parents[2]

# Locations below the repository checkout.
DEFAULT_WORKSPACE = REPO_ROOT.joinpath(".tmp", "speed_limit_training")
DEFAULT_DEBUG_BASE = Path("/data/media/0/vision_speed_limit_debug")
REPO_ASSET_DIR = REPO_ROOT.joinpath("starpilot", "assets", "vision_models")
DEFAULT_LOCAL_CLIP_ROOT = REPO_ROOT.joinpath(
  ".tmp", "live_route_clips", "bookmark_windows", "data", "media", "0", "realdata"
)
DEFAULT_LOCAL_QLOG_MTIMES = REPO_ROOT.joinpath(".tmp", "live_routes_meta", "qlog_mtimes.txt")
DEFAULT_LOCAL_FILES_MANIFEST = REPO_ROOT.joinpath(".tmp", "live_routes_meta", "files.txt")
DEFAULT_LOCAL_SESSION_ROUTE_MAP = REPO_ROOT.joinpath(".tmp", "live_routes_meta", "session_route_map.json")

# Fixed external location; the preferred_*() helpers use it only when it exists as a directory.
DEFAULT_EXTERNAL_ROOT = Path("/Volumes/T5/starpilot_speed_limit")

# Detector class names; detector_dataset_yaml() numbers them by their position in this tuple.
DETECTOR_CLASS_NAMES = ("regulatory_speed_limit", "advisory_speed_limit", "school_zone_speed_limit")
# Default speed values: 15 through 75 inclusive, in steps of 5.
DEFAULT_SPEED_VALUES = tuple(range(15, 76, 5))

# File names of the exported ONNX models.
DETECTOR_EXPORT_NAME = "speed_limit_us_detector.onnx"
CLASSIFIER_EXPORT_NAME = "speed_limit_us_value_classifier.onnx"


def resolve_workspace(path: str | Path | None) -> Path:
  return Path(path).expanduser().resolve() if path else DEFAULT_WORKSPACE


def preferred_external_root() -> Path | None:
  """Return ``DEFAULT_EXTERNAL_ROOT`` if it is an existing directory, else ``None``."""
  if DEFAULT_EXTERNAL_ROOT.is_dir():
    return DEFAULT_EXTERNAL_ROOT
  return None


def preferred_analysis_root() -> Path | None:
  """Return the directory that external analysis data is read from.

  Returns ``None`` when ``preferred_external_root()`` returns ``None``. Otherwise
  returns the root's ``analysis`` subdirectory if that directory exists, and the
  external root itself if it does not.
  """
  root = preferred_external_root()
  if root is None:
    return None
  candidate = root / "analysis"
  if candidate.is_dir():
    return candidate
  return root


def preferred_clip_root() -> Path:
  """Return the clip root below the preferred analysis root.

  Falls back to ``DEFAULT_LOCAL_CLIP_ROOT`` when ``preferred_analysis_root()``
  returns ``None``.
  """
  analysis_root = preferred_analysis_root()
  if analysis_root is None:
    return DEFAULT_LOCAL_CLIP_ROOT
  return analysis_root.joinpath("live_route_clips", "bookmark_windows", "data", "media", "0", "realdata")


def preferred_qlog_mtimes_path() -> Path:
  """Return the path of ``qlog_mtimes.txt`` below the preferred analysis root.

  Falls back to ``DEFAULT_LOCAL_QLOG_MTIMES`` when ``preferred_analysis_root()``
  returns ``None``.
  """
  analysis_root = preferred_analysis_root()
  if analysis_root is None:
    return DEFAULT_LOCAL_QLOG_MTIMES
  return analysis_root.joinpath("live_routes_meta", "qlog_mtimes.txt")


def preferred_files_manifest_path() -> Path:
  """Return the path of ``files.txt`` below the preferred analysis root.

  Falls back to ``DEFAULT_LOCAL_FILES_MANIFEST`` when ``preferred_analysis_root()``
  returns ``None``.
  """
  analysis_root = preferred_analysis_root()
  if analysis_root is None:
    return DEFAULT_LOCAL_FILES_MANIFEST
  return analysis_root.joinpath("live_routes_meta", "files.txt")


def preferred_session_route_map_path() -> Path:
  """Return the path of ``session_route_map.json`` below the preferred analysis root.

  Falls back to ``DEFAULT_LOCAL_SESSION_ROUTE_MAP`` when
  ``preferred_analysis_root()`` returns ``None``.
  """
  analysis_root = preferred_analysis_root()
  if analysis_root is None:
    return DEFAULT_LOCAL_SESSION_ROUTE_MAP
  return analysis_root.joinpath("live_routes_meta", "session_route_map.json")


def load_session_route_map(path: str | Path | None = None) -> dict[str, str]:
  route_map_path = Path(path).expanduser().resolve() if path else preferred_session_route_map_path()
  if not route_map_path.is_file():
    return {}
  data = json.loads(route_map_path.read_text(encoding="utf-8"))
  return {str(key): str(value) for key, value in data.items() if key and value}


def default_raw_root(workspace: str | Path | None = None) -> Path:
  """Return the ``raw`` directory that belongs to ``workspace``.

  ``workspace`` is first passed through ``resolve_workspace()``. When the
  resolved workspace sits directly inside a directory named ``workspace``, the
  result is ``raw`` next to that ``workspace`` directory. Otherwise it is
  ``raw`` inside the resolved workspace.
  """
  resolved = resolve_workspace(workspace)
  container = resolved.parent
  anchor = container.parent if container.name == "workspace" else resolved
  return anchor / "raw"


def ensure_dir(path: Path) -> Path:
  """Create ``path`` as a directory and return the same ``path`` object.

  Missing parent directories are created too; an already existing directory is
  not an error.
  """
  path.mkdir(exist_ok=True, parents=True)
  return path


def write_text(path: Path, text: str, force: bool = False) -> None:
  """Write ``text`` to ``path`` as UTF-8, creating parent directories first.

  An existing ``path`` is left untouched unless ``force`` is true.
  """
  if not path.exists() or force:
    ensure_dir(path.parent)
    path.write_text(text, encoding="utf-8")


def write_csv_header(path: Path, fieldnames: list[str], force: bool = False) -> None:
  """Create ``path`` as a UTF-8 CSV file containing only the header row.

  Parent directories are created first. An existing ``path`` is left untouched
  unless ``force`` is true, in which case the file is overwritten.
  """
  if not path.exists() or force:
    ensure_dir(path.parent)
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as handle:
      csv.DictWriter(handle, fieldnames=fieldnames).writeheader()


def read_jsonl(path: Path) -> list[dict]:
  """Parse the UTF-8 JSON Lines file at ``path``.

  Each line is stripped of surrounding whitespace and, if anything remains,
  decoded with ``json.loads``. Blank lines are skipped. Decoded values are
  returned in file order.
  """
  with path.open("r", encoding="utf-8") as handle:
    stripped_lines = (raw_line.strip() for raw_line in handle)
    return [json.loads(entry) for entry in stripped_lines if entry]


def latest_debug_sessions(debug_base: Path, count: int = 1) -> list[Path]:
  """Return up to ``count`` subdirectories of ``debug_base``, highest path first.

  Subdirectories are ordered by descending path comparison, so the result is
  newest-first only if session directory names sort chronologically. Returns an
  empty list when ``debug_base`` is not a directory or ``count`` is not positive.
  """
  if not debug_base.is_dir():
    return []
  session_dirs = [entry for entry in debug_base.iterdir() if entry.is_dir()]
  session_dirs.sort(reverse=True)
  # A negative count is clamped to zero so it cannot slice from the end.
  limit = count if count > 0 else 0
  return session_dirs[:limit]


def detector_dataset_yaml(workspace: Path) -> str:
  """Build the text of the detector dataset YAML for ``workspace``.

  The text names ``<workspace>/detector`` as ``path``, the ``images/train`` and
  ``images/val`` splits, and a ``names`` mapping that numbers
  ``DETECTOR_CLASS_NAMES`` from 0. It ends with a newline.
  """
  dataset_root = workspace / "detector"
  header = (
    f"path: {dataset_root}",
    "train: images/train",
    "val: images/val",
    "names:",
  )
  name_entries = (f"  {index}: {class_name}" for index, class_name in enumerate(DETECTOR_CLASS_NAMES))
  return "\n".join((*header, *name_entries)) + "\n"


def workspace_readme(speed_values: tuple[int, ...]) -> str:
  """Return the Markdown README text for a training workspace.

  ``speed_values`` is rendered as a single comma-separated list under
  "Suggested classifier values". The rest of the text is fixed.
  """
  value_list = ", ".join(map(str, speed_values))
  return f"""# Speed Limit Vision Training Workspace

This workspace is generated by `scripts/speed_limit_vision/init_workspace.py`.

Directory layout:

- `detector/images/train` and `detector/images/val`: detector training images
- `detector/labels/train` and `detector/labels/val`: YOLO detector labels
- `classifier/value_labels.csv`: value labels used to crop detector boxes into classifier folders
- `classifier/train` and `classifier/val`: classifier-ready crop folders
- `review/images`: imported snapshots from live debug sessions
- `review/bookmarks.csv`: bookmark/publish/candidate manifest built from debug sessions
- `review/leadins/frames` and `review/leadins/contact_sheets`: sampled frames from 5-second pre-bookmark route windows
- `review/bookmark_leadins.csv`: manifest describing sampled pre-bookmark review frames
- `exports`: exported ONNX models
- `runs`: training outputs

Suggested detector classes:

- `regulatory_speed_limit`
- `advisory_speed_limit`
- `school_zone_speed_limit`

Suggested classifier values:

- `{value_list}`

Generated manifests:

- `manifests/raw_sources.csv`: raw public/comma source provenance
- `manifests/public_detector_samples.csv`: imported detector samples and sign metadata
- `manifests/public_classifier_samples.csv`: imported classifier-ready value samples
"""


# Ordered column names for the bookmark manifest.
# NOTE(review): presumably the header of `review/bookmarks.csv` — confirm against the writer.
BOOKMARK_MANIFEST_FIELDS = [
  # record key, source_* columns and session id
  "record_key", "source_name", "source_region", "source_device", "source_driver", "session_id",
  # event columns
  "event_index", "event", "session_seconds", "wall_time", "road_name", "stream", "status",
  # speed-limit / confidence columns
  "candidate_speed_limit_mph", "candidate_confidence",
  "speed_limit_mph", "confidence",
  "source_confidence", "source_event",
  "published_speed_limit_mph", "published_confidence",
  # map columns
  "map_source", "map_current_speed_limit_mph", "map_next_speed_limit_mph",
  "map_next_speed_limit_distance_m", "map_expected_speed_limit_mph", "map_relation",
  "previous_map_speed_limit_mph",
  # review columns and paths
  "review_bucket", "bookmark_count", "snapshot_path", "source_session_path",
]


# Ordered column names for the bookmark lead-in manifest.
# NOTE(review): presumably the header of `review/bookmark_leadins.csv` — confirm against the writer.
BOOKMARK_LEADIN_MANIFEST_FIELDS = [
  # record key, source_* columns and session id
  "record_key", "source_name", "source_region", "source_device", "source_driver", "session_id",
  # bookmark, route and segment columns
  "bookmark_number", "route", "segment",
  # offset columns (names end in _s)
  "segment_offset_s", "leadin_start_s", "sample_offset_s",
  # window result and value columns
  "window_result", "published_values", "candidate_values",
  # path columns
  "frame_path", "contact_sheet_path", "source_video_path",
]


# Ordered column names for classifier value labels.
# NOTE(review): presumably the header of `classifier/value_labels.csv` — confirm against the writer.
VALUE_LABEL_FIELDS = [
  "image_path", "split", "speed_limit_mph",
  "bbox_index", "padding", "label_path",
]


# Ordered column names for the raw-source manifest.
# NOTE(review): presumably the header of `manifests/raw_sources.csv` — confirm against the writer.
RAW_SOURCE_FIELDS = [
  "source_name", "source_version", "source_license", "source_type",
  "raw_path", "notes",
]


# Ordered column names for the public detector sample manifest.
# NOTE(review): presumably the header of `manifests/public_detector_samples.csv` — confirm against the writer.
PUBLIC_DETECTOR_SAMPLE_FIELDS = [
  # record key, source name and split
  "record_key", "source_name", "split",
  # path columns and source image id
  "image_path", "label_path", "annotation_path", "source_image_id",
  # class, value and sign code
  "class_name", "speed_limit_mph", "sign_code",
  # bbox_* columns
  "bbox_left", "bbox_top", "bbox_right", "bbox_bottom",
]


# Ordered column names for the public classifier sample manifest.
# NOTE(review): presumably the header of `manifests/public_classifier_samples.csv` — confirm against the writer.
PUBLIC_CLASSIFIER_SAMPLE_FIELDS = [
  # record key, source name and split
  "record_key", "source_name", "split",
  # image path, value and bbox index
  "image_path", "speed_limit_mph", "bbox_index",
  # label path, source image id and sign code
  "label_path", "source_image_id", "sign_code",
]