200 lines
8.1 KiB
Python
200 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import random
|
|
import shutil
|
|
|
|
from pathlib import Path
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
if __package__ in (None, ""):
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
|
from common import DEFAULT_SPEED_VALUES, DEFAULT_WORKSPACE, ensure_dir, resolve_workspace # type: ignore
|
|
from generate_synthetic_us_speed_limits import KNOWN_REAL_CROPS, augment_sign, render_regulatory_sign # type: ignore
|
|
else:
|
|
from .common import DEFAULT_SPEED_VALUES, DEFAULT_WORKSPACE, ensure_dir, resolve_workspace
|
|
from .generate_synthetic_us_speed_limits import KNOWN_REAL_CROPS, augment_sign, render_regulatory_sign
|
|
|
|
|
|
VALUE_TEMPLATE_ROIS = (
|
|
(0.35, 0.82, 0.15, 0.78),
|
|
(0.45, 0.85, 0.18, 0.78),
|
|
(0.40, 0.84, 0.18, 0.75),
|
|
)
|
|
|
|
|
|
def normalize_binary_mask(binary_mask: np.ndarray, size=(72, 96), padding=6):
|
|
points = cv2.findNonZero(binary_mask)
|
|
if points is None:
|
|
return None
|
|
|
|
x, y, width, height = cv2.boundingRect(points)
|
|
digit = binary_mask[y:y + height, x:x + width]
|
|
target_w, target_h = size
|
|
scale = min((target_w - padding * 2) / max(width, 1), (target_h - padding * 2) / max(height, 1))
|
|
resized_w = max(int(round(width * scale)), 1)
|
|
resized_h = max(int(round(height * scale)), 1)
|
|
resized = cv2.resize(digit, (resized_w, resized_h), interpolation=cv2.INTER_NEAREST)
|
|
|
|
canvas = np.zeros((target_h, target_w), dtype=np.uint8)
|
|
offset_x = (target_w - resized_w) // 2
|
|
offset_y = (target_h - resized_h) // 2
|
|
canvas[offset_y:offset_y + resized_h, offset_x:offset_x + resized_w] = resized
|
|
return canvas
|
|
|
|
|
|
def extract_value_mask(sign_bgr: np.ndarray):
|
|
gray = cv2.cvtColor(sign_bgr, cv2.COLOR_BGR2GRAY)
|
|
height, width = gray.shape
|
|
best_mask = None
|
|
best_fill = 0.0
|
|
|
|
for top_ratio, bottom_ratio, left_ratio, right_ratio in VALUE_TEMPLATE_ROIS:
|
|
roi = gray[int(height * top_ratio):int(height * bottom_ratio), int(width * left_ratio):int(width * right_ratio)]
|
|
if roi.size == 0:
|
|
continue
|
|
|
|
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8)).apply(roi)
|
|
_, binary = cv2.threshold(clahe, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
|
|
binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, np.ones((2, 2), dtype=np.uint8))
|
|
|
|
num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary, 8)
|
|
mask = np.zeros_like(binary)
|
|
for label_idx in range(1, num_labels):
|
|
x, y, comp_w, comp_h, area = stats[label_idx]
|
|
if area < roi.shape[0] * roi.shape[1] * 0.01:
|
|
continue
|
|
if y < binary.shape[0] * 0.08:
|
|
continue
|
|
if comp_h < binary.shape[0] * 0.18:
|
|
continue
|
|
if comp_w > binary.shape[1] * 0.75:
|
|
continue
|
|
mask[labels == label_idx] = 255
|
|
|
|
normalized = normalize_binary_mask(mask, size=(72, 96))
|
|
if normalized is None:
|
|
continue
|
|
|
|
fill_ratio = float(np.count_nonzero(normalized)) / normalized.size
|
|
if fill_ratio > best_fill:
|
|
best_fill = fill_ratio
|
|
best_mask = normalized
|
|
|
|
return best_mask
|
|
|
|
|
|
def perspective_jitter(sign_rgba, rng: random.Random):
|
|
sign = np.array(sign_rgba)
|
|
sign_h, sign_w = sign.shape[:2]
|
|
pad = max(sign_w, sign_h) // 5
|
|
canvas = np.zeros((sign_h + pad * 2, sign_w + pad * 2, 4), dtype=np.uint8)
|
|
canvas[pad:pad + sign_h, pad:pad + sign_w] = sign
|
|
sign_h, sign_w = canvas.shape[:2]
|
|
|
|
src = np.float32([[0, 0], [sign_w - 1, 0], [sign_w - 1, sign_h - 1], [0, sign_h - 1]])
|
|
jitter_x = sign_w * 0.08
|
|
jitter_y = sign_h * 0.08
|
|
dst = src + np.float32([
|
|
[rng.uniform(-jitter_x, jitter_x), rng.uniform(-jitter_y, jitter_y)],
|
|
[rng.uniform(-jitter_x, jitter_x), rng.uniform(-jitter_y, jitter_y)],
|
|
[rng.uniform(-jitter_x, jitter_x), rng.uniform(-jitter_y, jitter_y)],
|
|
[rng.uniform(-jitter_x, jitter_x), rng.uniform(-jitter_y, jitter_y)],
|
|
])
|
|
matrix = cv2.getPerspectiveTransform(src, dst)
|
|
warped = cv2.warpPerspective(canvas, matrix, (sign_w, sign_h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0, 0))
|
|
ys, xs = np.where(warped[:, :, 3] > 0)
|
|
if len(xs) == 0 or len(ys) == 0:
|
|
return canvas
|
|
return warped[ys.min():ys.max() + 1, xs.min():xs.max() + 1]
|
|
|
|
|
|
def augment_mask(mask: np.ndarray, rng: random.Random):
|
|
canvas = np.zeros((128, 128), dtype=np.uint8)
|
|
resized = cv2.resize(mask, None, fx=rng.uniform(0.85, 1.15), fy=rng.uniform(0.85, 1.15), interpolation=cv2.INTER_NEAREST)
|
|
offset_x = max((canvas.shape[1] - resized.shape[1]) // 2 + rng.randint(-8, 8), 0)
|
|
offset_y = max((canvas.shape[0] - resized.shape[0]) // 2 + rng.randint(-8, 8), 0)
|
|
end_x = min(offset_x + resized.shape[1], canvas.shape[1])
|
|
end_y = min(offset_y + resized.shape[0], canvas.shape[0])
|
|
canvas[offset_y:end_y, offset_x:end_x] = resized[:end_y - offset_y, :end_x - offset_x]
|
|
|
|
if rng.random() < 0.45:
|
|
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (rng.choice((1, 2, 3)), rng.choice((1, 2, 3))))
|
|
operation = cv2.MORPH_DILATE if rng.random() < 0.5 else cv2.MORPH_ERODE
|
|
canvas = cv2.morphologyEx(canvas, operation, kernel)
|
|
if rng.random() < 0.55:
|
|
canvas = cv2.GaussianBlur(canvas, (3, 3), rng.uniform(0.1, 1.0))
|
|
if rng.random() < 0.35:
|
|
noise = np.random.normal(0.0, rng.uniform(2.0, 9.0), canvas.shape).astype(np.float32)
|
|
canvas = np.clip(canvas.astype(np.float32) + noise, 0, 255).astype(np.uint8)
|
|
|
|
return cv2.cvtColor(canvas, cv2.COLOR_GRAY2BGR)
|
|
|
|
|
|
def save_mask(base_dir: Path, split: str, speed_value: int, image_bgr: np.ndarray, stem: str):
|
|
output_dir = ensure_dir(base_dir / split / str(speed_value))
|
|
cv2.imwrite(str(output_dir / f"{stem}.png"), image_bgr)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Generate a value-ROI classifier dataset from synthetic U.S. speed-limit signs.")
|
|
parser.add_argument("--workspace", default=str(DEFAULT_WORKSPACE), help="Training workspace root.")
|
|
parser.add_argument("--train-per-class", type=int, default=1800, help="Synthetic training samples per value.")
|
|
parser.add_argument("--val-per-class", type=int, default=260, help="Synthetic validation samples per value.")
|
|
parser.add_argument("--real-augmentations", type=int, default=28, help="Augmented mask samples to create per known real crop.")
|
|
parser.add_argument("--seed", type=int, default=20260330, help="Random seed.")
|
|
args = parser.parse_args()
|
|
|
|
workspace = resolve_workspace(args.workspace)
|
|
classifier_dir = workspace / "classifier"
|
|
if classifier_dir.exists():
|
|
shutil.rmtree(classifier_dir)
|
|
ensure_dir(classifier_dir / "train")
|
|
ensure_dir(classifier_dir / "val")
|
|
|
|
rng = random.Random(args.seed)
|
|
speed_values = tuple(DEFAULT_SPEED_VALUES)
|
|
|
|
for split, per_class in (("train", max(args.train_per_class, 0)), ("val", max(args.val_per_class, 0))):
|
|
for speed_value in speed_values:
|
|
for index in range(per_class):
|
|
school_zone = speed_value in (15, 20, 25) and rng.random() < 0.45
|
|
sign_rgba = render_regulatory_sign(speed_value, school_zone=school_zone, seed=rng.randint(0, 1_000_000))
|
|
sign_rgba = augment_sign(sign_rgba, rng)
|
|
sign_rgba = perspective_jitter(sign_rgba, rng)
|
|
sign_bgr = cv2.cvtColor(sign_rgba[:, :, :3], cv2.COLOR_RGB2BGR)
|
|
mask = extract_value_mask(sign_bgr)
|
|
if mask is None:
|
|
continue
|
|
output = augment_mask(mask, rng)
|
|
save_mask(classifier_dir, split, speed_value, output, f"{split}_{speed_value}_{index:05d}")
|
|
|
|
repo_root = Path(__file__).resolve().parents[2]
|
|
imported_real = 0
|
|
for relative_path, speed_value in KNOWN_REAL_CROPS:
|
|
crop_path = repo_root / relative_path
|
|
if not crop_path.is_file():
|
|
continue
|
|
crop_bgr = cv2.imread(str(crop_path))
|
|
if crop_bgr is None:
|
|
continue
|
|
mask = extract_value_mask(crop_bgr)
|
|
if mask is None:
|
|
continue
|
|
for augmentation_index in range(max(args.real_augmentations, 1)):
|
|
split = "val" if augmentation_index % 5 == 0 else "train"
|
|
output = augment_mask(mask, rng)
|
|
save_mask(classifier_dir, split, speed_value, output, f"real_{speed_value}_{imported_real:03d}_{augmentation_index:03d}")
|
|
imported_real += 1
|
|
|
|
print(f"Generated ROI classifier dataset in {classifier_dir}")
|
|
print(f"Imported real crops: {imported_real}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|