diff --git a/analyze_people_clothing.py b/analyze_people_clothing.py
new file mode 100644
index 00000000..8ed831b2
--- /dev/null
+++ b/analyze_people_clothing.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+"""
+Analyze an image to detect people and estimate their clothing colors.
+
+- Detect people using OpenCV's built-in HOG person detector
+- For each detected person, sample a torso region to estimate clothing color
+- If clothing appears grayscale (low saturation), label it 'light' or 'dark'
+- Otherwise, map the dominant hue to a coarse color name (e.g., red, green, blue)
+- Output a simple ASCII table with one row per person
+
+Usage:
+    python analyze_people_clothing.py --image path/to/image.jpg
+
+Optional flags:
+    --save-annotated path/to/output.jpg   Save image with bounding boxes and labels
+    --min-conf 0.0..1.0                   Minimum detection confidence to keep (default 0.0)
+
+Dependencies:
+    - opencv-python
+    - numpy
+"""
+from __future__ import annotations
+
+import argparse
+from dataclasses import dataclass
+from typing import List, Tuple
+
+import cv2
+import numpy as np
+
+
+@dataclass
+class Detection:
+    x: int
+    y: int
+    w: int
+    h: int
+    confidence: float
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Detect people and estimate clothing colors from an image.",
+    )
+    parser.add_argument(
+        "--image", required=True, type=str, help="Path to the input image file",
+    )
+    parser.add_argument(
+        "--save-annotated", type=str, default=None,
+        help="Optional path to save an annotated image with boxes and labels",
+    )
+    parser.add_argument(
+        "--min-conf", type=float, default=0.0,
+        help="Minimum relative detection confidence (0.0-1.0); scores are min-max scaled per image",
+    )
+    return parser.parse_args()
+
+
+def load_image(image_path: str) -> np.ndarray:
+    image = cv2.imread(image_path)
+    if image is None:
+        raise FileNotFoundError(f"Could not read image at '{image_path}'")
+    return image
+
+
+def initialize_hog_detector() -> cv2.HOGDescriptor:
+    hog = cv2.HOGDescriptor()
+    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
+    return hog
+
+
+def detect_people(image_bgr: np.ndarray, min_confidence: float) -> List[Detection]:
+    hog = initialize_hog_detector()
+
+    # HOG works best on reasonably sized images; cap the max dimension at ~1200 px for speed
+    height, width = image_bgr.shape[:2]
+    scale = 1.0
+    max_dim = max(height, width)
+    resized_image = image_bgr
+    if max_dim > 1200:
+        scale = 1200.0 / max_dim
+        resized_image = cv2.resize(image_bgr, (int(width * scale), int(height * scale)))
+
+    # Detect people
+    rects, weights = hog.detectMultiScale(
+        resized_image,
+        winStride=(8, 8),
+        padding=(8, 8),
+        scale=1.05,
+    )
+
+    # Map detections back to original scale
+    detections: List[Detection] = []
+    for (x, y, w, h), conf in zip(rects, weights):
+        if scale != 1.0:
+            x = int(x / scale)
+            y = int(y / scale)
+            w = int(w / scale)
+            h = int(h / scale)
+        # OpenCV returns SVM decision scores (larger = more confident), not
+        # calibrated probabilities; weights may arrive as shape (N, 1), so
+        # flatten before converting to float
+        try:
+            confidence = float(np.ravel(conf)[0])
+        except Exception:
+            confidence = 0.0
+        detections.append(Detection(x=x, y=y, w=w, h=h, confidence=confidence))
+
+    # Filter by confidence threshold if provided
+    if min_confidence > 0.0 and len(detections) > 0:
+        # Convert HOG scores to a relative 0..1 score via min-max scaling
+        raw = np.array([d.confidence for d in detections], dtype=np.float32)
+        min_val, max_val = float(np.min(raw)), float(np.max(raw))
+        spread = max_val - min_val
+        if spread > 1e-6:
+            norm = (raw - min_val) / spread
+        else:
+            # All scores are (near-)identical; keep every detection rather
+            # than collapsing all scores to 0 and dropping them
+            norm = np.ones_like(raw)
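+        # Note (heuristic): min-max scaling makes scores comparable only
+        # within a single image, so --min-conf thresholds the relative score
+        # range of this image rather than a calibrated probability.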
+        for i, d in enumerate(detections):
+            detections[i].confidence = float(norm[i])
+        detections = [d for d in detections if d.confidence >= min_confidence]
+
+    # Apply non-maximum suppression to reduce overlapping boxes
+    detections = non_max_suppression(detections, overlap_threshold=0.65)
+    return detections
+
+
+def non_max_suppression(detections: List[Detection], overlap_threshold: float = 0.65) -> List[Detection]:
+    if len(detections) == 0:
+        return []
+
+    boxes = np.array([[d.x, d.y, d.x + d.w, d.y + d.h] for d in detections], dtype=np.float32)
+    scores = np.array([d.confidence for d in detections], dtype=np.float32)
+
+    pick: List[int] = []
+
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    idxs = np.argsort(scores)  # low to high; pop from the end for high to low
+
+    while len(idxs) > 0:
+        last = idxs[-1]
+        pick.append(int(last))
+
+        suppress = [len(idxs) - 1]
+        for pos in range(0, len(idxs) - 1):
+            i = last
+            j = idxs[pos]
+
+            xx1 = max(x1[i], x1[j])
+            yy1 = max(y1[i], y1[j])
+            xx2 = min(x2[i], x2[j])
+            yy2 = min(y2[i], y2[j])
+
+            w = max(0.0, xx2 - xx1 + 1)
+            h = max(0.0, yy2 - yy1 + 1)
+            # Overlap measured relative to the candidate box's own area
+            overlap = (w * h) / areas[j]
+
+            if overlap > overlap_threshold:
+                suppress.append(pos)
+
+        idxs = np.delete(idxs, suppress)
+
+    return [detections[i] for i in pick]
+
+
+def crop_clothing_region(image_bgr: np.ndarray, det: Detection) -> np.ndarray:
+    height, width = image_bgr.shape[:2]
+
+    x0 = max(0, det.x)
+    y0 = max(0, det.y)
+    x1 = min(width, det.x + det.w)
+    y1 = min(height, det.y + det.h)
+
+    w = max(0, x1 - x0)
+    h = max(0, y1 - y0)
+    if w == 0 or h == 0:
+        return image_bgr[0:0, 0:0]
+
+    # Heuristic torso region: 35%-80% of the box height, with a 10% horizontal margin
+    torso_top = y0 + int(0.35 * h)
+    torso_bottom = y0 + int(0.80 * h)
+    torso_left = x0 + int(0.10 * w)
+    torso_right = x0 + int(0.90 * w)
+
+    torso_top = max(0, min(height, torso_top))
+    torso_bottom = max(0, min(height, torso_bottom))
+    torso_left = max(0, min(width, torso_left))
+    torso_right = max(0, min(width, torso_right))
+
+    if torso_bottom <= torso_top or torso_right <= torso_left:
+        return image_bgr[0:0, 0:0]
+
+    roi = image_bgr[torso_top:torso_bottom, torso_left:torso_right]
+    return roi
+
+
+def estimate_clothing_color(roi_bgr: np.ndarray) -> str:
+    if roi_bgr.size == 0:
+        return "unknown"
+
+    # Reduce noise and sampling load
+    roi_small = roi_bgr
+    h_roi, w_roi = roi_bgr.shape[:2]
+    if max(h_roi, w_roi) > 200:
+        scale = 200.0 / max(h_roi, w_roi)
+        roi_small = cv2.resize(roi_bgr, (int(w_roi * scale), int(h_roi * scale)))
+
+    roi_small = cv2.GaussianBlur(roi_small, (5, 5), 0)
+
+    hsv = cv2.cvtColor(roi_small, cv2.COLOR_BGR2HSV)
+    h = hsv[:, :, 0].astype(np.float32)  # 0..179
+    s = hsv[:, :, 1].astype(np.float32)  # 0..255
+    v = hsv[:, :, 2].astype(np.float32)  # 0..255
+
+    s_median = float(np.median(s)) / 255.0
+    v_median = float(np.median(v)) / 255.0
+
+    # If saturation is low overall, the clothing is likely black/white/gray
+    if s_median < 0.22:
+        return "light" if v_median >= 0.6 else "dark"
+
+    # Focus on colorful pixels to estimate the dominant hue
+    colorful_mask = s > 50  # filter out dull/gray pixels
+    colorful_hues = h[colorful_mask]
+    colorful_vs = v[colorful_mask]
+
+    if colorful_hues.size == 0:
+        # Fall back to light/dark if saturation is not strong enough overall
+        return "light" if v_median >= 0.6 else "dark"
+
+    # Build a hue histogram and find its peak
+    hist_bins = 180
+    hist, _ = np.histogram(colorful_hues, bins=hist_bins, range=(0, 180))
+    dominant_h = int(np.argmax(hist))  # 0..179
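+    # Note (heuristic): OpenCV hue wraps around at 180, so strong reds can
+    # split between bins near 0 and bins near 179; hue_to_color_name() maps
+    # both ends of the range to "red" to compensate.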
+
+    # Heuristic for brown vs orange: darker values with an orange hue -> brown
+    is_brown = (10 <= dominant_h <= 25) and (float(np.median(colorful_vs)) < 150)
+
+    if is_brown:
+        return "brown"
+
+    return hue_to_color_name(dominant_h)
+
+
+def hue_to_color_name(hue: int) -> str:
+    # Map OpenCV hue (0-179) to coarse color names
+    if hue <= 10 or hue >= 170:
+        return "red"
+    if 11 <= hue <= 25:
+        return "orange"
+    if 26 <= hue <= 35:
+        return "yellow"
+    if 36 <= hue <= 85:
+        return "green"
+    if 86 <= hue <= 95:
+        return "cyan"
+    if 96 <= hue <= 130:
+        return "blue"
+    if 131 <= hue <= 150:
+        return "purple"
+    if 151 <= hue <= 169:
+        return "pink"
+    return "unknown"
+
+
+def format_table(rows: List[Tuple[int, str]]) -> str:
+    headers = ["Person", "Clothing Color"]
+    person_col_width = max(len(headers[0]), max((len(str(idx)) for idx, _ in rows), default=0))
+    color_col_width = max(len(headers[1]), max((len(color) for _, color in rows), default=0))
+
+    def sep(char: str = "-") -> str:
+        return f"+{char * (person_col_width + 2)}+{char * (color_col_width + 2)}+"
+
+    lines = [
+        sep("-"),
+        f"| {headers[0].ljust(person_col_width)} | {headers[1].ljust(color_col_width)} |",
+        sep("="),
+    ]
+    for idx, color in rows:
+        lines.append(f"| {str(idx).ljust(person_col_width)} | {color.ljust(color_col_width)} |")
+    lines.append(sep("-"))
+    return "\n".join(lines)
+
+
+def annotate_image(image_bgr: np.ndarray, detections: List[Detection], labels: List[str]) -> np.ndarray:
+    annotated = image_bgr.copy()
+    for det, label in zip(detections, labels):
+        pt1 = (det.x, det.y)
+        pt2 = (det.x + det.w, det.y + det.h)
+        cv2.rectangle(annotated, pt1, pt2, (0, 255, 0), 2)
+        text = label
+        (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
+        x_text = det.x
+        # Keep the label fully on-screen even when the box touches the top edge
+        y_text = max(text_h + baseline, det.y - 10)
+        cv2.rectangle(
+            annotated,
+            (x_text, y_text - text_h - baseline),
+            (x_text + text_w + 6, y_text + baseline),
+            (0, 0, 0),
+            thickness=-1,
+        )
+        cv2.putText(annotated, text, (x_text + 3, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+    return annotated
+
+
+def main() -> None:
+    args = parse_args()
+    image = load_image(args.image)
+
+    detections = detect_people(image, min_confidence=args.min_conf)
+
+    if len(detections) == 0:
+        print("No people detected.")
+        return
+
+    clothing_colors: List[str] = []
+    for det in detections:
+        roi = crop_clothing_region(image, det)
+        color_name = estimate_clothing_color(roi)
+        clothing_colors.append(color_name)
+
+    rows = [(i + 1, clothing_colors[i]) for i in range(len(clothing_colors))]
+
+    print(f"People detected: {len(detections)}")
+    print(format_table(rows))
+
+    if args.save_annotated:
+        labels = [f"{i+1}: {c}" for i, c in enumerate(clothing_colors)]
+        annotated = annotate_image(image, detections, labels)
+        ok = cv2.imwrite(args.save_annotated, annotated)
+        if ok:
+            print(f"Annotated image saved to: {args.save_annotated}")
+        else:
+            print(f"Failed to save annotated image to: {args.save_annotated}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..62891556
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+opencv-python>=4.8.0
+numpy>=1.23.0
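Example invocation, to show the expected output shape. The file names and the
table contents below are hypothetical, not output from a real run:

    $ pip install -r requirements.txt
    $ python analyze_people_clothing.py --image group_photo.jpg --save-annotated annotated.jpg
    People detected: 2
    +--------+----------------+
    | Person | Clothing Color |
    +========+================+
    | 1      | blue           |
    | 2      | dark           |
    +--------+----------------+
    Annotated image saved to: annotated.jpg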