338 changes: 338 additions & 0 deletions analyze_people_clothing.py
@@ -0,0 +1,338 @@
#!/usr/bin/env python3
"""
Analyze an image to detect people and estimate their clothing colors.

- Detect people using OpenCV's built-in HOG person detector
- For each detected person, sample a torso region to estimate clothing color
- If clothing appears grayscale (low saturation), label as 'light' or 'dark'
- Otherwise, map the dominant hue to a coarse color name (e.g., red, green, blue)
- Output a simple ASCII table with one row per person

Usage:
python analyze_people_clothing.py --image path/to/image.jpg

Optional flags:
--save-annotated path/to/output.jpg Save image with bounding boxes and labels
--min-conf 0.0..1.0 Minimum detection confidence to keep (default 0.0)
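Example output (illustrative; actual rows depend on the input image):
    People detected: 2
    +--------+----------------+
    | Person | Clothing Color |
    +========+================+
    | 1      | blue           |
    | 2      | dark           |
    +--------+----------------+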

Dependencies:
- opencv-python
- numpy
"""
from __future__ import annotations

import argparse
from dataclasses import dataclass
from typing import List, Tuple

import cv2
import numpy as np


@dataclass
class Detection:
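    """A detected person's bounding box (original-image pixels) plus its HOG SVM score."""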
x: int
y: int
w: int
h: int
confidence: float


def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Detect people and estimate clothing colors from an image.",
)
parser.add_argument(
"--image", required=True, type=str, help="Path to the input image file",
)
parser.add_argument(
"--save-annotated", type=str, default=None,
help="Optional path to save an annotated image with boxes and labels",
)
parser.add_argument(
"--min-conf", type=float, default=0.0,
help="Minimum detection confidence (0.0-1.0) for HOG detections to keep",
)
return parser.parse_args()


def load_image(image_path: str) -> np.ndarray:
image = cv2.imread(image_path)
if image is None:
raise FileNotFoundError(f"Could not read image at '{image_path}'")
return image


def initialize_hog_detector() -> cv2.HOGDescriptor:
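    """Create a HOG descriptor preloaded with OpenCV's default people-detector SVM."""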
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
return hog


def detect_people(image_bgr: np.ndarray, min_confidence: float) -> List[Detection]:
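    """Run the HOG person detector and return NMS-filtered detections
    in original-image coordinates."""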
hog = initialize_hog_detector()

# HOG works best on reasonably sized images; limit max dimension to ~1200 px for speed
height, width = image_bgr.shape[:2]
scale = 1.0
max_dim = max(height, width)
resized_image = image_bgr
if max_dim > 1200:
scale = 1200.0 / max_dim
resized_image = cv2.resize(image_bgr, (int(width * scale), int(height * scale)))

# Detect people
rects, weights = hog.detectMultiScale(
resized_image,
winStride=(8, 8),
padding=(8, 8),
scale=1.05,
)

# Map detections back to original scale
detections: List[Detection] = []
for (x, y, w, h), conf in zip(rects, weights):
if scale != 1.0:
x = int(x / scale)
y = int(y / scale)
w = int(w / scale)
h = int(h / scale)
        # detectMultiScale returns weights as an (N, 1) array of raw SVM
        # scores (unbounded, not probabilities); flatten each to a float
        try:
            confidence = float(np.asarray(conf).ravel()[0])
        except (TypeError, ValueError, IndexError):
            confidence = 0.0
detections.append(Detection(x=x, y=y, w=w, h=h, confidence=confidence))

# Filter by confidence threshold if provided
if min_confidence > 0.0 and len(detections) > 0:
        # Convert HOG weights to a relative 0..1 score via min-max scaling.
        # Note: this forces the weakest detection to 0 and the strongest to 1,
        # so any positive threshold always drops at least the weakest box.
raw = np.array([d.confidence for d in detections], dtype=np.float32)
min_val, max_val = float(np.min(raw)), float(np.max(raw))
denom = (max_val - min_val) if (max_val - min_val) > 1e-6 else 1.0
norm = (raw - min_val) / denom
        for d, score in zip(detections, norm):
            d.confidence = float(score)
detections = [d for d in detections if d.confidence >= min_confidence]

# Apply non-maximum suppression to reduce overlapping boxes
detections = non_max_suppression(detections, overlap_threshold=0.65)
return detections


def non_max_suppression(detections: List[Detection], overlap_threshold: float = 0.65) -> List[Detection]:
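    """Greedy non-maximum suppression (Malisiewicz-style).

    Repeatedly keeps the highest-scoring box and discards remaining boxes
    whose overlap with it exceeds the threshold; overlap is measured
    relative to the candidate box's own area rather than IoU.
    """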
if len(detections) == 0:
return []

boxes = np.array([[d.x, d.y, d.x + d.w, d.y + d.h] for d in detections], dtype=np.float32)
scores = np.array([d.confidence for d in detections], dtype=np.float32)

pick: List[int] = []

x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]

areas = (x2 - x1 + 1) * (y2 - y1 + 1)
idxs = np.argsort(scores) # low to high; pop from end for high to low

while len(idxs) > 0:
last = idxs[-1]
pick.append(int(last))

suppress = [len(idxs) - 1]
for pos in range(0, len(idxs) - 1):
i = last
j = idxs[pos]

xx1 = max(x1[i], x1[j])
yy1 = max(y1[i], y1[j])
xx2 = min(x2[i], x2[j])
yy2 = min(y2[i], y2[j])

w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
overlap = (w * h) / areas[j]

if overlap > overlap_threshold:
suppress.append(pos)

idxs = np.delete(idxs, suppress)

return [detections[i] for i in pick]


def crop_clothing_region(image_bgr: np.ndarray, det: Detection) -> np.ndarray:
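    """Return a heuristic torso crop for a detection, clipped to the image bounds."""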
height, width = image_bgr.shape[:2]

x0 = max(0, det.x)
y0 = max(0, det.y)
x1 = min(width, det.x + det.w)
y1 = min(height, det.y + det.h)

w = max(0, x1 - x0)
h = max(0, y1 - y0)
if w == 0 or h == 0:
return image_bgr[0:0, 0:0]

    # Heuristic torso region: roughly 35%-80% of the box height,
    # with a 10% horizontal margin on each side
torso_top = y0 + int(0.35 * h)
torso_bottom = y0 + int(0.80 * h)
torso_left = x0 + int(0.10 * w)
torso_right = x0 + int(0.90 * w)

torso_top = max(0, min(height, torso_top))
torso_bottom = max(0, min(height, torso_bottom))
torso_left = max(0, min(width, torso_left))
torso_right = max(0, min(width, torso_right))

if torso_bottom <= torso_top or torso_right <= torso_left:
return image_bgr[0:0, 0:0]

roi = image_bgr[torso_top:torso_bottom, torso_left:torso_right]
return roi


def estimate_clothing_color(roi_bgr: np.ndarray) -> str:
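    """Classify the dominant clothing color of a torso ROI.

    Low overall saturation is treated as grayscale clothing ('light'/'dark');
    otherwise the peak of the hue histogram maps to a coarse color name.
    """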
if roi_bgr.size == 0:
return "unknown"

# Reduce noise and sampling load
roi_small = roi_bgr
h_roi, w_roi = roi_bgr.shape[:2]
if max(h_roi, w_roi) > 200:
scale = 200.0 / max(h_roi, w_roi)
roi_small = cv2.resize(roi_bgr, (int(w_roi * scale), int(h_roi * scale)))

roi_small = cv2.GaussianBlur(roi_small, (5, 5), 0)

hsv = cv2.cvtColor(roi_small, cv2.COLOR_BGR2HSV)
h = hsv[:, :, 0].astype(np.float32) # 0..179
s = hsv[:, :, 1].astype(np.float32) # 0..255
v = hsv[:, :, 2].astype(np.float32) # 0..255

s_median = float(np.median(s)) / 255.0
v_median = float(np.median(v)) / 255.0

# If low saturation overall, likely black/white/gray clothing
if s_median < 0.22:
return "light" if v_median >= 0.6 else "dark"

# Focus on colorful pixels to estimate dominant hue
colorful_mask = s > 50 # filter out dull/gray pixels
colorful_hues = h[colorful_mask]
colorful_vs = v[colorful_mask]

if colorful_hues.size == 0:
# Fallback to light/dark if saturation is not strong enough overall
return "light" if v_median >= 0.6 else "dark"

# Build hue histogram and find peak
hist_bins = 180
hist, _ = np.histogram(colorful_hues, bins=hist_bins, range=(0, 180))
dominant_h = int(np.argmax(hist)) # 0..179

# Heuristic for brown vs orange: darker values with orange hue -> brown
is_brown = (10 <= dominant_h <= 25) and (float(np.median(colorful_vs)) < 150)

if is_brown:
return "brown"

return hue_to_color_name(dominant_h)


def hue_to_color_name(hue: int) -> str:
# Map OpenCV hue (0-179) to coarse color names
if hue <= 10 or hue >= 170:
return "red"
if 11 <= hue <= 25:
return "orange"
if 26 <= hue <= 35:
return "yellow"
if 36 <= hue <= 85:
return "green"
if 86 <= hue <= 95:
return "cyan"
if 96 <= hue <= 130:
return "blue"
if 131 <= hue <= 150:
return "purple"
if 151 <= hue <= 169:
return "pink"
return "unknown"


def format_table(rows: List[Tuple[int, str]]) -> str:
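    """Render (person index, color) rows as a boxed ASCII table."""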
headers = ["Person", "Clothing Color"]
person_col_width = max(len(headers[0]), max((len(str(idx)) for idx, _ in rows), default=0))
color_col_width = max(len(headers[1]), max((len(color) for _, color in rows), default=0))

def sep(char: str = "-") -> str:
return f"+{char * (person_col_width + 2)}+{char * (color_col_width + 2)}+"

lines = [
sep("-"),
f"| {headers[0].ljust(person_col_width)} | {headers[1].ljust(color_col_width)} |",
sep("="),
]
for idx, color in rows:
lines.append(f"| {str(idx).ljust(person_col_width)} | {color.ljust(color_col_width)} |")
lines.append(sep("-"))
return "\n".join(lines)


def annotate_image(image_bgr: np.ndarray, detections: List[Detection], labels: List[str]) -> np.ndarray:
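    """Draw green bounding boxes with white-on-black labels for each detection."""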
annotated = image_bgr.copy()
for det, label in zip(detections, labels):
pt1 = (det.x, det.y)
pt2 = (det.x + det.w, det.y + det.h)
cv2.rectangle(annotated, pt1, pt2, (0, 255, 0), 2)
text = label
(text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
x_text = det.x
y_text = max(0, det.y - 10)
cv2.rectangle(
annotated,
(x_text, y_text - text_h - baseline),
(x_text + text_w + 6, y_text + baseline),
(0, 0, 0),
thickness=-1,
)
cv2.putText(annotated, text, (x_text + 3, y_text), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
return annotated


def main() -> None:
args = parse_args()
image = load_image(args.image)

detections = detect_people(image, min_confidence=args.min_conf)

if len(detections) == 0:
print("No people detected.")
return

clothing_colors: List[str] = []
for det in detections:
roi = crop_clothing_region(image, det)
color_name = estimate_clothing_color(roi)
clothing_colors.append(color_name)

rows = [(i + 1, clothing_colors[i]) for i in range(len(clothing_colors))]

print(f"People detected: {len(detections)}")
print(format_table(rows))

if args.save_annotated:
labels = [f"{i+1}: {c}" for i, c in enumerate(clothing_colors)]
annotated = annotate_image(image, detections, labels)
ok = cv2.imwrite(args.save_annotated, annotated)
if ok:
print(f"Annotated image saved to: {args.save_annotated}")
else:
print(f"Failed to save annotated image to: {args.save_annotated}")


if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions requirements.txt
@@ -0,0 +1,2 @@
opencv-python>=4.8.0
numpy>=1.23.0