cs712/preprocess.py at master · inflaton/cs712 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import cv2
from pathlib import Path
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import albumentations as A
from albumentations.pytorch import ToTensorV2

# RANDOM_SEED = 193

# # initialising seed for reproducibility
# torch.manual_seed(RANDOM_SEED)
# torch.cuda.manual_seed(RANDOM_SEED)
# seeded_generator = torch.Generator().manual_seed(RANDOM_SEED)
# np.random.seed(RANDOM_SEED)
# random.seed(RANDOM_SEED)
# torch.backends.cudnn.deterministic = True

# Select the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("device: ", device)

# Side length, in pixels, of the square crop produced by the training pipeline.
input_size = 224
# Per-channel statistics handed to the Normalize steps.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training-time augmentation steps, applied in the order listed:
# resize so the shorter side is input_size + 48, random shift/scale/rotate,
# random input_size x input_size crop, random colour and brightness/contrast
# jitter, normalisation, and finally conversion to a torch tensor.
_training_steps = [
    A.SmallestMaxSize(max_size=input_size + 48),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=15,
        p=0.5,
    ),
    A.RandomCrop(height=input_size, width=input_size),
    A.RGBShift(
        r_shift_limit=15,
        g_shift_limit=15,
        b_shift_limit=15,
        p=0.5,
    ),
    A.RandomBrightnessContrast(p=0.5),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
]

preprocess_training_image = A.Compose(_training_steps)

folders = ["train", "validation", "test"]

# Build the feature extractor once, before iterating over the folders: the
# model does not depend on the folder, and it is only ever used in eval mode
# under torch.no_grad(), so re-creating it per folder would only repeat the
# weight loading and the host-to-device transfer.
pretrained_model = models.resnet50(
    weights="ResNet50_Weights.DEFAULT"
)  # Use updated weights argument
# Keep every child module except the last one, so the network returns pooled
# features instead of class scores.
pretrained_model = nn.Sequential(*list(pretrained_model.children())[:-1])
pretrained_model = pretrained_model.to(device)
pretrained_model.eval()

for folder in folders:
    # Define the directory where your images are located
    data_dir = f"data/{folder}"
    print("data_dir: ", data_dir)

    # Preprocess the images and extract image vectors
    def preprocess_and_extract_vectors(image_path, preprocess=None):
        """Load one image, preprocess it and return its feature vector.

        Args:
            image_path: Location of the image file (``str`` or path-like).
            preprocess: Optional callable invoked as
                ``preprocess(image=<RGB array>)`` that returns a mapping with
                an ``"image"`` entry (the albumentations calling convention).
                When ``None``, the image is read with PIL, resized to
                224 x 224, converted to a tensor and normalized with
                ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

        Returns:
            The result of ``extract_image_vector`` for the preprocessed image.

        Raises:
            OSError: If OpenCV cannot read ``image_path`` (albumentations
                branch only).
        """
        # cv2.imread needs a plain string; fspath also accepts pathlib paths.
        image_path = os.fspath(image_path)

        if preprocess is None:
            # convert() returns a fully loaded copy, so the file handle can be
            # released as soon as the with-block ends.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
            default_preprocess = transforms.Compose(
                [
                    transforms.Resize(
                        (224, 224)
                    ),  # Resize to match the pretrained model's input size
                    transforms.ToTensor(),  # Convert image to tensor
                    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),  # Normalize
                ]
            )
            image = default_preprocess(image)
        else:
            # use cv2 for albumentation
            image = cv2.imread(image_path)
            if image is None:
                # imread signals failure by returning None instead of raising;
                # fail here with the path rather than inside cvtColor.
                raise OSError(f"cv2.imread could not read image: {image_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = preprocess(image=image)["image"]

        # Extract image vector using the pretrained model
        return extract_image_vector(image)

    def extract_image_vector(image):
        """Run one preprocessed image tensor through ``pretrained_model``.

        The tensor gets a leading batch dimension of size 1 and is moved to
        ``device``. The forward pass runs without gradient tracking, and the
        output is flattened to 1-D and returned as a NumPy array on the CPU.
        """
        batch = image.unsqueeze(0).to(device)
        with torch.no_grad():
            features = pretrained_model(batch)
        return features.view(-1).cpu().numpy()

    # Define the total number of data points and images per data point
    images_per_data_point = 36
    # Every directory entry is counted; floor division drops a trailing
    # incomplete group.
    total_data_points = len(os.listdir(data_dir)) // images_per_data_point
    print("total_data_points: ", total_data_points)

    # The training split is processed ten times with the randomized
    # augmentation pipeline (presumably so each pass yields differently
    # augmented features); the other splits are processed once with the
    # default deterministic preprocessing.
    if folder == "train":
        fold = 10
        image_preprocess = preprocess_training_image
    else:
        fold = 1
        image_preprocess = None

    for i in range(fold):
        filename = (
            f"data/preprocessed_{folder}_{i}.npy"
            if fold > 1
            else f"data/preprocessed_{folder}.npy"
        )

        # Existing output files are kept as they are, so reruns do not redo
        # passes that already finished.
        if Path(filename).is_file():
            print(f"File {filename} exists - skipping ...", flush=True)
            continue

        print(f"File {filename} does not exist - creating a new one ...", flush=True)

        # Zero-initialised (not np.empty) so that the slot of an image that is
        # missing on disk holds zeros rather than uninitialised memory.
        data = np.zeros(
            (total_data_points, images_per_data_point, 2048), dtype=np.float32
        )  # Assuming ResNet-50 outputs 2048-dimensional vectors
        missing_images = 0

        # Loop through each data point
        for data_point_index in range(total_data_points):
            # Loop through each image within a data point
            for image_index in range(images_per_data_point):
                # Construct the image file path
                image_filename = f"{data_point_index}_{image_index}.jpg"
                image_path = os.path.join(data_dir, image_filename)

                # Missing files are skipped (best effort) but counted so the
                # gap is reported instead of passing silently.
                if not os.path.exists(image_path):
                    missing_images += 1
                    continue

                # Preprocess the image, extract its vector and store it
                data[data_point_index, image_index] = preprocess_and_extract_vectors(
                    image_path, image_preprocess
                )

        if missing_images:
            print(
                f"warning: {missing_images} expected image(s) not found in "
                f"{data_dir}; their vectors are left as zeros",
                flush=True,
            )

        np.save(filename, data)
        print(f"saved file: {filename}", flush=True)