-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpreprocess.py
More file actions
147 lines (125 loc) · 5.04 KB
/
preprocess.py
File metadata and controls
147 lines (125 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import cv2
from pathlib import Path
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import albumentations as A
from albumentations.pytorch import ToTensorV2
# RANDOM_SEED = 193
# # initialising seed for reproducibility
# torch.manual_seed(RANDOM_SEED)
# torch.cuda.manual_seed(RANDOM_SEED)
# seeded_generator = torch.Generator().manual_seed(RANDOM_SEED)
# np.random.seed(RANDOM_SEED)
# random.seed(RANDOM_SEED)
# torch.backends.cudnn.deterministic = True
# Select the compute device: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device: ", device)
# Side length, in pixels, of the square crop produced by the training pipeline.
input_size = 224

# Per-channel mean / std handed to the normalisation step.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training-time augmentation steps, applied in list order: resize (smallest
# side set to input_size + 48), random shift/scale/rotate, random
# input_size x input_size crop, RGB shift, brightness/contrast jitter,
# normalisation, and finally conversion to a tensor.
_training_steps = [
    A.SmallestMaxSize(max_size=input_size + 48),
    A.ShiftScaleRotate(
        shift_limit=0.05,
        scale_limit=0.05,
        rotate_limit=15,
        p=0.5,
    ),
    A.RandomCrop(height=input_size, width=input_size),
    A.RGBShift(
        r_shift_limit=15,
        g_shift_limit=15,
        b_shift_limit=15,
        p=0.5,
    ),
    A.RandomBrightnessContrast(p=0.5),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
]
preprocess_training_image = A.Compose(_training_steps)
folders = ["train", "validation", "test"]

# Kept from the original script; nothing in this file reads these two values.
image_width, image_height = 56, 56

# Number of images that make up one data point. Image files are looked up as
# "<data_point_index>_<image_index>.jpg" inside each data folder.
images_per_data_point = 36

# Length of the flattened feature vector stored per image
# (assumes the truncated ResNet-50 outputs 2048 values per image).
FEATURE_DIM = 2048

# Number of separately augmented passes written for the "train" folder.
TRAIN_FOLDS = 10

# Load the pretrained ResNet-50 once: it does not depend on the folder being
# processed, so there is no reason to rebuild it per folder. The last child
# module is dropped so the forward pass returns features rather than the
# output of the network's final layer.
pretrained_model = models.resnet50(
    weights="ResNet50_Weights.DEFAULT"
)  # Use updated weights argument
pretrained_model = nn.Sequential(*list(pretrained_model.children())[:-1])
pretrained_model = pretrained_model.to(device)
pretrained_model.eval()

# Default (non-augmenting) preprocessing, built once instead of per image.
_default_preprocess = transforms.Compose(
    [
        # Resize to the square input size used throughout this script.
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
)


def extract_image_vector(image):
    """Run one preprocessed image tensor through ``pretrained_model``.

    Args:
        image: Tensor for a single image, without a batch dimension.

    Returns:
        A 1-D NumPy array holding the flattened model output.
    """
    batch = image.unsqueeze(0).to(device)
    with torch.no_grad():
        features = pretrained_model(batch)
    return features.view(-1).cpu().numpy()


def preprocess_and_extract_vectors(image_path, preprocess=None):
    """Load an image from disk, preprocess it and return its feature vector.

    Args:
        image_path: Path of the image file to read.
        preprocess: ``None`` to use the default torchvision pipeline
            (read with PIL), or an albumentations-style callable invoked as
            ``preprocess(image=array)["image"]`` (read with cv2).

    Returns:
        The 1-D NumPy feature vector produced by ``extract_image_vector``.

    Raises:
        OSError: If cv2 cannot decode the file on the albumentations path.
    """
    if preprocess is None:
        # Context manager so the underlying file handle is closed promptly.
        with Image.open(image_path) as pil_image:
            image = _default_preprocess(pil_image.convert("RGB"))
    else:
        # use cv2 for albumentation
        image = cv2.imread(os.fspath(image_path))
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"cv2 could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = preprocess(image=image)["image"]
    return extract_image_vector(image)


def _extract_folder_features(data_dir, total_data_points, preprocess):
    """Build the (data points, images, features) array for one pass over a folder."""
    # Zero-initialised (not np.empty) so the slot of a missing image holds
    # zeros rather than whatever happened to be in memory.
    data = np.zeros(
        (total_data_points, images_per_data_point, FEATURE_DIM),
        dtype=np.float32,
    )
    missing = 0
    for data_point_index in range(total_data_points):
        for image_index in range(images_per_data_point):
            image_filename = f"{data_point_index}_{image_index}.jpg"
            image_path = os.path.join(data_dir, image_filename)
            if not os.path.exists(image_path):
                missing += 1
                continue
            data[data_point_index, image_index] = preprocess_and_extract_vectors(
                image_path, preprocess
            )
    if missing:
        print(
            f"warning: {missing} image(s) missing under {data_dir}; "
            "their vectors are left as zeros",
            flush=True,
        )
    return data


for folder in folders:
    # Directory holding this split's images.
    data_dir = f"data/{folder}"
    print("data_dir: ", data_dir)

    total_data_points = len(os.listdir(data_dir)) // images_per_data_point
    print("total_data_points: ", total_data_points)

    # Only the training split is augmented, and it is written several times
    # (one file per augmented pass); the other splits are written once.
    if folder == "train":
        fold = TRAIN_FOLDS
        preprocess = preprocess_training_image
    else:
        fold = 1
        preprocess = None

    for i in range(fold):
        filename = (
            f"data/preprocessed_{folder}_{i}.npy"
            if fold > 1
            else f"data/preprocessed_{folder}.npy"
        )
        # Existing outputs are kept, so an interrupted run can be resumed.
        if Path(filename).is_file():
            print(f"File {filename} exists - skipping ...", flush=True)
            continue
        print(f"File {filename} does not exist - creating a new one ...", flush=True)

        data = _extract_folder_features(data_dir, total_data_points, preprocess)
        np.save(filename, data)
        print(f"saved file: {filename}", flush=True)