-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsift_preprossing.py
More file actions
109 lines (92 loc) · 2.8 KB
/
sift_preprossing.py
File metadata and controls
109 lines (92 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# import the necessary packages
from matplotlib import pyplot as plt
from rootsift import RootSIFT
import numpy as np
import os
import cv2
import json
def load(filename,name,numpy=False):
    """Load previously saved data (e.g. trained-network weights) from a JSON file.

    Parameters:
        filename -- path of the JSON file to read.
        name     -- key inside the top-level JSON object holding the data.
        numpy    -- when True, convert the stored rows into a numpy array.

    Returns:
        A list of the stored values, or a numpy array when ``numpy`` is True.
    """
    # 'with' guarantees the handle is closed even if json.load raises
    # (the original left the file open on a parse error).
    with open(filename, "r") as f:
        data = json.load(f)
    if numpy:
        # Each stored row becomes an ndarray, then all rows are stacked.
        return np.asarray([np.array(w) for w in data[name]])
    # Shallow copy, equivalent to the original [w for w in data[name]].
    return list(data[name])
def save(filename,name,archive):
    """Save network values (e.g. biases and weights) to a JSON file.

    Parameters:
        filename -- path of the JSON file to create or overwrite.
        name     -- key under which the data is stored in the JSON object.
        archive  -- iterable of numpy arrays/scalars; each is serialized
                    via its tolist() method.
    """
    # Removed the no-op `filename = filename` from the original.
    data = {name: [v.tolist() for v in archive]}
    # 'with' guarantees the handle is flushed and closed even on error.
    with open(filename, "w") as f:
        json.dump(data, f)
# 1) Load cifar_10 database
# NOTE(review): print statements and cv2.FeatureDetector_create below imply
# Python 2 with OpenCV 2.4.x (that factory was removed in OpenCV 3) -- confirm
# the intended runtime before porting.
test_folder = "./img/cifar-10/test"
class_names = os.listdir(test_folder) # there is one folder per class
# processing the test folder
print "PROCESSING TEST FOLDER: "
X = []              # per-image lists of RootSIFT descriptors
cluster_data = []   # flat pool of every descriptor (k-means input below)
y = []              # one-hot class labels, kept parallel to X
count = 0           # number of images that yielded descriptors
# extract RootSIFT descriptors
rs = RootSIFT()
# Walk every class folder and turn each image into a set of RootSIFT
# descriptors.  X, cluster_data, y and count are filled in lockstep; the
# histogram-building loop further down relies on this exact ordering.
for name in class_names:
    files = os.listdir(test_folder+"/"+name)
    # transform each file into a feature vector using rootsift
    for file_name in files:
        image = cv2.imread(test_folder+"/"+name+"/"+file_name)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Legacy OpenCV 2.4 factory API (removed in OpenCV 3+).
        detector = cv2.FeatureDetector_create("SIFT")
        kps = detector.detect(gray)
        # extract normal SIFT descriptors
        extractor = cv2.DescriptorExtractor_create("SIFT")
        (kps, descs) = extractor.compute(gray, kps)
        # Re-compute with RootSIFT (Hellinger-normalized SIFT) -- this
        # overwrites the plain SIFT descriptors from the line above.
        (kps, descs) = rs.compute(gray, kps)
        # Images with no keypoints produce no descriptors; skip them
        # (they are the "lost images" reported after the loop).
        if descs is None:
            continue
        vec = descs
        X.append([v for v in vec])            # descriptors grouped per image
        cluster_data.extend([v for v in vec]) # same descriptors, flattened
        y_vec = [0] * len(class_names) # <<<<<<<<<<<<<< HOT ENCODING REPRESENTATION <<<<<
        y_vec[class_names.index(name)] = 1
        y.append(y_vec)
        count += 1
        if count % 1000 == 0:
            print count, " images processed"
# NOTE(review): 10000 is hard-coded as the expected CIFAR-10 test-set size.
print "Lost images: ", 10000-count
# after generate rootsift vector, we will compute the descriptor
# of an image by assigning each SIFT of the image to one of the
# K clusters. In this way you obtain a histogram of length K.
from sklearn.cluster import KMeans
# Toggle: True fits a fresh k-means on the pooled descriptors; False reuses
# centers/labels saved by a previous run.
train = True
if train:
    n_clusters = 500
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(cluster_data)
    centers = kmeans.cluster_centers_
    X_pred = kmeans.labels_     # cluster id of each descriptor, in pool order
else:
    # NOTE(review): the loaded labels are only valid if the descriptor
    # extraction above reproduced exactly the same images in the same order
    # as the run that wrote these files -- verify before using this path.
    centers = load("kmeans_centers.json","input",True)
    X_pred = load("kmeans_labels.json","input",True)
    n_clusters = centers.shape[0]
# Build a bag-of-visual-words histogram per image: aux[k] counts how many of
# the image's descriptors fell into cluster k.  The flat index c walks
# X_pred in the same order the descriptors were pooled into cluster_data.
c = 0
newX = []
for x in X:
    aux = np.zeros((n_clusters))
    for desc in x:
        # desc itself is unused; only the descriptor count per image matters,
        # since X_pred already holds each descriptor's cluster assignment.
        aux[X_pred[c]] += 1
        c += 1
    # Normalization intentionally left disabled (raw counts are saved):
    # mod = np.linalg.norm(aux)
    # aux = aux/aux.sum()
    newX.append(aux)
X = np.asarray(newX)    # (n_images, n_clusters) histogram matrix
y = np.asarray(y)       # (n_images, n_classes) one-hot labels
print X.shape
print X[:10]
# Persist features, labels and the k-means model for later training runs.
save("rootsift_input2.json","input",X)
save("rootsift_output2.json","output",y)
save("kmeans_centers2.json","centers",centers)
save("kmeans_labels2.json","labels",X_pred)