forked from k-l-a/DatasetSearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSearch.py
More file actions
66 lines (56 loc) · 1.53 KB
/
Search.py
File metadata and controls
66 lines (56 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
import faiss
from MetaFeatures import Output
import os
path = ""  # Base folder containing the converted dataset files; set by readData().
defaultPath = "./ConvertedDatasets/"  # Fallback dataset folder used when useDefault is True.
fileList = []  # Filenames appended in the same order as rows of the feature matrix.
searchTerm = ""  # Dataset to use as the query; set by readSearchTerm().
useDefault = False  # When True, readData() uses defaultPath instead of the typed path.
d = 50  # Feature-vector dimension used by normalizeDimension and the FAISS index.
k = 5  # Number of nearest neighbours retrieved by search().
def readData():
    """Set the global dataset base folder ``path``.

    Uses ``defaultPath`` when ``useDefault`` is set; otherwise prompts
    the user for a folder path.

    Fix: the original always showed the input prompt and then discarded
    the answer whenever ``useDefault`` was True.
    """
    global path
    if useDefault:
        path = defaultPath
    else:
        path = input("Path to dataset base folder :")
def getMetafeatures():
    """Build a float32 matrix of metafeatures for every file under ``path``.

    Returns a numpy array of shape (num_files, d); row i corresponds to
    ``fileList[i]``.

    Side effects: appends each discovered filename to the global
    ``fileList`` and prints each filename as it is processed.
    """
    featureList = []
    for filename in os.listdir(path):
        print(filename)
        # os.path.join fixes breakage when `path` lacks a trailing
        # separator (the original concatenated the strings directly).
        metaFeatures = normalizeDimension(Output.get_metafeatures(os.path.join(path, filename)))
        featureList.append(metaFeatures)
        fileList.append(filename)
    return np.array(featureList).astype('float32')
def normalizeDimension(vector, dim=None):
    """Return ``vector`` zero-padded or truncated to exactly ``dim`` entries.

    ``dim`` defaults to the module-level feature dimension ``d``
    (backward compatible with the original one-argument call).

    Fix: the original padded the caller's list in place, mutating the
    argument; this version always returns a new list and leaves the
    input untouched.
    """
    if dim is None:
        dim = d
    normalized = list(vector[:dim])          # truncate (and copy) up to dim entries
    normalized += [0] * (dim - len(normalized))  # pad with zeros to reach dim
    return normalized
def readSearchTerm():
    """Prompt the user and store the query dataset in the global ``searchTerm``."""
    global searchTerm
    searchTerm = input("Dataset to use as search term :")
def search(termPath, metaFeatureList):
    """Index ``metaFeatureList`` with FAISS (L2) and print the k files
    nearest to the dataset at ``termPath``.

    Prints the index's trained flag, the raw neighbour-index array, and
    then one matching filename per neighbour (via the global fileList).
    """
    global d, k
    index = faiss.IndexFlatL2(d)
    print(index.is_trained)
    index.add(metaFeatureList)
    query = normalizeDimension(Output.get_metafeatures(termPath))
    query = np.array([query]).astype('float32')
    _distances, neighbors = index.search(query, k)
    print(neighbors[:5])
    for row in neighbors[:5]:
        for neighborIdx in row:
            print(fileList[neighborIdx])
def main():
    """Entry point: pick the dataset folder, build the feature matrix,
    read the query dataset, and run the nearest-neighbour search."""
    readData()
    features = getMetafeatures()
    readSearchTerm()
    search(searchTerm, features)
main()