Skip to content

Commit a8e09de

Browse files
Pull NESE data from AllTheThings instead of locally stored files (#445)
1 parent: dd08dce · commit: a8e09de

2 files changed

Lines changed: 30 additions & 52 deletions

File tree

coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ def handle(self, *args, **options):
4848
result_json = attconn.pull_quota_data()
4949
result_json = attconn.format_query_results(result_json)
5050
resp_json_by_lab = {entry['lab']:[] for entry in result_json}
51-
[resp_json_by_lab[e['lab']].append(e) for e in result_json]
51+
for entry in result_json:
52+
resp_json_by_lab[entry['lab']].append(entry)
5253
result_file = 'local_data/att_quota_data.json'
5354
save_json(result_file, resp_json_by_lab)
5455

@@ -87,7 +88,7 @@ def handle(self, *args, **options):
8788
i for i in allocation_usages if i['vol_name'] == lab_server
8889
and lab_path in i['path'] and i['group_name'] == lab_name
8990
]
90-
if not lab_usage_entries:
91+
if entry['server'] != 'nesetape' and not lab_usage_entries:
9192
logger.info("No starfish usage data found for %s %s %s", lab_name, lab_server, lab_path)
9293
continue
9394

coldfront/plugins/fasrc/utils.py

Lines changed: 27 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
import logging
33

4-
import pandas as pd
54
import requests
65

76
from coldfront.core.utils.common import import_from_settings
@@ -41,8 +40,7 @@ def produce_query_statement(self, vol_type, volumes=None):
4140
'usedgb': 'usedGB',
4241
'sizebytes': 'limitBytes',
4342
'usedbytes': 'usedBytes',
44-
'fs_path': 'Path',
45-
'server_replace': '/n/',
43+
'server_replace': "'/n/', ''",
4644
'path_def': "substring(e.Path, size('/n/') + size(split(e.Path, '/')[2]) + 1)",
4745
'unique':'datetime(e.DotsLFSUpdateDate) as begin_date'
4846
},
@@ -56,13 +54,12 @@ def produce_query_statement(self, vol_type, volumes=None):
5654
AND (e.Path =~ '.*labs.*')\
5755
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate))\
5856
AND NOT (e.SizeGB = 0)",
59-
'fs_path':'Path',
6057
'r_updated': 'DotsUpdateDate',
6158
'storage_type': 'Isilon',
6259
'usedgb': 'UsedGB',
6360
'sizebytes': 'SizeBytes',
6461
'usedbytes': 'UsedBytes',
65-
'server_replace': '01.rc.fas.harvard.edu',
62+
'server_replace': "'01.rc.fas.harvard.edu', ''",
6663
'path_def': "replace(e.Path, '/ifs/', '')",
6764
'unique': 'datetime(e.DotsUpdateDate) as begin_date'
6865
},
@@ -74,15 +71,29 @@ def produce_query_statement(self, vol_type, volumes=None):
7471
'validation_query': 'NOT (e.SizeGB = 0)',
7572
'r_updated': 'DotsLVSUpdateDate',
7673
'storage_type': 'Volume',
77-
'fs_path': 'LogicalVolume',
7874
'path_def': "replace(e.LogicalVolume, '/dev/data/', '')",
7975
'usedgb': 'UsedGB',
8076
'sizebytes': 'SizeGB * 1000000000',
8177
'usedbytes': 'UsedGB * 1000000000',
82-
'server_replace': '.rc.fas.harvard.edu',
78+
'server_replace': "'.rc.fas.harvard.edu', ''",
8379
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date,\
8480
datetime(e.DotsLVDisplayUpdateDate) as display_date'
8581
},
82+
'tapeallocation': {
83+
'volumes': 'NESE',
84+
'relation': 'Owns',
85+
'match': '(e:TapeAllocation)',
86+
'server': 'Provider',
87+
'validation_query': "e.Pool =~ '.*1'",
88+
'r_updated': 'DotsUpdateDate',
89+
'storage_type': 'Tape',
90+
'path_def': 'e.Pool',
91+
'usedgb': 'UsedGB',
92+
'sizebytes': 'SizeGB * 1000000000',
93+
'usedbytes': 'UsedGB * 1000000000',
94+
'server_replace': "'NESE', 'nesetape'",
95+
'unique': 'datetime(e.DotsUpdateDate) as begin_date',
96+
},
8697
}
8798
d = query_dict[vol_type]
8899

@@ -101,7 +112,7 @@ def produce_query_statement(self, vol_type, volumes=None):
101112
{d['path_def']} as fs_path,\
102113
'{d['storage_type']}' as storage_type,\
103114
datetime(r.{d['r_updated']}) as rel_updated,\
104-
replace(e.{d['server']}, '{d['server_replace']}', '') as server"
115+
replace(e.{d['server']}, {d['server_replace']}) as server"
105116
}
106117
self.queries['statements'].append(statement)
107118

@@ -119,48 +130,13 @@ def pull(self, standard):
119130
def get_standardizer(self, standard):
120131
if standard == 'ATTQuery':
121132
return self._standardize_attquery
122-
if standard == 'NESEfile':
123-
return self._standardize_nesefile
124133
raise ValueError(standard)
125134

126135
def _standardize_attquery(self):
127136
attconn = AllTheThingsConn(volumes=self.volumes)
128137
resp_json = attconn.pull_quota_data()
129138
return attconn.format_query_results(resp_json)
130139

131-
def _standardize_nesefile(self):
132-
datafile = 'nese_data/pools'
133-
header_file = 'nese_data/pools.header'
134-
with open('nese_data/local_groupkey') as groupkey_file:
135-
translator = dict((
136-
kv.split('=') for kv in (l.strip('\n') for l in groupkey_file)
137-
))
138-
headers_df = pd.read_csv(header_file, header=0, sep='\s+')
139-
headers = headers_df.columns.values.tolist()
140-
data = pd.read_csv(datafile, names=headers, sep='\s+')
141-
data = data.loc[data['pool'].str.contains('1')]
142-
data['fs_path'] = data['pool']
143-
data['lab'] = data['pool'].str.replace('1', '').str.replace('hugl', '').str.replace('hus3', '')
144-
data['server'] = 'nesetape'
145-
data['storage_type'] = 'tape'
146-
data['byte_allocation'] = (data['mib_capacity'] + data['mib_capacity']*0.025) * 1048576
147-
data['byte_usage'] = data['mib_used'] * 1048576
148-
data['tb_allocation'] = round((
149-
(data['mib_capacity'] + data['mib_capacity']*0.025) / 953674.31640625
150-
), -1)
151-
data['tb_usage'] = data['mib_used'] / 953674.31640625
152-
data = data[[
153-
'lab', 'server', 'storage_type', 'byte_allocation',
154-
'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path',
155-
]]
156-
nesedict = data.to_dict(orient='records')
157-
for d in nesedict:
158-
if translator.get(d['lab']):
159-
d['lab'] = translator[d['lab']]
160-
else:
161-
d['lab'] = d['lab']+'_lab'
162-
return nesedict
163-
164140

165141
class AllTheThingsConn:
166142

@@ -228,6 +204,7 @@ def pull_quota_data(self):
228204
query.produce_query_statement('isilon', volumes=self.volumes)
229205
query.produce_query_statement('quota', volumes=self.volumes)
230206
query.produce_query_statement('volume', volumes=self.volumes)
207+
query.produce_query_statement('tapeallocation')
231208
resp_json = self.post_query(query.queries)
232209
logger.debug(resp_json)
233210
return resp_json
@@ -250,7 +227,7 @@ def pair_allocations_data(project, quota_dicts):
250227
allocs = project.allocation_set.filter(
251228
status__name__in=['Active','Pending Deactivation'],
252229
resources__resource_type__name='Storage'
253-
)
230+
).exclude(resources__name__icontains='vast')
254231
paired_allocs = {}
255232
# first, pair allocations with those that have same
256233
for allocation in allocs:
@@ -344,19 +321,19 @@ def match_entries_with_projects(result_json):
344321
logger.warning('missing projects: %s', missing_projs)
345322
# remove them from result_json
346323
missing_proj_titles = [list(p.values())[0] for p in missing_projs]
347-
[result_json.pop(t) for t in missing_proj_titles]
324+
for title in missing_proj_titles:
325+
result_json.pop(title)
348326
return result_json, proj_models
349327

350328

351329
def pull_push_quota_data(volumes=None):
352330
logger = logging.getLogger('coldfront.import_quotas')
353331
att_data = QuotaDataPuller(volumes=volumes).pull('ATTQuery')
354-
nese_data = QuotaDataPuller(volumes=volumes).pull('NESEfile')
355-
combined_data = att_data + nese_data
356-
resp_json_by_lab = {entry['lab']:[] for entry in combined_data}
357-
[resp_json_by_lab[e['lab']].append(e) for e in combined_data]
332+
resp_json_by_lab = {entry['lab']:[] for entry in att_data}
333+
for entry in att_data:
334+
resp_json_by_lab[entry['lab']].append(entry)
358335
logger.debug(resp_json_by_lab)
359-
result_file = 'local_data/att_nese_quota_data.json'
336+
result_file = 'local_data/att_quota_data.json'
360337
save_json(result_file, resp_json_by_lab)
361338
push_quota_data(result_file)
362339

0 commit comments

Comments (0)