Skip to content

Commit a8e09de

Browse files
Pull NESE data from AllTheThings instead of locally stored files (#445)
1 parent: dd08dce · commit: a8e09de

2 files changed

Lines changed: 30 additions & 52 deletions

File tree

coldfront/plugins/fasrc/management/commands/id_import_new_allocations.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ def handle(self, *args, **options):
4848
result_json = attconn.pull_quota_data()
4949
result_json = attconn.format_query_results(result_json)
5050
resp_json_by_lab = {entry['lab']:[] for entry in result_json}
51-
[resp_json_by_lab[e['lab']].append(e) for e in result_json]
51+
for entry in result_json:
52+
resp_json_by_lab[entry['lab']].append(entry)
5253
result_file = 'local_data/att_quota_data.json'
5354
save_json(result_file, resp_json_by_lab)
5455

@@ -87,7 +88,7 @@ def handle(self, *args, **options):
8788
i for i in allocation_usages if i['vol_name'] == lab_server
8889
and lab_path in i['path'] and i['group_name'] == lab_name
8990
]
90-
if not lab_usage_entries:
91+
if entry['server'] != 'nesetape' and not lab_usage_entries:
9192
logger.info("No starfish usage data found for %s %s %s", lab_name, lab_server, lab_path)
9293
continue
9394

coldfront/plugins/fasrc/utils.py

Lines changed: 27 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import json
22
import logging
33

4-
import pandas as pd
54
import requests
65

76
from coldfront.core.utils.common import import_from_settings
@@ -41,8 +40,7 @@ def produce_query_statement(self, vol_type, volumes=None):
4140
'usedgb': 'usedGB',
4241
'sizebytes': 'limitBytes',
4342
'usedbytes': 'usedBytes',
44-
'fs_path': 'Path',
45-
'server_replace': '/n/',
43+
'server_replace': "'/n/', ''",
4644
'path_def': "substring(e.Path, size('/n/') + size(split(e.Path, '/')[2]) + 1)",
4745
'unique':'datetime(e.DotsLFSUpdateDate) as begin_date'
4846
},
@@ -56,13 +54,12 @@ def produce_query_statement(self, vol_type, volumes=None):
5654
AND (e.Path =~ '.*labs.*')\
5755
AND (datetime() - duration('P31D') <= datetime(r.DotsUpdateDate))\
5856
AND NOT (e.SizeGB = 0)",
59-
'fs_path':'Path',
6057
'r_updated': 'DotsUpdateDate',
6158
'storage_type': 'Isilon',
6259
'usedgb': 'UsedGB',
6360
'sizebytes': 'SizeBytes',
6461
'usedbytes': 'UsedBytes',
65-
'server_replace': '01.rc.fas.harvard.edu',
62+
'server_replace': "'01.rc.fas.harvard.edu', ''",
6663
'path_def': "replace(e.Path, '/ifs/', '')",
6764
'unique': 'datetime(e.DotsUpdateDate) as begin_date'
6865
},
@@ -74,15 +71,29 @@ def produce_query_statement(self, vol_type, volumes=None):
7471
'validation_query': 'NOT (e.SizeGB = 0)',
7572
'r_updated': 'DotsLVSUpdateDate',
7673
'storage_type': 'Volume',
77-
'fs_path': 'LogicalVolume',
7874
'path_def': "replace(e.LogicalVolume, '/dev/data/', '')",
7975
'usedgb': 'UsedGB',
8076
'sizebytes': 'SizeGB * 1000000000',
8177
'usedbytes': 'UsedGB * 1000000000',
82-
'server_replace': '.rc.fas.harvard.edu',
78+
'server_replace': "'.rc.fas.harvard.edu', ''",
8379
'unique': 'datetime(e.DotsLVSUpdateDate) as update_date,\
8480
datetime(e.DotsLVDisplayUpdateDate) as display_date'
8581
},
82+
'tapeallocation': {
83+
'volumes': 'NESE',
84+
'relation': 'Owns',
85+
'match': '(e:TapeAllocation)',
86+
'server': 'Provider',
87+
'validation_query': "e.Pool =~ '.*1'",
88+
'r_updated': 'DotsUpdateDate',
89+
'storage_type': 'Tape',
90+
'path_def': 'e.Pool',
91+
'usedgb': 'UsedGB',
92+
'sizebytes': 'SizeGB * 1000000000',
93+
'usedbytes': 'UsedGB * 1000000000',
94+
'server_replace': "'NESE', 'nesetape'",
95+
'unique': 'datetime(e.DotsUpdateDate) as begin_date',
96+
},
8697
}
8798
d = query_dict[vol_type]
8899

@@ -101,7 +112,7 @@ def produce_query_statement(self, vol_type, volumes=None):
101112
{d['path_def']} as fs_path,\
102113
'{d['storage_type']}' as storage_type,\
103114
datetime(r.{d['r_updated']}) as rel_updated,\
104-
replace(e.{d['server']}, '{d['server_replace']}', '') as server"
115+
replace(e.{d['server']}, {d['server_replace']}) as server"
105116
}
106117
self.queries['statements'].append(statement)
107118

@@ -119,48 +130,13 @@ def pull(self, standard):
119130
def get_standardizer(self, standard):
120131
if standard == 'ATTQuery':
121132
return self._standardize_attquery
122-
if standard == 'NESEfile':
123-
return self._standardize_nesefile
124133
raise ValueError(standard)
125134

126135
def _standardize_attquery(self):
127136
attconn = AllTheThingsConn(volumes=self.volumes)
128137
resp_json = attconn.pull_quota_data()
129138
return attconn.format_query_results(resp_json)
130139

131-
def _standardize_nesefile(self):
132-
datafile = 'nese_data/pools'
133-
header_file = 'nese_data/pools.header'
134-
with open('nese_data/local_groupkey') as groupkey_file:
135-
translator = dict((
136-
kv.split('=') for kv in (l.strip('\n') for l in groupkey_file)
137-
))
138-
headers_df = pd.read_csv(header_file, header=0, sep='\s+')
139-
headers = headers_df.columns.values.tolist()
140-
data = pd.read_csv(datafile, names=headers, sep='\s+')
141-
data = data.loc[data['pool'].str.contains('1')]
142-
data['fs_path'] = data['pool']
143-
data['lab'] = data['pool'].str.replace('1', '').str.replace('hugl', '').str.replace('hus3', '')
144-
data['server'] = 'nesetape'
145-
data['storage_type'] = 'tape'
146-
data['byte_allocation'] = (data['mib_capacity'] + data['mib_capacity']*0.025) * 1048576
147-
data['byte_usage'] = data['mib_used'] * 1048576
148-
data['tb_allocation'] = round((
149-
(data['mib_capacity'] + data['mib_capacity']*0.025) / 953674.31640625
150-
), -1)
151-
data['tb_usage'] = data['mib_used'] / 953674.31640625
152-
data = data[[
153-
'lab', 'server', 'storage_type', 'byte_allocation',
154-
'byte_usage', 'tb_allocation', 'tb_usage', 'fs_path',
155-
]]
156-
nesedict = data.to_dict(orient='records')
157-
for d in nesedict:
158-
if translator.get(d['lab']):
159-
d['lab'] = translator[d['lab']]
160-
else:
161-
d['lab'] = d['lab']+'_lab'
162-
return nesedict
163-
164140

165141
class AllTheThingsConn:
166142

@@ -228,6 +204,7 @@ def pull_quota_data(self):
228204
query.produce_query_statement('isilon', volumes=self.volumes)
229205
query.produce_query_statement('quota', volumes=self.volumes)
230206
query.produce_query_statement('volume', volumes=self.volumes)
207+
query.produce_query_statement('tapeallocation')
231208
resp_json = self.post_query(query.queries)
232209
logger.debug(resp_json)
233210
return resp_json
@@ -250,7 +227,7 @@ def pair_allocations_data(project, quota_dicts):
250227
allocs = project.allocation_set.filter(
251228
status__name__in=['Active','Pending Deactivation'],
252229
resources__resource_type__name='Storage'
253-
)
230+
).exclude(resources__name__icontains='vast')
254231
paired_allocs = {}
255232
# first, pair allocations with those that have same
256233
for allocation in allocs:
@@ -344,19 +321,19 @@ def match_entries_with_projects(result_json):
344321
logger.warning('missing projects: %s', missing_projs)
345322
# remove them from result_json
346323
missing_proj_titles = [list(p.values())[0] for p in missing_projs]
347-
[result_json.pop(t) for t in missing_proj_titles]
324+
for title in missing_proj_titles:
325+
result_json.pop(title)
348326
return result_json, proj_models
349327

350328

351329
def pull_push_quota_data(volumes=None):
352330
logger = logging.getLogger('coldfront.import_quotas')
353331
att_data = QuotaDataPuller(volumes=volumes).pull('ATTQuery')
354-
nese_data = QuotaDataPuller(volumes=volumes).pull('NESEfile')
355-
combined_data = att_data + nese_data
356-
resp_json_by_lab = {entry['lab']:[] for entry in combined_data}
357-
[resp_json_by_lab[e['lab']].append(e) for e in combined_data]
332+
resp_json_by_lab = {entry['lab']:[] for entry in att_data}
333+
for entry in att_data:
334+
resp_json_by_lab[entry['lab']].append(entry)
358335
logger.debug(resp_json_by_lab)
359-
result_file = 'local_data/att_nese_quota_data.json'
336+
result_file = 'local_data/att_quota_data.json'
360337
save_json(result_file, resp_json_by_lab)
361338
push_quota_data(result_file)
362339

0 commit comments

Comments (0)