11import logging
22from collections import Counter , namedtuple
33from datetime import date
4- from typing import Optional
4+ from typing import Optional , Tuple , Set
55
66from core .logic .debug import log_memory
77from logs .logic .validation import clean_and_validate_issn , ValidationError , normalize_isbn
@@ -244,7 +244,9 @@ def import_counter_records(
244244 to_compare [key ] = (pk , value )
245245 # make the comparison
246246 log_memory ('XX2' )
247- dicts_to_insert = []
247+ als_to_insert = []
248+ target_date_tuples = set ()
249+ max_batch_size = 100_000
248250 for key , value in to_insert .items ():
249251 db_pk , db_value = to_compare .get (key , (None , None ))
250252 if db_pk :
@@ -257,21 +259,26 @@ def import_counter_records(
257259 else :
258260 rec = dict (key )
259261 rec ['value' ] = value
260- dicts_to_insert .append (rec )
262+ als_to_insert .append (AccessLog (import_batch = import_batch , ** rec ))
263+ if rec ['target_id' ] is not None :
264+ target_date_tuples .add ((rec ['target_id' ], rec ['date' ]))
265+ if len (als_to_insert ) >= max_batch_size :
266+ log_memory ('Batch create' )
267+ AccessLog .objects .bulk_create (als_to_insert )
268+ stats ['new logs' ] += len (als_to_insert )
269+ als_to_insert = []
261270 # now insert the records that are clean to be inserted
262271 log_memory ('XX3' )
263- AccessLog .objects .bulk_create (
264- [AccessLog (import_batch = import_batch , ** rec ) for rec in dicts_to_insert ]
265- )
266- stats ['new logs' ] += len (dicts_to_insert )
272+ AccessLog .objects .bulk_create (als_to_insert )
273+ stats ['new logs' ] += len (als_to_insert )
267274 log_memory ('XX4' )
268275 # and insert the PlatformTitle links
269- stats .update (create_platformtitle_links (organization , platform , dicts_to_insert ))
276+ stats .update (create_platformtitle_links (organization , platform , target_date_tuples ))
270277 log_memory ('XX5' )
271278 return stats
272279
273280
274- def create_platformtitle_links (organization , platform , records : [ dict ]):
281+ def create_platformtitle_links (organization , platform , target_date_tuples : Set [ Tuple ]):
275282 """
276283 Takes the set of (target_id, date) tuples gathered in `import_counter_records` for newly inserted AccessLogs
277284 and creates the explicit PlatformTitle objects from the data
@@ -280,10 +287,9 @@ def create_platformtitle_links(organization, platform, records: [dict]):
280287 (pt .title_id , pt .date .isoformat ())
281288 for pt in PlatformTitle .objects .filter (organization = organization , platform = platform )
282289 }
283- tuples = {(rec ['target_id' ], rec ['date' ]) for rec in records if rec ['target_id' ] is not None }
284290 pts = []
285291 before_count = PlatformTitle .objects .count ()
286- for title_id , rec_date in tuples - existing :
292+ for title_id , rec_date in target_date_tuples - existing :
287293 pts .append (
288294 PlatformTitle (
289295 organization = organization , platform = platform , title_id = title_id , date = rec_date
0 commit comments