From 2988a15b8ba29cacee02898c1c3bb2d26234b9ef Mon Sep 17 00:00:00 2001
From: Olaf Targowski
Date: Sun, 19 Apr 2026 11:09:44 +0200
Subject: [PATCH] Update collectgarbage management command

---
 .../management/commands/collectgarbage.py     | 153 +++++++++++++++---
 1 file changed, 130 insertions(+), 23 deletions(-)

diff --git a/oioioi/filetracker/management/commands/collectgarbage.py b/oioioi/filetracker/management/commands/collectgarbage.py
index f4ea67b5a..9338fc33e 100644
--- a/oioioi/filetracker/management/commands/collectgarbage.py
+++ b/oioioi/filetracker/management/commands/collectgarbage.py
@@ -1,13 +1,74 @@
 import datetime
 import itertools
+from concurrent.futures import ProcessPoolExecutor
 
+from django.apps import apps
+from django.conf import settings
-from django.core.management.base import BaseCommand
-from django.db.models.loading import cache
-from django.utils.translation import gettext as _
+from django.core.management.base import BaseCommand, CommandError
+from django.db import connections
+from django.utils.translation import gettext_lazy as _
 from django.utils.translation import ngettext
+from filetracker.client import Client
 from filetracker.utils import split_name
 
-from oioioi.filetracker.client import get_client
+
+client = Client(remote_url=settings.FILETRACKER_URL, local_store=None)
+
+
+def set_client():
+    """Replace the module-level client with a fresh one for this process.
+
+    Used as the worker initializer of the deletion process pool.
+    """
+    global client
+    client = Client(remote_url=settings.FILETRACKER_URL, local_store=None)
+
+
+# Used for SZKOpuł filetracker health checks.
+FILES_TO_KEEP = [
+    "nagios_check.txt",
+]
+DIRS_TO_KEEP = [
+    "sandboxes",
+]
+
+
+def keepfilter(filename):
+    """Tell whether ``filename`` is listed in FILES_TO_KEEP or lies under DIRS_TO_KEEP."""
+    return filename.split("/")[0] in DIRS_TO_KEEP or filename in FILES_TO_KEEP
+
+
+def delete_file(args):
+    """Delete a single file from filetracker.
+
+    ``args`` is a ``(name, version, verbosity)`` tuple so that the function can
+    be mapped over a pool; ``name`` has no leading slash.
+    """
+    name, version, verbosity = args
+    if verbosity > 1:
+        print(" " + name)
+    client.delete_file("/" + name + "@" + str(version))
+
+
+def list_files_for_model(args):
+    """Return the names of the files referenced by one model.
+
+    ``args`` is a ``(model, subpath)`` tuple; only names starting with
+    ``subpath`` are returned, each passed through ``split_name`` first.
+    """
+    model, subpath = args
+    # Safety for multiprocessing.
+    connections.close_all()
+    file_fields = [field.name for field in model._meta.fields if field.get_internal_type() in ["FileField", "ImageField"]]
+    if not file_fields:
+        return []
+    base_qs = model.objects.all()
+    if len(file_fields) == 1:
+        base_qs = base_qs.exclude(**{file_fields[0]: None})
+        if subpath:
+            base_qs = base_qs.filter(**{(file_fields[0] + "__startswith"): subpath})
+    files = base_qs.values_list(*file_fields).distinct()
+    return [split_name(file)[0] for file in itertools.chain.from_iterable(files) if file and file.startswith(subpath)]
 
 
 class Command(BaseCommand):
@@ -24,6 +85,27 @@ def add_arguments(self, parser):
             help=_("Orphaned files older than DAYS days will be deleted. Default value is 30."),
             metavar=_("DAYS"),
         )
+        parser.add_argument(
+            "-s",
+            "--subpath",
+            action="store",
+            type=str,
+            dest="subpath",
+            default="",
+            help=_("Restrict the cleaning to a filetracker subpath."),
+            metavar=_("SUBPATH"),
+        )
+        parser.add_argument(
+            "-n",
+            "--parallel",
+            # The misspelled spelling is still accepted as an alias.
+            "--paralell",
+            action="store",
+            type=int,
+            dest="workers",
+            default=0,
+            help=_("How many files to delete in parallel."),
+        )
         parser.add_argument(
             "-p",
             "--pretend",
@@ -33,25 +115,44 @@ def add_arguments(self, parser):
             help=_("If set, the orphaned files will only be displayed, not deleted."),
         )
 
-    def _get_needed_files(self):
-        result = []
-        for app in cache.get_apps():
-            model_list = cache.get_models(app)
-            for model in model_list:
-                file_fields = [field.name for field in model._meta.fields if field.get_internal_type() == "FileField"]
-
-                if len(file_fields) > 0:
-                    files = model.objects.all().values_list(*file_fields)
-                    result.extend([split_name(file)[0] for file in itertools.chain.from_iterable(files) if file])
+    def _get_needed_files(self, subpath):
+        """Return the names of all files referenced from the database.
+
+        Each model is scanned by ``list_files_for_model`` in a process pool.
+        """
+        models_list = [(model, subpath) for app in apps.get_app_configs() for model in app.get_models()]
+        with ProcessPoolExecutor() as executor:
+            results_list = executor.map(list_files_for_model, models_list)
+            result = list(itertools.chain.from_iterable(results_list))
         return result
 
+    def get_ft_files(self, cutoff_timestamp, subpath):
+        """Return the files that the filetracker server lists under ``subpath``.
+
+        ``cutoff_timestamp`` is passed on to ``Client.list_remote_files``.
+        """
+        subpath = "/" + subpath.lstrip("/")
+        return client.list_remote_files(cutoff_timestamp, subpath, absolute_paths=True)
+
     def handle(self, *args, **options):
-        needed_files = self._get_needed_files()
-        all_files = get_client().list_local_files()
+        """Find orphaned files older than the cutoff and delete (or just list) them."""
+        if options["workers"] < 0:
+            raise CommandError(_("The number of parallel workers must not be negative."))
+        # File names are compared without a leading slash (delete_file() adds
+        # it back), so drop it from SUBPATH too. Otherwise no needed file would
+        # match and every listed file would be considered orphaned.
+        subpath = options["subpath"].lstrip("/")
         max_date_to_delete = datetime.datetime.now() - datetime.timedelta(days=options["days"])
-
-        diff = {f[0] for f in all_files} - set(needed_files)
-        to_delete = [f[0] for f in all_files if f[0] in diff and datetime.datetime.fromtimestamp(f[1]) < max_date_to_delete]
+        cutoff_timestamp = int(max_date_to_delete.timestamp())
+        print(_("Cutoff date is"), max_date_to_delete)
+        print(_("Getting needed files..."))
+        needed_files = self._get_needed_files(subpath)
+        print(_("Got needed files."))
+        print(_("Getting list of files on filetracker..."))
+        all_files = self.get_ft_files(cutoff_timestamp, subpath)
+        print(_("Got list of files on filetracker."))
+        all_files = [f for f in all_files if not keepfilter(f)]
+        to_delete = set(all_files) - set(needed_files)
         files_count = len(to_delete)
 
         if files_count == 0 and int(options["verbosity"]) > 0:
@@ -89,7 +190,13 @@ def handle(self, *args, **options):
                 )
             if int(options["verbosity"]) == 1:
                 print(ngettext("Deleting %d file", "Deleting %d files", files_count) % files_count)
-            for file in to_delete:
-                if int(options["verbosity"]) > 1:
-                    print(" ", file)
-                get_client().delete_file("/" + file)
+            jobs = [(file, cutoff_timestamp, options["verbosity"]) for file in to_delete]
+            if options["workers"] < 2:
+                for job in jobs:
+                    delete_file(job)
+            else:
+                print(_("Starting {workers} parallel workers.").format(workers=str(options["workers"])))
+                with ProcessPoolExecutor(max_workers=options["workers"], initializer=set_client) as pool:
+                    # Drain the iterator so exceptions raised in workers surface here.
+                    list(pool.map(delete_file, jobs))
+            print(_("Done."))