Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 113 additions & 22 deletions oioioi/filetracker/management/commands/collectgarbage.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,65 @@
import datetime

# import email.utils

Check warning on line 3 in oioioi/filetracker/management/commands/collectgarbage.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this commented out code.

See more on https://sonarcloud.io/project/issues?id=sio2project_oioioi&issues=AZ2lBhJ-yuBU8AxYcyfO&open=AZ2lBhJ-yuBU8AxYcyfO&pullRequest=696
import itertools
from concurrent.futures import ProcessPoolExecutor

from django.apps import apps
from django.conf import settings
from django.core.management.base import BaseCommand
from django.db.models.loading import cache
from django.utils.translation import gettext as _
from django.db import connections
from django.utils.translation import gettext_lazy as _
from django.utils.translation import ngettext

from filetracker.client import Client
from filetracker.utils import split_name
from oioioi.filetracker.client import get_client

# import requests

Check warning on line 17 in oioioi/filetracker/management/commands/collectgarbage.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this commented out code.

See more on https://sonarcloud.io/project/issues?id=sio2project_oioioi&issues=AZ2lBhJ-yuBU8AxYcyfP&open=AZ2lBhJ-yuBU8AxYcyfP&pullRequest=696


# Module-level filetracker client shared by delete_file() and
# Command.get_ft_files(). It is built at import time from
# settings.FILETRACKER_URL and is created without a local store.
client = Client(remote_url=settings.FILETRACKER_URL, local_store=None)


def set_client():
    """Rebind the module-level ``client`` to a newly constructed Client.

    Passed as the ``initializer`` of the process pool created in
    ``Command.handle``, so it runs once in every worker process before that
    worker starts deleting files.
    """
    global client
    client = Client(remote_url=settings.FILETRACKER_URL, local_store=None)


# Exact file names that must never be collected.
# Used for SZKOpuł filetracker health checks.
FILES_TO_KEEP = [
    "nagios_check.txt",
]
# Top-level directories whose whole content must never be collected.
DIRS_TO_KEEP = [
    "sandboxes",
]


def keepfilter(filename):
    """Tell whether ``filename`` is protected from garbage collection.

    A name is protected when it is listed verbatim in ``FILES_TO_KEEP`` or
    when its first ``/``-separated component is listed in ``DIRS_TO_KEEP``.

    Args:
        filename: File name as a ``/``-separated string.

    Returns:
        ``True`` if the file must be kept, ``False`` otherwise.
    """
    if filename in FILES_TO_KEEP:
        return True
    # Everything before the first "/" (or the whole name if there is none).
    top_level, _sep, _rest = filename.partition("/")
    return top_level in DIRS_TO_KEEP


def delete_file(args):
    """Delete one file from the remote filetracker.

    Takes a single tuple rather than separate parameters so that it can be
    handed directly to ``Executor.map`` (see ``Command.handle``).

    Args:
        args: A ``(path, version, verbosity)`` sequence. ``path`` is the file
            name, to which a leading ``/`` is prepended here. ``version`` is
            appended to the name after ``@`` (``Command.handle`` passes the
            cutoff timestamp). ``verbosity`` is the command's verbosity
            option.
    """
    path, version, verbosity = args[0], args[1], args[2]
    # Command.handle always wraps the verbosity option in int() before
    # comparing it; do the same here so a string value cannot raise TypeError.
    if int(verbosity) > 1:
        print(" " + path)
    # ``client`` is only read, so no ``global`` declaration is needed; the
    # lookup happens at call time and sees any rebinding done by set_client().
    client.delete_file("/" + path + "@" + str(version))


def list_files_for_model(args):
    """Collect the file names referenced by one model's file fields.

    Takes a single tuple so that it can be used with ``Executor.map`` (see
    ``Command._get_needed_files``).

    Args:
        args: A ``(model, subpath)`` sequence. ``model`` is a Django model
            class; ``subpath`` is a path prefix restricting which stored
            names are returned (empty string means no restriction).

    Returns:
        A list with the first element of ``split_name(name)`` for every
        non-empty stored name that starts with ``subpath``. The list is
        empty when the model has no ``FileField``/``ImageField``.
    """
    model, subpath = args[0], args[1]
    # Command.get_ft_files strips leading slashes from the same option before
    # querying filetracker, and delete_file() prepends "/" to the names it is
    # given, so the names compared here carry no leading slash. Without this
    # normalisation a subpath such as "/foo" would match nothing, and every
    # remote file under it would be treated as orphaned.
    subpath = subpath.lstrip("/")
    # Safety for multiprocessing.
    connections.close_all()
    file_fields = [
        field.name
        for field in model._meta.fields
        if field.get_internal_type() in ("FileField", "ImageField")
    ]
    if not file_fields:
        return []
    base_qs = model.objects.all()
    if len(file_fields) == 1:
        # Database-side narrowing is only applied when there is exactly one
        # file field; with several fields the filtering is left to the final
        # comprehension.
        only_field = file_fields[0]
        base_qs = base_qs.exclude(**{only_field: None})
        if subpath:
            base_qs = base_qs.filter(**{only_field + "__startswith": subpath})
    files = base_qs.values_list(*file_fields).distinct()
    return [
        split_name(name)[0]
        for name in itertools.chain.from_iterable(files)
        if name and name.startswith(subpath)
    ]


class Command(BaseCommand):
Expand All @@ -24,6 +76,25 @@
help=_("Orphaned files older than DAYS days will be deleted. Default value is 30."),
metavar=_("DAYS"),
)
parser.add_argument(
"-s",
"--subpath",
action="store",
type=str,
dest="subpath",
default="",
help=_("Restrict the cleaning to a filetracker subpath."),
metavar=_("SUBPATH"),
)
parser.add_argument(
"-n",
"--paralell",
action="store",
type=int,
dest="workers",
default=0,
help=_("How many files to delete in paralell."),
)
parser.add_argument(
"-p",
"--pretend",
Expand All @@ -33,25 +104,41 @@
help=_("If set, the orphaned files will only be displayed, not deleted."),
)

def _get_needed_files(self):
result = []
for app in cache.get_apps():
model_list = cache.get_models(app)
for model in model_list:
file_fields = [field.name for field in model._meta.fields if field.get_internal_type() == "FileField"]

if len(file_fields) > 0:
files = model.objects.all().values_list(*file_fields)
result.extend([split_name(file)[0] for file in itertools.chain.from_iterable(files) if file])
def _get_needed_files(self, subpath):
    """Gather the file names still referenced by any installed model.

    Every model of every installed app is paired with ``subpath`` and handed
    to ``list_files_for_model`` in a pool of worker processes; the per-model
    lists are concatenated in submission order.

    Args:
        subpath: Path prefix forwarded unchanged to ``list_files_for_model``.

    Returns:
        A flat list of the names returned for all models.
    """
    tasks = []
    for app_config in apps.get_app_configs():
        for model in app_config.get_models():
            tasks.append((model, subpath))
    needed = []
    with ProcessPoolExecutor() as executor:
        for per_model_names in executor.map(list_files_for_model, tasks):
            needed.extend(per_model_names)
    return needed

# def get_ft_files(self, cutoff_timestamp, subpath):

Check warning on line 114 in oioioi/filetracker/management/commands/collectgarbage.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Remove this commented out code.

See more on https://sonarcloud.io/project/issues?id=sio2project_oioioi&issues=AZ2lBhJ-yuBU8AxYcyfQ&open=AZ2lBhJ-yuBU8AxYcyfQ&pullRequest=696
# """Returns a list of paths"""
# ft_url = settings.FILETRACKER_URL
# url = ft_url + "/list/" + subpath.lstrip('/')
# rfc2822_date = email.utils.formatdate(cutoff_timestamp)
# response = requests.get(url, params={'last_modified': rfc2822_date})
# response.raise_for_status()
# result = response.content.decode('utf-8').split('\n')
# assert len(result.pop()) == 0
# return result

def get_ft_files(self, cutoff_timestamp, subpath):
    """List files on the remote filetracker below ``subpath``.

    Args:
        cutoff_timestamp: Passed through as the first argument of
            ``client.list_remote_files``.
        subpath: Path prefix; any leading slashes are collapsed so that
            exactly one ``/`` precedes it in the request.

    Returns:
        Whatever ``client.list_remote_files`` returns when called with
        ``absolute_paths=True``.
    """
    rooted_subpath = "/{}".format(subpath.lstrip("/"))
    return client.list_remote_files(
        cutoff_timestamp,
        rooted_subpath,
        absolute_paths=True,
    )

def handle(self, *args, **options):
needed_files = self._get_needed_files()
all_files = get_client().list_local_files()
assert options["workers"] >= 0
max_date_to_delete = datetime.datetime.now() - datetime.timedelta(days=options["days"])

diff = {f[0] for f in all_files} - set(needed_files)
to_delete = [f[0] for f in all_files if f[0] in diff and datetime.datetime.fromtimestamp(f[1]) < max_date_to_delete]
cutoff_timestamp = int(max_date_to_delete.timestamp())
print(_("Cutoff date is"), max_date_to_delete)
print(_("Getting needed files..."))
needed_files = self._get_needed_files(options["subpath"])
print(_("Got needed files."))
print(_("Getting list of files on filetracker..."))
all_files = self.get_ft_files(cutoff_timestamp, options["subpath"])
print(_("Got list of files on filetracker."))
all_files = [f for f in all_files if not keepfilter(f)]
to_delete = set(all_files) - set(needed_files)

files_count = len(to_delete)
if files_count == 0 and int(options["verbosity"]) > 0:
Expand Down Expand Up @@ -89,7 +176,11 @@
)
if int(options["verbosity"]) == 1:
print(ngettext("Deleting %d file", "Deleting %d files", files_count) % files_count)
for file in to_delete:
if int(options["verbosity"]) > 1:
print(" ", file)
get_client().delete_file("/" + file)
if options["workers"] < 2:
for file in to_delete:
delete_file((file, cutoff_timestamp, options["verbosity"]))
else:
print(_("Starting {workers} paralell workers.").format(workers=str(options["workers"])))
with ProcessPoolExecutor(max_workers=options["workers"], initializer=set_client) as pool:
len([*pool.map(delete_file, [(file, cutoff_timestamp, options["verbosity"]) for file in to_delete])])
print(_("Done."))
Loading