class KeepLatestNDockerImagesAllVersions(RuleForDocker):
    """
    Protect the ``count`` most recently updated digests of every Docker image, where
    "most recent" is decided over **all** manifests found in the repository and not only
    over the candidates handed to :meth:`filter`.

    Prefer this rule over ``KeepLatestNDockerImages`` when the same policy also carries a
    filtering ``Delete*`` rule such as ``DeleteDockerImagesNotUsed``. The rules of a policy
    share one AQL ``find`` joined with ``$and``, so ``KeepLatestNDockerImages`` only ever
    sees what the ``Delete*`` filter let through and cannot tell which versions are really
    the newest. This rule runs a separate AQL query for the manifests, which makes
    "delete unused, but always keep the latest N" expressible in a single policy.
    See https://github.com/devopshq/artifactory-cleanup/issues/183
    """

    def __init__(self, count: int):
        # Number of distinct digests to protect per (repo, image path).
        self.count = count

    def _get_all_image_manifests(self, repos) -> ArtifactsList:
        """
        Fetch every image manifest stored in ``repos`` with a dedicated AQL query.

        The query ignores the other rules of the policy. The response is converted to
        image-folder form by ``_manifest_to_docker_images`` before it is returned.

        :param repos: iterable of repository names to search
        :raises: whatever ``raise_for_status()`` raises for a failed HTTP response
        """
        repo_clause = {"$or": [{"repo": repo} for repo in repos]}
        manifest_clause = {
            "$or": [
                {"name": {"$match": filename}}
                for filename in (self.MANIFEST_FILENAME, self.FAT_MANIFEST_FILENAME)
            ]
        }
        query = json.dumps({"$and": [repo_clause, manifest_clause]})

        response = self.session.post(
            "/api/search/aql", data=f'items.find({query}).include("*")'
        )
        response.raise_for_status()

        found = ArtifactsList.from_response(response.json()["results"])
        return self._manifest_to_docker_images(found)

    def _latest_digests(self, manifests) -> set:
        """
        Return ``(repo, path, sha256)`` triples for the newest ``count`` digests per image.

        Manifests are grouped by ``(repo, path)`` and ordered by ``updated``, newest first.
        Several tags pointing at the same digest count as one digest.
        """
        grouped = defaultdict(list)
        for item in manifests:
            grouped[item["repo"], item["path"]].append(item)

        protected = set()
        for (repo, path), group in grouped.items():
            group.sort(key=lambda item: item["updated"], reverse=True)
            kept = set()
            for item in group:
                digest = item["sha256"]
                if digest not in kept:
                    # A new digest is only admitted while the quota is not exhausted.
                    if len(kept) >= self.count:
                        continue
                    kept.add(digest)
                protected.add((repo, path, digest))
        return protected

    def filter(self, artifacts):
        """
        Drop from ``artifacts`` every image whose digest is among the latest ``count``
        digests of its image, then delegate to the parent rule's ``filter``.
        """
        artifacts = self._manifest_to_docker_images(artifacts)
        repos = sorted({artifact["repo"] for artifact in artifacts})
        if not repos:
            # No candidates: nothing to query, nothing to protect.
            return super().filter(artifacts)

        all_manifests = self._get_all_image_manifests(repos)
        protected = self._latest_digests(all_manifests)
        survivors = [
            candidate
            for candidate in artifacts
            if (candidate["repo"], candidate["path"], candidate["sha256"]) in protected
        ]
        artifacts.keep(survivors)
        return super().filter(artifacts)
class TestKeepLatestNDockerImagesAllVersions:
    """Tests for ``KeepLatestNDockerImagesAllVersions``."""

    def test_keeps_latest_n_over_all_versions_not_just_candidates(self, monkeypatch):
        """
        The latest-N window is computed over every tag of the image, so a candidate that
        falls outside that window is deleted even when the candidate list is short.
        """
        # Skip collecting docker size (it would hit Artifactory).
        # ``monkeypatch`` restores the attribute when the test ends, so the stub cannot
        # leak into other tests.
        monkeypatch.setattr(RuleForDocker, "_collect_docker_size", lambda self, x: x)

        # The independent query returns ALL tags of the image, newest `updated` first:
        # 1.2.0 (newest), 1.1.0, 1.0.0 (oldest)
        all_versions = ArtifactsList.from_response(
            [
                {
                    "repo": "docker-local",
                    "path": "foobar",
                    "name": "1.2.0",
                    "sha256": "ccc",
                    "updated": "2021-03-22T13:54:52.383+02:00",
                },
                {
                    "repo": "docker-local",
                    "path": "foobar",
                    "name": "1.1.0",
                    "sha256": "bbb",
                    "updated": "2021-03-21T13:54:52.383+02:00",
                },
                {
                    "repo": "docker-local",
                    "path": "foobar",
                    "name": "1.0.0",
                    "sha256": "aaa",
                    "updated": "2021-03-20T13:54:52.383+02:00",
                },
            ]
        )
        # Replace the network query with the canned response, for this test only.
        monkeypatch.setattr(
            KeepLatestNDockerImagesAllVersions,
            "_get_all_image_manifests",
            lambda self, repos: all_versions,
        )

        # The policy's AQL query returned only the "stale" subset: imagine a Delete rule
        # filtered out the recently-used newest tag 1.2.0. Candidates = 1.1.0 and 1.0.0.
        candidates = ArtifactsList.from_response(
            [
                {
                    "repo": "docker-local",
                    "path": "foobar/1.1.0",
                    "name": "manifest.json",
                    "sha256": "bbb",
                    "updated": "2021-03-21T13:54:52.383+02:00",
                },
                {
                    "repo": "docker-local",
                    "path": "foobar/1.0.0",
                    "name": "manifest.json",
                    "sha256": "aaa",
                    "updated": "2021-03-20T13:54:52.383+02:00",
                },
            ]
        )

        policy = CleanupPolicy("test", KeepLatestNDockerImagesAllVersions(count=2))

        # Latest 2 digests over ALL versions = {ccc (1.2.0), bbb (1.1.0)}.
        # 1.1.0 (bbb) is protected even though it is a candidate; 1.0.0 (aaa) is not in the
        # latest 2 -> only 1.0.0 is deleted. (Plain KeepLatestNDockerImages would keep both,
        # because it only sees the 2-item stale subset.)
        assert policy.filter(candidates) == [
            {
                "repo": "docker-local",
                "path": "foobar",
                "name": "1.0.0",
                "sha256": "aaa",
                "updated": "2021-03-20T13:54:52.383+02:00",
                "properties": {},
                "stats": {},
            }
        ]