From 4c9af134361e023d9ba8ce2c3745fc29b01a4d2c Mon Sep 17 00:00:00 2001 From: Sergio Garcia Date: Wed, 18 Mar 2026 12:50:39 +0100 Subject: [PATCH] Speed up package listing --- publish/aliPublishS3 | 101 +++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/publish/aliPublishS3 b/publish/aliPublishS3 index 0e3c1a65..3dadbcd0 100755 --- a/publish/aliPublishS3 +++ b/publish/aliPublishS3 @@ -636,61 +636,66 @@ def sync(pub, architectures, s3Client, bucket, baseUrl, basePrefix, rules, if nv ) - # Get versions for all valid packages and filter them according to the rules - def process_package(pkgName): - result = [] + def process_pkg_ver(pkgName_pkgTar): + pkgName, pkgTar = pkgName_pkgTar if pkgName not in rules["include"][arch]: - return result - for pkgTar in pkgVerDirs[pkgName]: - nameVer = nameVerFromTar(pkgTar["name"], arch, [pkgName]) - if nameVer is None: - continue - pkgVer = nameVer["ver"] - # Here we decide whether to include/exclude it - if not applyFilter(pkgVer, - rules["include"][arch].get(pkgName, None), - rules["exclude"][arch].get(pkgName, None)): - debug("%s / %s / %s: excluded", arch, pkgName, pkgVer) - continue - - if "%s-%s.%s.tar.gz" % (pkgName, pkgVer, arch) not in tarballs: - debug("%s / %s / %s: excluded because matching tarball not found", - arch, pkgName, pkgVer) - continue - - if not autoIncludeDeps: - # Not automatically including dependencies, add this package only. - # Not checking for dups because we can't have any - result.append({ "name": pkgName, "ver": pkgVer }) + return [] + nameVer = nameVerFromTar(pkgTar["name"], arch, [pkgName]) + if nameVer is None: + return [] + pkgVer = nameVer["ver"] + if not applyFilter(pkgVer, + rules["include"][arch].get(pkgName, None), + rules["exclude"][arch].get(pkgName, None)): + debug("%s / %s / %s: excluded", arch, pkgName, pkgVer) + return [] + + if "%s-%s.%s.tar.gz" % (pkgName, pkgVer, arch) not in tarballs: + debug("%s / %s / %s: excluded because matching tarball not found", + arch, pkgName, pkgVer) + return [] + + if not autoIncludeDeps: + return [{"name": pkgName, "ver": pkgVer}] + + # At this point we have filtered in the package: let's see its dependencies! + # Note that a package always depends on itself (list cannot be empty). + distPath = f"{arch}/dist-runtime/{pkgName}/{pkgName}-{pkgVer}" + runtimeDeps = list(listDir(distPath)) + if not runtimeDeps: + error("%s / %s / %s: cannot list dependencies from %s: skipping", + arch, pkgName, pkgVer, distPath) + return [] + debug("%s / %s / %s: listing all dependencies under %s", + arch, pkgName, pkgVer, distPath) + result = [] + for depTar in runtimeDeps: + if depTar["type"] != "file": continue - - # At this point we have filtered in the package: let's see its dependencies! - # Note that a package always depends on itself (list cannot be empty). - distPath = f"{arch}/dist-runtime/{pkgName}/{pkgName}-{pkgVer}" - runtimeDeps = list(listDir(distPath)) - if not runtimeDeps: - error("%s / %s / %s: cannot list dependencies from %s: skipping", - arch, pkgName, pkgVer, distPath) + depNameVer = nameVerFromTarCached(depTar["name"]) + if depNameVer is None: continue - debug("%s / %s / %s: listing all dependencies under %s", - arch, pkgName, pkgVer, distPath) - for depTar in runtimeDeps: - if depTar["type"] != "file": - continue - depNameVer = nameVerFromTar(depTar["name"], arch, distPackages, distVersions) - if depNameVer is None: - continue - depName = depNameVer["name"] - depVer = depNameVer["ver"] - result.append({"name": depName, "ver": depVer}) + result.append({"name": depNameVer["name"], "ver": depNameVer["ver"]}) return result + _dep_nv_cache: dict = {} + def nameVerFromTarCached(tar): + if tar not in _dep_nv_cache: + _dep_nv_cache[tar] = nameVerFromTar(tar, arch, distPackages, distVersions) + return _dep_nv_cache[tar] + + _all_pkg_ver_pairs = [ + (pkgName, pkgTar) + for pkgName in distPackages + for pkgTar in pkgVerDirs[pkgName] + ] + _seen_packages: set = set() with ThreadPoolExecutor(max_workers=20) as executor: - for packages in executor.map(process_package, distPackages): + for packages in executor.map(process_pkg_ver, _all_pkg_ver_pairs): for pkg in packages: - # Append only if it does not exist yet - if not any(p["name"] == pkg["name"] and p["ver"] == pkg["ver"] - for p in pubPackages): + key = (pkg["name"], pkg["ver"]) + if key not in _seen_packages: + _seen_packages.add(key) pubPackages.append(pkg) pubPackages.sort(key=lambda itm: itm["name"])