osmith has uploaded this change for review. ( https://gerrit.osmocom.org/c/osmo-ci/+/39913?usp=email )
Change subject: OBS: cleanup: new script ......................................................................
OBS: cleanup: new script
Add a script for cleaning old sources uploaded to OBS. This was needed as we were running out of space on the OBS server, due to source tarballs being generated in nightly jobs (e.g. Osmocom_OBS_nightly) and never getting deleted. This script is now running automatically every day.
The script does the following:
* Iterate over all OBS projects and the packages inside each project.
* Delete from disk all source files matching *.dsc or *.tar.xz that belong to old revisions and are no longer part of the most recent revision.
* Write a marker file with the revisions already iterated through (per project+package), and skip those revisions on the next run.
Related: SYS#7407 Change-Id: I4649dedf0bb52f0bbdc7577ac0bc9e93cb3be192 --- A .ruff.toml M lint/lint_diff.sh A scripts/obs/cleanup.py M scripts/obs/lib/osc.py 4 files changed, 198 insertions(+), 0 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-ci refs/changes/13/39913/1
diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..4194a9f --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,2 @@ +line-length = 120 +include = ["scripts/obs/cleanup.py"] diff --git a/lint/lint_diff.sh b/lint/lint_diff.sh index 408d79f..28bb922 100755 --- a/lint/lint_diff.sh +++ b/lint/lint_diff.sh @@ -88,6 +88,7 @@ " local format_projects=" osmo-ttcn3-hacks + osmo-ci "
if ! command -v ruff >/dev/null; then diff --git a/scripts/obs/cleanup.py b/scripts/obs/cleanup.py new file mode 100755 index 0000000..3365e34 --- /dev/null +++ b/scripts/obs/cleanup.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright 2025 sysmocom - s.f.m.c. GmbH info@sysmocom.de +import argparse +import fnmatch +import hashlib +import lib +import lib.osc +import os +import sys +import time + +# Only delete files that were created by the Osmocom_OBS_* jenkins jobs +safe_to_delete_patterns = [ + "*.dsc", + "*.tar.xz", +] + +cache_dir = os.path.expanduser("~/.cache/osmo_ci_obs_cleanup") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Clean up old sources to free up space.") + + parser.add_argument("-P", "--project", help="optional path to a specific project (e.g. osmocom:master)") + + parser.add_argument("-p", "--package", help="optional path to a specific package (e.g. osmo-mgw)") + + parser.add_argument("-k", "--keep-revisions", type=int, default=5, help="how many revisions to keep (default: 5)") + + parser.add_argument( + "-s", "--sleep", type=int, default=0, help="how many seconds to sleep between queries (default: 0)" + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="always print shell commands and their output, instead of only printing them on error", + ) + parser.add_argument( + "-A", + "--apiurl", + help="OBS API URL or .oscrc alias (default: https://obs.osmocom.org)", + default="https://obs.osmocom.org", + ) + + args = parser.parse_args() + lib.set_args(args) + lib.osc.set_apiurl(args.apiurl) + + +def sleep(): + if lib.args.sleep != 0: + time.sleep(lib.args.sleep) + + +def get_projects(): + if lib.args.project: + return [lib.args.project] + return lib.osc.get_projects() + + +def get_packages(project): + if lib.args.package: + return [lib.args.package] + sleep() + return lib.osc.get_remote_pkgs() + + +def check_safe_to_delete(source): + for pattern in 
safe_to_delete_patterns: + if fnmatch.fnmatch(source, pattern): + return True + return False + + +def get_start_rev_file(project, package): + h = hashlib.new("sha512") + h.update(f"{project}/{package}".encode()) + return f"{cache_dir}/{h.hexdigest()}" + + +def get_start_rev(project, package): + f = get_start_rev_file(project, package) + if not os.path.exists(f): + return 1 + + with open(f, "r") as h: + return int(h.read().rstrip()) + + +def set_start_rev(project, package, rev): + f = get_start_rev_file(project, package) + with open(f, "w") as h: + h.write(f"{rev}\n") + + +def clean_package(project, package): + sleep() + last_rev = lib.osc.get_last_rev(package) + if last_rev <= lib.args.keep_revisions: + return + + sleep() + sources_current = lib.osc.get_package_sources(package, last_rev) + + start = get_start_rev(project, package) + end = last_rev - lib.args.keep_revisions + 1 + for rev in range(start, end): + print(f" checking rev {rev}/{last_rev}") + sleep() + sources_rev = lib.osc.get_package_sources(package, rev) + for source in sources_rev: + if source not in sources_current: + assert "/" not in package + assert "/" not in source + path = f"/srv/obs/sources/{package}/{source}" + if check_safe_to_delete(source) and os.path.exists(path): + print(f" rm {source}") + lib.run_cmd(["rm", path]) + set_start_rev(project, package, rev) + + +def main(): + if not os.path.exists("/srv/obs/sources"): + print("ERROR: this script needs to run on an OBS server!") + sys.exit(1) + + lib.run_cmd(["mkdir", "-p", cache_dir]) + + parse_args() + lib.osc.check_oscrc() + + for project in get_projects(): + if ":" not in project: + continue + + lib.osc.set_apiurl(lib.args.apiurl, project) + + packages = get_packages(project) + if len(packages) == 1 and packages[0] == "": + continue + + for package in packages: + clean_package(project, package) + + +if __name__ == "__main__": + main() + print("Success") diff --git a/scripts/obs/lib/osc.py b/scripts/obs/lib/osc.py index 8838b1a..3627bd2 
100644 --- a/scripts/obs/lib/osc.py +++ b/scripts/obs/lib/osc.py @@ -8,6 +8,7 @@ import sys import lib import lib.config +import xml.etree.ElementTree
apiurl = None proj = None @@ -90,6 +91,37 @@ return "0"
+def get_package_sources(package, rev=None): + # Use the API directly, because "osc list" throws an exception when trying + # to list a directory with deleted files. + url = f"/source/{proj}/{os.path.basename(package)}" + if rev: + url = f"{url}?rev={rev}" + + osc_ret = run_osc(["api", url]) + root = xml.etree.ElementTree.fromstring(osc_ret.output) + + # === Output === + # <directory name="open5gs" rev="1012" vrev="1" srcmd5="d98c9f8faeada3e291aa2197ca7fda03"> + # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.dsc" md5="7101346f69282beda8c1e2c191fadd4e" size="2040" mtime="1743367015"/> + # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.tar.xz" md5="71fc5f9a885204d38f712236684822ac" size="14531220" mtime="1743367016"/> + # </directory> + + # === Output with already deleted files === + # <directory name="open5gs" rev="1" vrev="1" srcmd5="bcd19a5960921d5e30e99d988fffbd15"> + # <entry name="open5gs_2.4.8.202206260026.dsc" md5="bf154599a1493d23f2f7f8669c5adb7c" error="No such file or directory"/> + # <entry name="open5gs_2.4.8.202206260026.tar.xz" md5="3f26b59b342a35d80d5ac790ff0a8ff2" error="No such file or directory"/> + # </directory> + + ret = [] + for entry in root.findall("entry"): + if entry.get("error"): + # already deleted + continue + ret += [f"{entry.get('md5')}-{entry.get('name')}"] + return ret + + def create_package(package): print(f"{package}: creating new OBS package")
@@ -180,3 +212,15 @@ def get_projects(): print(f"OBS: getting list of projects ({apiurl})") return lib.osc.run_osc(["ls"]).output.rstrip().split("\n") + + +def get_last_rev(package): + print(f"OBS: getting latest revision of {proj}:{package}") + + url = f"/source/{proj}/{os.path.basename(package)}" + osc_ret = run_osc(["api", url]) + root = xml.etree.ElementTree.fromstring(osc_ret.output) + rev = root.get("rev") + if rev: + return int(rev) + return 0