osmith has submitted this change. ( https://gerrit.osmocom.org/c/osmo-ci/+/39913?usp=email )
Change subject: OBS: cleanup: new script ......................................................................
OBS: cleanup: new script
Add a script for cleaning old sources uploaded to OBS. This was needed as we were running out of space on the OBS server, due to source tarballs being generated in nightly jobs (e.g. Osmocom_OBS_nightly) and never getting deleted. This script is now running automatically every day.
The script does the following:
* Iterate over all OBS projects and packages inside the projects.
* Delete all source files matching *.dsc, *.tar.xz in old revisions from disk, which are not in the most recent revision anymore.
* Create an empty file instead (as otherwise OBS becomes unstable).
* Write a marker file with the revisions already iterated through (per project+package) and skip those on the next run.
Related: SYS#7407 Change-Id: I4649dedf0bb52f0bbdc7577ac0bc9e93cb3be192 --- A .ruff.toml M lint/lint_diff.sh A scripts/obs/cleanup.py M scripts/obs/lib/osc.py 4 files changed, 229 insertions(+), 0 deletions(-)
Approvals: laforge: Looks good to me, but someone else must approve pespin: Looks good to me, approved Jenkins Builder: Verified
diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..6f6bd68 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,30 @@ +line-length = 120 + +[format] +exclude = [ + "lint/checkpatch/checkpatch_json.py", + "scripts/jenkins-gerrit/comment_generate.py", + "scripts/obs/build_binpkg.py", + "scripts/obs/build_srcpkg.py", + "scripts/obs/check_new_distros.py", + "scripts/obs/lib/__init__.py", + "scripts/obs/lib/binpkg_deb.py", + "scripts/obs/lib/binpkg_rpm.py", + "scripts/obs/lib/config.py", + "scripts/obs/lib/debian.py", + "scripts/obs/lib/docker.py", + "scripts/obs/lib/git.py", + "scripts/obs/lib/metapkg.py", + "scripts/obs/lib/osc.py", + "scripts/obs/lib/rpm_spec.py", + "scripts/obs/lib/srcpkg.py", + "scripts/obs/sync_obs_projects.py", + "scripts/obs/update_obs_project.py", + "scripts/osmo-depcheck/buildstack.py", + "scripts/osmo-depcheck/config.py", + "scripts/osmo-depcheck/dependencies.py", + "scripts/osmo-depcheck/osmo-depcheck.py", + "scripts/osmo-depcheck/parse.py", + "scripts/verify_log_statements.py", + "scripts/verify_value_string_arrays_are_terminated.py", +] diff --git a/lint/lint_diff.sh b/lint/lint_diff.sh index 408d79f..28bb922 100755 --- a/lint/lint_diff.sh +++ b/lint/lint_diff.sh @@ -88,6 +88,7 @@ " local format_projects=" osmo-ttcn3-hacks + osmo-ci "
if ! command -v ruff >/dev/null; then diff --git a/scripts/obs/cleanup.py b/scripts/obs/cleanup.py new file mode 100755 index 0000000..b652424 --- /dev/null +++ b/scripts/obs/cleanup.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright 2025 sysmocom - s.f.m.c. GmbH info@sysmocom.de +import argparse +import fnmatch +import hashlib +import lib +import lib.osc +import os +import sys +import time + +# Only delete files that were created by the Osmocom_OBS_* jenkins jobs +safe_to_delete_patterns = [ + "*.dsc", + "*.tar.xz", +] + +cache_dir = os.path.expanduser("~/.cache/osmo_ci_obs_cleanup") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Clean up old sources to free up space.") + + parser.add_argument("-P", "--project", help="optional path to a specific project (e.g. osmocom:master)") + + parser.add_argument("-p", "--package", help="optional path to a specific package (e.g. osmo-mgw)") + + parser.add_argument("-k", "--keep-revisions", type=int, default=5, help="how many revisions to keep (default: 5)") + + parser.add_argument( + "-s", "--sleep", type=int, default=0, help="how many seconds to sleep between queries (default: 0)" + ) + + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="always print shell commands and their output, instead of only printing them on error", + ) + parser.add_argument( + "-A", + "--apiurl", + help="OBS API URL or .oscrc alias (default: https://obs.osmocom.org)", + default="https://obs.osmocom.org", + ) + + args = parser.parse_args() + lib.set_args(args) + lib.osc.set_apiurl(args.apiurl) + + +def sleep(): + if lib.args.sleep != 0: + time.sleep(lib.args.sleep) + + +def get_projects(): + if lib.args.project: + return [lib.args.project] + return lib.osc.get_projects() + + +def get_packages(project): + if lib.args.package: + return [lib.args.package] + sleep() + return lib.osc.get_remote_pkgs() + + +def check_safe_to_delete(source): + for pattern in 
safe_to_delete_patterns: + if fnmatch.fnmatch(source, pattern): + return True + return False + + +def get_start_rev_file(project, package): + h = hashlib.new("sha512") + h.update(f"{project}/{package}".encode()) + return f"{cache_dir}/{h.hexdigest()}" + + +def get_start_rev(project, package): + f = get_start_rev_file(project, package) + if not os.path.exists(f): + return 0 + + with open(f, "r") as h: + return int(h.read().rstrip()) + + +def set_start_rev(project, package, rev): + f = get_start_rev_file(project, package) + with open(f, "w") as h: + h.write(f"{rev}\n") + + +def clean_package(project, package): + sleep() + last_rev = lib.osc.get_last_rev(package) + if last_rev <= lib.args.keep_revisions: + return + + sleep() + sources_current = lib.osc.get_package_sources(package, last_rev) + + start = get_start_rev(project, package) + 1 + end = last_rev - lib.args.keep_revisions + 1 + for rev in range(start, end): + print(f" checking rev {rev}/{last_rev}") + sleep() + sources_rev = lib.osc.get_package_sources(package, rev) + for source in sources_rev: + if source not in sources_current: + assert "/" not in package + assert "/" not in source + path = f"/srv/obs/sources/{package}/{source}" + if check_safe_to_delete(source): + if os.path.exists(path) and os.path.getsize(path) != 0: + print(f" rm {source}") + lib.run_cmd(["rm", path]) + # Leave empty dummy files behind, so OBS doensn't throw 50x + # errors (SYS#7407#note-8) + if not os.path.exists(path): + print(f" touch {source}") + lib.run_cmd(["touch", path]) + set_start_rev(project, package, rev) + + +def main(): + if not os.path.exists("/srv/obs/sources"): + print("ERROR: this script needs to run on an OBS server!") + sys.exit(1) + + lib.run_cmd(["mkdir", "-p", cache_dir]) + + parse_args() + lib.osc.check_oscrc() + + for project in get_projects(): + if ":" not in project: + continue + + lib.osc.set_apiurl(lib.args.apiurl, project) + + packages = get_packages(project) + if len(packages) == 1 and packages[0] == "": + 
continue + + for package in packages: + clean_package(project, package) + + +if __name__ == "__main__": + main() + print("Success") diff --git a/scripts/obs/lib/osc.py b/scripts/obs/lib/osc.py index 8838b1a..51b2142 100644 --- a/scripts/obs/lib/osc.py +++ b/scripts/obs/lib/osc.py @@ -8,6 +8,7 @@ import sys import lib import lib.config +import xml.etree.ElementTree
apiurl = None proj = None @@ -90,6 +91,34 @@ return "0"
+def get_package_sources(package, rev=None): + # Use the API directly, because "osc list" throws an exception when trying + # to list a directory with deleted files. + url = f"/source/{proj}/{os.path.basename(package)}" + if rev: + url = f"{url}?rev={rev}" + + osc_ret = run_osc(["api", url]) + root = xml.etree.ElementTree.fromstring(osc_ret.output) + + # === Output === + # <directory name="open5gs" rev="1012" vrev="1" srcmd5="d98c9f8faeada3e291aa2197ca7fda03"> + # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.dsc" md5="7101346f69282beda8c1e2c191fadd4e" size="2040" mtime="1743367015"/> + # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.tar.xz" md5="71fc5f9a885204d38f712236684822ac" size="14531220" mtime="1743367016"/> + # </directory + + # === Output with already deleted files === + # <directory name="open5gs" rev="1" vrev="1" srcmd5="bcd19a5960921d5e30e99d988fffbd15"> + # <entry name="open5gs_2.4.8.202206260026.dsc" md5="bf154599a1493d23f2f7f8669c5adb7c" error="No such file or directory"/> + # <entry name="open5gs_2.4.8.202206260026.tar.xz" md5="3f26b59b342a35d80d5ac790ff0a8ff2" error="No such file or directory"/> + # </directory> + + ret = [] + for entry in root.findall("entry"): + ret += [f"{entry.get('md5')}-{entry.get('name')}"] + return ret + + def create_package(package): print(f"{package}: creating new OBS package")
@@ -180,3 +209,15 @@ def get_projects(): print(f"OBS: getting list of projects ({apiurl})") return lib.osc.run_osc(["ls"]).output.rstrip().split("\n") + + +def get_last_rev(package): + print(f"OBS: getting latest revision of {proj}:{package}") + + url = f"/source/{proj}/{os.path.basename(package)}" + osc_ret = run_osc(["api", url]) + root = xml.etree.ElementTree.fromstring(osc_ret.output) + rev = root.get("rev") + if rev: + return int(rev) + return 0