osmith has uploaded this change for review. (
https://gerrit.osmocom.org/c/osmo-ci/+/39913?usp=email )
Change subject: OBS: cleanup: new script
......................................................................
OBS: cleanup: new script
Add a script for cleaning old sources uploaded to OBS. This was needed
as we were running out of space on the OBS server, due to source
tarballs being generated in nightly jobs (e.g. Osmocom_OBS_nightly) and
never getting deleted. This script is now running automatically every
day.
The script does the following:
* Iterate over all OBS projects and packages inside the projects.
* Delete all source files matching *.dsc, *.tar.xz in old revisions from
disk, which are not in the most recent revision anymore.
* Write a marker file with the revisions already iterated through (per
project+package) and skip those on the next run.
Related: SYS#7407
Change-Id: I4649dedf0bb52f0bbdc7577ac0bc9e93cb3be192
---
A .ruff.toml
M lint/lint_diff.sh
A scripts/obs/cleanup.py
M scripts/obs/lib/osc.py
4 files changed, 198 insertions(+), 0 deletions(-)
git pull ssh://gerrit.osmocom.org:29418/osmo-ci refs/changes/13/39913/1
diff --git a/.ruff.toml b/.ruff.toml
new file mode 100644
index 0000000..4194a9f
--- /dev/null
+++ b/.ruff.toml
@@ -0,0 +1,2 @@
+line-length = 120
+include = ["scripts/obs/cleanup.py"]
diff --git a/lint/lint_diff.sh b/lint/lint_diff.sh
index 408d79f..28bb922 100755
--- a/lint/lint_diff.sh
+++ b/lint/lint_diff.sh
@@ -88,6 +88,7 @@
"
local format_projects="
osmo-ttcn3-hacks
+ osmo-ci
"
if ! command -v ruff >/dev/null; then
diff --git a/scripts/obs/cleanup.py b/scripts/obs/cleanup.py
new file mode 100755
index 0000000..3365e34
--- /dev/null
+++ b/scripts/obs/cleanup.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright 2025 sysmocom - s.f.m.c. GmbH <info(a)sysmocom.de>
+import argparse
+import fnmatch
+import hashlib
+import lib
+import lib.osc
+import os
+import sys
+import time
+
+# Only delete files that were created by the Osmocom_OBS_* jenkins jobs
+safe_to_delete_patterns = [
+ "*.dsc",
+ "*.tar.xz",
+]
+
+cache_dir = os.path.expanduser("~/.cache/osmo_ci_obs_cleanup")
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Clean up old sources to free up
space.")
+
+ parser.add_argument("-P", "--project", help="optional path
to a specific project (e.g. osmocom:master)")
+
+ parser.add_argument("-p", "--package", help="optional path
to a specific package (e.g. osmo-mgw)")
+
+ parser.add_argument("-k", "--keep-revisions", type=int,
default=5, help="how many revisions to keep (default: 5)")
+
+ parser.add_argument(
+ "-s", "--sleep", type=int, default=0, help="how many
seconds to sleep between queries (default: 0)"
+ )
+
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ help="always print shell commands and their output, instead of only printing
them on error",
+ )
+ parser.add_argument(
+ "-A",
+ "--apiurl",
+ help="OBS API URL or .oscrc alias (default:
https://obs.osmocom.org)",
+ default="https://obs.osmocom.org",
+ )
+
+ args = parser.parse_args()
+ lib.set_args(args)
+ lib.osc.set_apiurl(args.apiurl)
+
+
+def sleep():
+ if lib.args.sleep != 0:
+ time.sleep(lib.args.sleep)
+
+
+def get_projects():
+ if lib.args.project:
+ return [lib.args.project]
+ return lib.osc.get_projects()
+
+
+def get_packages(project):
+ if lib.args.package:
+ return [lib.args.package]
+ sleep()
+ return lib.osc.get_remote_pkgs()
+
+
+def check_safe_to_delete(source):
+ for pattern in safe_to_delete_patterns:
+ if fnmatch.fnmatch(source, pattern):
+ return True
+ return False
+
+
+def get_start_rev_file(project, package):
+ h = hashlib.new("sha512")
+ h.update(f"{project}/{package}".encode())
+ return f"{cache_dir}/{h.hexdigest()}"
+
+
+def get_start_rev(project, package):
+ f = get_start_rev_file(project, package)
+ if not os.path.exists(f):
+ return 1
+
+ with open(f, "r") as h:
+ return int(h.read().rstrip())
+
+
+def set_start_rev(project, package, rev):
+ f = get_start_rev_file(project, package)
+ with open(f, "w") as h:
+ h.write(f"{rev}\n")
+
+
+def clean_package(project, package):
+ sleep()
+ last_rev = lib.osc.get_last_rev(package)
+ if last_rev <= lib.args.keep_revisions:
+ return
+
+ sleep()
+ sources_current = lib.osc.get_package_sources(package, last_rev)
+
+ start = get_start_rev(project, package)
+ end = last_rev - lib.args.keep_revisions + 1
+ for rev in range(start, end):
+ print(f" checking rev {rev}/{last_rev}")
+ sleep()
+ sources_rev = lib.osc.get_package_sources(package, rev)
+ for source in sources_rev:
+ if source not in sources_current:
+ assert "/" not in package
+ assert "/" not in source
+ path = f"/srv/obs/sources/{package}/{source}"
+ if check_safe_to_delete(source) and os.path.exists(path):
+ print(f" rm {source}")
+ lib.run_cmd(["rm", path])
+ set_start_rev(project, package, rev)
+
+
+def main():
+ if not os.path.exists("/srv/obs/sources"):
+ print("ERROR: this script needs to run on an OBS server!")
+ sys.exit(1)
+
+ lib.run_cmd(["mkdir", "-p", cache_dir])
+
+ parse_args()
+ lib.osc.check_oscrc()
+
+ for project in get_projects():
+ if ":" not in project:
+ continue
+
+ lib.osc.set_apiurl(lib.args.apiurl, project)
+
+ packages = get_packages(project)
+ if len(packages) == 1 and packages[0] == "":
+ continue
+
+ for package in packages:
+ clean_package(project, package)
+
+
+if __name__ == "__main__":
+ main()
+ print("Success")
diff --git a/scripts/obs/lib/osc.py b/scripts/obs/lib/osc.py
index 8838b1a..3627bd2 100644
--- a/scripts/obs/lib/osc.py
+++ b/scripts/obs/lib/osc.py
@@ -8,6 +8,7 @@
import sys
import lib
import lib.config
+import xml.etree.ElementTree
apiurl = None
proj = None
@@ -90,6 +91,37 @@
return "0"
+def get_package_sources(package, rev=None):
+ # Use the API directly, because "osc list" throws an exception when trying
+ # to list a directory with deleted files.
+ url = f"/source/{proj}/{os.path.basename(package)}"
+ if rev:
+ url = f"{url}?rev={rev}"
+
+ osc_ret = run_osc(["api", url])
+ root = xml.etree.ElementTree.fromstring(osc_ret.output)
+
+ # === Output ===
+ # <directory name="open5gs" rev="1012" vrev="1"
srcmd5="d98c9f8faeada3e291aa2197ca7fda03">
+ # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.dsc"
md5="7101346f69282beda8c1e2c191fadd4e" size="2040"
mtime="1743367015"/>
+ # <entry name="open5gs_2.7.5.4648.7dfd.202503302026.tar.xz"
md5="71fc5f9a885204d38f712236684822ac" size="14531220"
mtime="1743367016"/>
+ # </directory>
+
+ # === Output with already deleted files ===
+ # <directory name="open5gs" rev="1" vrev="1"
srcmd5="bcd19a5960921d5e30e99d988fffbd15">
+ # <entry name="open5gs_2.4.8.202206260026.dsc"
md5="bf154599a1493d23f2f7f8669c5adb7c" error="No such file or
directory"/>
+ # <entry name="open5gs_2.4.8.202206260026.tar.xz"
md5="3f26b59b342a35d80d5ac790ff0a8ff2" error="No such file or
directory"/>
+ # </directory>
+
+ ret = []
+ for entry in root.findall("entry"):
+ if entry.get("error"):
+ # already deleted
+ continue
+ ret += [f"{entry.get('md5')}-{entry.get('name')}"]
+ return ret
+
+
def create_package(package):
print(f"{package}: creating new OBS package")
@@ -180,3 +212,15 @@
def get_projects():
print(f"OBS: getting list of projects ({apiurl})")
return lib.osc.run_osc(["ls"]).output.rstrip().split("\n")
+
+
+def get_last_rev(package):
+ print(f"OBS: getting latest revision of {proj}:{package}")
+
+ url = f"/source/{proj}/{os.path.basename(package)}"
+ osc_ret = run_osc(["api", url])
+ root = xml.etree.ElementTree.fromstring(osc_ret.output)
+ rev = root.get("rev")
+ if rev:
+ return int(rev)
+ return 0
--
To view, visit
https://gerrit.osmocom.org/c/osmo-ci/+/39913?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.osmocom.org/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: osmo-ci
Gerrit-Branch: master
Gerrit-Change-Id: I4649dedf0bb52f0bbdc7577ac0bc9e93cb3be192
Gerrit-Change-Number: 39913
Gerrit-PatchSet: 1
Gerrit-Owner: osmith <osmith(a)sysmocom.de>