osmith submitted this change.
testenv: don't use all RAM in 5gc testsuite build
When building the 5gc testsuite with as many parallel jobs as there are
CPU cores (the default), all RAM gets consumed on at least Pau's and my
systems and the machine becomes unusable. On other systems, an
out-of-memory killer might kill the compiler, which is not great either.
Currently this can only be avoided if the user remembers to pass "-j4"
to testenv, so that only 4 jobs run in parallel.

Add a new max_jobs_per_gb_ram= option and set it to 0.3 in
5gc/testenv.cfg, so that e.g. on a system with 15 GiB of RAM we get a
maximum of 4 jobs.
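For illustration, the limit is the total RAM in GiB multiplied by the
configured factor, rounded down:

    >>> import math
    >>> math.floor(15 * 0.3)  # 15 GiB of RAM, max_jobs_per_gb_ram=0.3
    4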
Change-Id: I1b9c9a2e3fa244337342e95457ac6d3a8caa0ae6
---
M 5gc/testenv.cfg
M _testenv/README.md
M _testenv/testenv/__init__.py
M _testenv/testenv/testenv_cfg.py
M _testenv/testenv/testsuite.py
5 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/5gc/testenv.cfg b/5gc/testenv.cfg
index d70c135..876aacf 100644
--- a/5gc/testenv.cfg
+++ b/5gc/testenv.cfg
@@ -1,5 +1,6 @@
[testsuite]
titan_min=11.1.0
+max_jobs_per_gb_ram=0.3
program=C5G_Tests
config=C5G_Tests.cfg
copy=testsuite_prepare.sh
diff --git a/_testenv/README.md b/_testenv/README.md
index ecc8701..072dcd2 100644
--- a/_testenv/README.md
+++ b/_testenv/README.md
@@ -89,6 +89,11 @@
depending on when the script runs. The script will not run on crash if podman
is used, as the container gets shutdown beforehand.
+* `max_jobs_per_gb_ram=`: optional factor for reducing the number of parallel
+ jobs when compiling the testsuite: the job limit is the total RAM in GiB
+ multiplied by this value. It is set in the 5gc testsuite to avoid consuming
+ all RAM and freezing the system (or getting the compiler killed by an
+ out-of-memory killer).
+
#### Component section
* `program=`: executable for starting a test component, may contain arguments.
diff --git a/_testenv/testenv/__init__.py b/_testenv/testenv/__init__.py
index 44e0e92..c9e63b4 100644
--- a/_testenv/testenv/__init__.py
+++ b/_testenv/testenv/__init__.py
@@ -125,6 +125,7 @@
"--jobs",
help="number of jobs to run simultaneously (default: nproc)",
type=int,
+ default=os.cpu_count(),
)
group.add_argument(
"-a",
diff --git a/_testenv/testenv/testenv_cfg.py b/_testenv/testenv/testenv_cfg.py
index c687d88..9edc102 100644
--- a/_testenv/testenv/testenv_cfg.py
+++ b/_testenv/testenv/testenv_cfg.py
@@ -5,9 +5,11 @@
import fnmatch
import glob
import logging
+import math
import os.path
import sys
import testenv
+import traceback
cfgs = {}
current = None
@@ -147,6 +149,64 @@
return get_titan_version(cfg)
+def verify_max_jobs_per_gb_ram(cfgs_all):
+    """Check that all testenv.cfg files in one directory agree on max_jobs_per_gb_ram=."""
+    values = set()
+
+    for cfg in cfgs_all.values():
+        values.add(cfg["testsuite"].get("max_jobs_per_gb_ram", None))
+
+    # Mixing different values, or set and unset, in one directory is an error
+    if len(values) > 1:
+        logging.error("Found different max_jobs_per_gb_ram= values in testenv.cfg files of the same directory.")
+        logging.error("This is not supported, please fix it.")
+        sys.exit(1)
+
+
+def get_titan_make_job_count():
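+    # verify_max_jobs_per_gb_ram() has ensured that all testenv.cfg files
+    # agree on max_jobs_per_gb_ram=, so looking at the first cfg is enough: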
+    cfg = next(iter(cfgs.values()))
+    max_jobs_per_gb_ram = cfg["testsuite"].get("max_jobs_per_gb_ram", None)
+    max_jobs = None
+
+    if max_jobs_per_gb_ram:
+        try:
+            gb_ram = 0
+            with open("/proc/meminfo") as f:
+                line = f.readline()
+                # Parse e.g. "MemTotal:       15571604 kB" (value is in KiB)
+                if line.startswith("MemTotal:"):
+                    gb_ram = int(line.split()[-2]) / 1024 / 1024
+            logging.debug(f"Building with {round(gb_ram, 2)} GiB of RAM")
+            max_jobs = math.floor(gb_ram * float(max_jobs_per_gb_ram))
+            if max_jobs < 1:
+                raise RuntimeError(f"max_jobs is invalid: max_jobs={max_jobs}, gb_ram={gb_ram}")
+
+        except Exception as ex:
+            traceback.print_exception(type(ex), ex, ex.__traceback__)
+            logging.error(f"Calculating max jobs with max_jobs_per_gb_ram={max_jobs_per_gb_ram} failed, assuming 4")
+            max_jobs = 4
+
+    if max_jobs and max_jobs < testenv.args.jobs:
+        logging.info(
+            f"Using only {max_jobs} jobs instead of {testenv.args.jobs} because of"
+            f" max_jobs_per_gb_ram={max_jobs_per_gb_ram} in testenv.cfg"
+        )
+        return max_jobs
+
+    return testenv.args.jobs
+
+
def verify_qemu_cfgs():
"""Check if passed -C or -K args make sense with the testenv configs."""
testsuite = testenv.args.testsuite
@@ -194,6 +254,7 @@
"prepare",
"program",
"titan_min",
+ "max_jobs_per_gb_ram",
]
keys_valid_component = [
"clean",
@@ -346,6 +407,8 @@
cfgs_all[basename] = cfg
+ verify_max_jobs_per_gb_ram(cfgs_all)
+
# Select configs based on --config argument(s)
for config_arg in testenv.args.config:
if config_arg == "all":
diff --git a/_testenv/testenv/testsuite.py b/_testenv/testenv/testsuite.py
index 0e00b0f..0007c57 100644
--- a/_testenv/testenv/testsuite.py
+++ b/_testenv/testenv/testsuite.py
@@ -58,8 +58,7 @@
    logging.info(f"Building testsuite (eclipse-titan {titan_version}, {titan_reason})")
    env = copy.copy(builddir_env)
-    if testenv.args.jobs:
-        env["PARALLEL_MAKE"] = f"-j{testenv.args.jobs}"
+    env["PARALLEL_MAKE"] = f"-j{testenv.testenv_cfg.get_titan_make_job_count()}"
    testenv.cmd.run(["make", testenv.args.testsuite], cwd=testenv.ttcn3_hacks_dir, env=env)
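For reference, a minimal standalone sketch of the calculation this patch
performs (assuming a Linux /proc/meminfo; the function and parameter names
here are illustrative, not part of the patch):

    import math

    def titan_make_jobs(max_jobs_per_gb_ram, default_jobs):
        # First line of /proc/meminfo, e.g. "MemTotal:       15571604 kB" (KiB)
        with open("/proc/meminfo") as f:
            mem_total_kib = int(f.readline().split()[1])
        gb_ram = mem_total_kib / 1024 / 1024  # KiB -> GiB
        max_jobs = math.floor(gb_ram * max_jobs_per_gb_ram)
        # The patch errors out (and falls back to 4 jobs) if max_jobs < 1;
        # this sketch simply clamps to the 1..default_jobs range instead.
        return max(1, min(max_jobs, default_jobs))

    # e.g. 15571604 kB ~= 14.85 GiB; floor(14.85 * 0.3) = 4 jobs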