You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

313 lines
12 KiB

From 7ec6e537898e139cc33017e03465ef40a86dd433 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 19 Jul 2016 15:58:49 +0200
Subject: [PATCH] core: support percentage specifications on TasksMax=
This adds support for a TasksMax=40% syntax for specifying values relative to
the system's configured maximum number of processes. This is useful in order to
neatly subdivide the available room for tasks within containers.
Cherry-picked from: 83f8e80857090f63cf6a02c54d381dad3c0fad55
Related: #1337244
---
man/systemd.resource-control.xml | 15 +++---
src/core/dbus-cgroup.c | 22 +++++++++
src/core/load-fragment.c | 15 ++++--
src/libsystemd/sd-bus/bus-util.c | 19 ++++++++
src/shared/util.c | 84 ++++++++++++++++++++++++++++++++
src/shared/util.h | 5 ++
src/test/test-util.c | 39 +++++++++++++++
7 files changed, 187 insertions(+), 12 deletions(-)
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 217105ee5a..f507c67487 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -221,15 +221,12 @@
<term><varname>TasksMax=<replaceable>N</replaceable></varname></term>
<listitem>
- <para>Specify the maximum number of tasks that may be
- created in the unit. This ensures that the number of tasks
- accounted for the unit (see above) stays below a specific
- limit. If assigned the special value
- <literal>infinity</literal> no tasks limit is applied. This
- controls the <literal>pids.max</literal> control group
- attribute. For details about this control group attribute,
- see <ulink
- url="https://www.kernel.org/doc/Documentation/cgroups/pids.txt">pids.txt</ulink>.</para>
+ <para>Specify the maximum number of tasks that may be created in the unit. This ensures that the number of
+ tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number
+ of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the
+ system. If assigned the special value <literal>infinity</literal>, no tasks limit is applied. This controls
+ the <literal>pids.max</literal> control group attribute. For details about this control group attribute, see
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para>
<para>Implies <literal>TasksAccounting=true</literal>. The
system default for this setting may be controlled with
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index a4465dc7aa..fa76c60c1f 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -694,6 +694,8 @@ int bus_cgroup_set_property(
r = sd_bus_message_read(message, "t", &limit);
if (r < 0)
return r;
+ if (limit <= 0)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name);
if (mode != UNIT_CHECK) {
c->tasks_max = limit;
@@ -705,6 +707,26 @@ int bus_cgroup_set_property(
unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit);
}
+ return 1;
+ } else if (streq(name, "TasksMaxScale")) {
+ uint64_t limit;
+ uint32_t raw;
+
+ r = sd_bus_message_read(message, "u", &raw);
+ if (r < 0)
+ return r;
+
+ limit = system_tasks_max_scale(raw, UINT32_MAX);
+ if (limit <= 0 || limit >= UINT64_MAX)
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name);
+
+ if (mode != UNIT_CHECK) {
+ c->tasks_max = limit;
+ u->cgroup_realized_mask &= ~CGROUP_PIDS;
+ unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%",
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
+ }
+
return 1;
}
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index c1ffee2c7e..4114750244 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -3089,9 +3089,18 @@ int config_parse_tasks_max(
return 0;
}
- r = safe_atou64(rvalue, &u);
- if (r < 0 || u < 1) {
- log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ r = parse_percent(rvalue);
+ if (r < 0) {
+ r = safe_atou64(rvalue, &u);
+ if (r < 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue);
+ return 0;
+ }
+ } else
+ u = system_tasks_max_scale(r, 100U);
+
+ if (u <= 0 || u >= UINT64_MAX) {
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue);
return 0;
}
diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c
index ed0849b638..f46fa2bbf3 100644
--- a/src/libsystemd/sd-bus/bus-util.c
+++ b/src/libsystemd/sd-bus/bus-util.c
@@ -1409,7 +1409,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
}
r = sd_bus_message_append(m, "v", "t", (uint64_t) bytes);
+ } else if (streq(field, "TasksMax")) {
+ uint64_t t;
+
+ if (isempty(eq) || streq(eq, "infinity"))
+ t = (uint64_t) -1;
+ else {
+ r = parse_percent(eq);
+ if (r >= 0) {
+ r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U));
+ if (r < 0)
+ return bus_log_create_error(r);
+ } else {
+ r = safe_atou64(eq, &t);
+ if (r < 0)
+ return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment);
+ }
+
+ }
+ r = sd_bus_message_append(m, "sv", "TasksMax", "t", t);
} else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) {
uint64_t u;
diff --git a/src/shared/util.c b/src/shared/util.c
index cadaddee32..bbb4577590 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -94,6 +94,7 @@
#include "def.h"
#include "sparse-endian.h"
#include "conf-parser.h"
+#include "cgroup-util.h"
int saved_argc = 0;
char **saved_argv = NULL;
@@ -8707,3 +8708,86 @@ int extract_many_words(const char **p, const char *separators, ExtractFlags flag
return c;
}
+
+int parse_percent_unbounded(const char *p) {
+ const char *pc, *n;
+ unsigned v;
+ int r;
+
+ pc = endswith(p, "%");
+ if (!pc)
+ return -EINVAL;
+
+ n = strndupa(p, pc - p);
+ r = safe_atou(n, &v);
+ if (r < 0)
+ return r;
+
+ return (int) v;
+}
+
+int parse_percent(const char *p) {
+ int v;
+
+ v = parse_percent_unbounded(p);
+ if (v > 100)
+ return -ERANGE;
+
+ return v;
+}
+
+uint64_t system_tasks_max(void) {
+
+#if SIZEOF_PID_T == 4
+#define TASKS_MAX ((uint64_t) (INT32_MAX-1))
+#elif SIZEOF_PID_T == 2
+#define TASKS_MAX ((uint64_t) (INT16_MAX-1))
+#else
+#error "Unknown pid_t size"
+#endif
+
+ _cleanup_free_ char *value = NULL, *root = NULL;
+ uint64_t a = TASKS_MAX, b = TASKS_MAX;
+
+ /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
+ * limit:
+ *
+ * a) the maximum value for the pid_t type
+ * b) the cgroups pids_max attribute for the system
+ * c) the kernel's configure maximum PID value
+ *
+ * And then pick the smallest of the three */
+
+ if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
+ (void) safe_atou64(value, &a);
+
+ if (cg_get_root_path(&root) >= 0) {
+ free(value);
+ value = NULL;
+
+ if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
+ (void) safe_atou64(value, &b);
+ }
+
+ return MIN3(TASKS_MAX,
+ a <= 0 ? TASKS_MAX : a,
+ b <= 0 ? TASKS_MAX : b);
+}
+
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+ uint64_t t, m;
+
+ assert(max > 0);
+
+ /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
+ * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
+
+ t = system_tasks_max();
+ assert(t > 0);
+
+ m = t * v;
+ if (m / t != v) /* overflow? */
+ return UINT64_MAX;
+
+ return m / max;
+}
diff --git a/src/shared/util.h b/src/shared/util.h
index 12afcc3429..f1b6c348f8 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -1098,3 +1098,8 @@ typedef enum ExtractFlags {
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags);
int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue);
int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) _sentinel_;
+int parse_percent_unbounded(const char *p);
+int parse_percent(const char *p);
+
+uint64_t system_tasks_max(void);
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
diff --git a/src/test/test-util.c b/src/test/test-util.c
index 9ae347b434..971f97d7c3 100644
--- a/src/test/test-util.c
+++ b/src/test/test-util.c
@@ -1530,6 +1530,43 @@ static void test_shell_maybe_quote(void) {
test_shell_maybe_quote_one("foo$bar", "\"foo\\$bar\"");
}
+static void test_system_tasks_max(void) {
+ uint64_t t;
+
+ t = system_tasks_max();
+ assert_se(t > 0);
+ assert_se(t < UINT64_MAX);
+
+ log_info("Max tasks: %" PRIu64, t);
+}
+
+static void test_system_tasks_max_scale(void) {
+ uint64_t t;
+
+ t = system_tasks_max();
+
+ assert_se(system_tasks_max_scale(0, 100) == 0);
+ assert_se(system_tasks_max_scale(100, 100) == t);
+
+ assert_se(system_tasks_max_scale(0, 1) == 0);
+ assert_se(system_tasks_max_scale(1, 1) == t);
+ assert_se(system_tasks_max_scale(2, 1) == 2*t);
+
+ assert_se(system_tasks_max_scale(0, 2) == 0);
+ assert_se(system_tasks_max_scale(1, 2) == t/2);
+ assert_se(system_tasks_max_scale(2, 2) == t);
+ assert_se(system_tasks_max_scale(3, 2) == (3*t)/2);
+ assert_se(system_tasks_max_scale(4, 2) == t*2);
+
+ assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0);
+ assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2);
+ assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t);
+
+ /* overflow */
+
+ assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
+}
+
int main(int argc, char *argv[]) {
log_parse_environment();
log_open();
@@ -1608,6 +1645,8 @@ int main(int argc, char *argv[]) {
test_uid_ptr();
test_sparse_write();
test_shell_maybe_quote();
+ test_system_tasks_max();
+ test_system_tasks_max_scale();
return 0;
}