You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
313 lines
12 KiB
313 lines
12 KiB
From 7ec6e537898e139cc33017e03465ef40a86dd433 Mon Sep 17 00:00:00 2001 |
|
From: Lennart Poettering <lennart@poettering.net> |
|
Date: Tue, 19 Jul 2016 15:58:49 +0200 |
|
Subject: [PATCH] core: support percentage specifications on TasksMax= |
|
|
|
This adds support for a TasksMax=40% syntax for specifying values relative to |
|
the system's configured maximum number of processes. This is useful in order to |
|
neatly subdivide the available room for tasks within containers. |
|
|
|
Cherry-picked from: 83f8e80857090f63cf6a02c54d381dad3c0fad55 |
|
Related: #1337244 |
|
--- |
|
man/systemd.resource-control.xml | 15 +++---- |
|
src/core/dbus-cgroup.c | 22 +++++++++++ |
|
src/core/load-fragment.c | 15 +++++-- |
|
src/libsystemd/sd-bus/bus-util.c | 19 +++++++++ |
|
src/shared/util.c | 84 ++++++++++++++++++++++++++++++++++++++++ |
|
src/shared/util.h | 5 +++ |
|
src/test/test-util.c | 39 +++++++++++++++++++ |
|
7 files changed, 187 insertions(+), 12 deletions(-) |
|
|
|
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml |
|
index 217105ee5..f507c6748 100644 |
|
--- a/man/systemd.resource-control.xml |
|
+++ b/man/systemd.resource-control.xml |
|
@@ -221,15 +221,12 @@ |
|
<term><varname>TasksMax=<replaceable>N</replaceable></varname></term> |
|
|
|
<listitem> |
|
- <para>Specify the maximum number of tasks that may be |
|
- created in the unit. This ensures that the number of tasks |
|
- accounted for the unit (see above) stays below a specific |
|
- limit. If assigned the special value |
|
- <literal>infinity</literal> no tasks limit is applied. This |
|
- controls the <literal>pids.max</literal> control group |
|
- attribute. For details about this control group attribute, |
|
- see <ulink |
|
- url="https://www.kernel.org/doc/Documentation/cgroups/pids.txt">pids.txt</ulink>.</para> |
|
+ <para>Specify the maximum number of tasks that may be created in the unit. This ensures that the number of |
|
+ tasks accounted for the unit (see above) stays below a specific limit. This either takes an absolute number |
|
+ of tasks or a percentage value that is taken relative to the configured maximum number of tasks on the |
|
+ system. If assigned the special value <literal>infinity</literal>, no tasks limit is applied. This controls |
|
+ the <literal>pids.max</literal> control group attribute. For details about this control group attribute, see |
|
+ <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v1/pids.txt">pids.txt</ulink>.</para> |
|
|
|
<para>Implies <literal>TasksAccounting=true</literal>. The |
|
system default for this setting may be controlled with |
|
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c |
|
index a4465dc7a..fa76c60c1 100644 |
|
--- a/src/core/dbus-cgroup.c |
|
+++ b/src/core/dbus-cgroup.c |
|
@@ -694,6 +694,8 @@ int bus_cgroup_set_property( |
|
r = sd_bus_message_read(message, "t", &limit); |
|
if (r < 0) |
|
return r; |
|
+ if (limit <= 0) |
|
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is too small", name); |
|
|
|
if (mode != UNIT_CHECK) { |
|
c->tasks_max = limit; |
|
@@ -705,6 +707,26 @@ int bus_cgroup_set_property( |
|
unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu64, limit); |
|
} |
|
|
|
+ return 1; |
|
+ } else if (streq(name, "TasksMaxScale")) { |
|
+ uint64_t limit; |
|
+ uint32_t raw; |
|
+ |
|
+ r = sd_bus_message_read(message, "u", &raw); |
|
+ if (r < 0) |
|
+ return r; |
|
+ |
|
+ limit = system_tasks_max_scale(raw, UINT32_MAX); |
|
+ if (limit <= 0 || limit >= UINT64_MAX) |
|
+ return sd_bus_error_set_errnof(error, EINVAL, "%s= is out of range", name); |
|
+ |
|
+ if (mode != UNIT_CHECK) { |
|
+ c->tasks_max = limit; |
|
+ u->cgroup_realized_mask &= ~CGROUP_PIDS; |
|
+ unit_write_drop_in_private_format(u, mode, name, "TasksMax=%" PRIu32 "%%", |
|
+ (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX))); |
|
+ } |
|
+ |
|
return 1; |
|
} |
|
|
|
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c |
|
index c1ffee2c7..411475024 100644 |
|
--- a/src/core/load-fragment.c |
|
+++ b/src/core/load-fragment.c |
|
@@ -3089,9 +3089,18 @@ int config_parse_tasks_max( |
|
return 0; |
|
} |
|
|
|
- r = safe_atou64(rvalue, &u); |
|
- if (r < 0 || u < 1) { |
|
- log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); |
|
+ r = parse_percent(rvalue); |
|
+ if (r < 0) { |
|
+ r = safe_atou64(rvalue, &u); |
|
+ if (r < 0) { |
|
+ log_syntax(unit, LOG_ERR, filename, line, r, "Maximum tasks value '%s' invalid. Ignoring.", rvalue); |
|
+ return 0; |
|
+ } |
|
+ } else |
|
+ u = system_tasks_max_scale(r, 100U); |
|
+ |
|
+ if (u <= 0 || u >= UINT64_MAX) { |
|
+ log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range. Ignoring.", rvalue); |
|
return 0; |
|
} |
|
|
|
diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c |
|
index ed0849b63..f46fa2bbf 100644 |
|
--- a/src/libsystemd/sd-bus/bus-util.c |
|
+++ b/src/libsystemd/sd-bus/bus-util.c |
|
@@ -1409,7 +1409,26 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen |
|
} |
|
|
|
r = sd_bus_message_append(m, "v", "t", (uint64_t) bytes); |
|
+ } else if (streq(field, "TasksMax")) { |
|
+ uint64_t t; |
|
+ |
|
+ if (isempty(eq) || streq(eq, "infinity")) |
|
+ t = (uint64_t) -1; |
|
+ else { |
|
+ r = parse_percent(eq); |
|
+ if (r >= 0) { |
|
+ r = sd_bus_message_append(m, "sv", "TasksMaxScale", "u", (uint32_t) (((uint64_t) UINT32_MAX * r) / 100U)); |
|
+ if (r < 0) |
|
+ return bus_log_create_error(r); |
|
+ } else { |
|
+ r = safe_atou64(eq, &t); |
|
+ if (r < 0) |
|
+ return log_error_errno(r, "Failed to parse maximum tasks specification %s", assignment); |
|
+ } |
|
+ |
|
+ } |
|
|
|
+ r = sd_bus_message_append(m, "sv", "TasksMax", "t", t); |
|
} else if (STR_IN_SET(field, "CPUShares", "BlockIOWeight")) { |
|
uint64_t u; |
|
|
|
diff --git a/src/shared/util.c b/src/shared/util.c |
|
index cadaddee3..bbb457759 100644 |
|
--- a/src/shared/util.c |
|
+++ b/src/shared/util.c |
|
@@ -94,6 +94,7 @@ |
|
#include "def.h" |
|
#include "sparse-endian.h" |
|
#include "conf-parser.h" |
|
+#include "cgroup-util.h" |
|
|
|
int saved_argc = 0; |
|
char **saved_argv = NULL; |
|
@@ -8707,3 +8708,86 @@ int extract_many_words(const char **p, const char *separators, ExtractFlags flag |
|
|
|
return c; |
|
} |
|
+ |
|
+int parse_percent_unbounded(const char *p) { |
|
+ const char *pc, *n; |
|
+ unsigned v; |
|
+ int r; |
|
+ |
|
+ pc = endswith(p, "%"); |
|
+ if (!pc) |
|
+ return -EINVAL; |
|
+ |
|
+ n = strndupa(p, pc - p); |
|
+ r = safe_atou(n, &v); |
|
+ if (r < 0) |
|
+ return r; |
|
+ |
|
+ return (int) v; |
|
+} |
|
+ |
|
+int parse_percent(const char *p) { |
|
+ int v; |
|
+ |
|
+ v = parse_percent_unbounded(p); |
|
+ if (v > 100) |
|
+ return -ERANGE; |
|
+ |
|
+ return v; |
|
+} |
|
+ |
|
+uint64_t system_tasks_max(void) { |
|
+ |
|
+#if SIZEOF_PID_T == 4 |
|
+#define TASKS_MAX ((uint64_t) (INT32_MAX-1)) |
|
+#elif SIZEOF_PID_T == 2 |
|
+#define TASKS_MAX ((uint64_t) (INT16_MAX-1)) |
|
+#else |
|
+#error "Unknown pid_t size" |
|
+#endif |
|
+ |
|
+ _cleanup_free_ char *value = NULL, *root = NULL; |
|
+ uint64_t a = TASKS_MAX, b = TASKS_MAX; |
|
+ |
|
+ /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this |
|
+ * limit: |
|
+ * |
|
+ * a) the maximum value for the pid_t type |
|
+ * b) the cgroups pids_max attribute for the system |
|
+         *     c) the kernel's configured maximum PID value |
|
+ * |
|
+ * And then pick the smallest of the three */ |
|
+ |
|
+ if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0) |
|
+ (void) safe_atou64(value, &a); |
|
+ |
|
+ if (cg_get_root_path(&root) >= 0) { |
|
+ free(value); |
|
+ value = NULL; |
|
+ |
|
+ if (cg_get_attribute("pids", root, "pids.max", &value) >= 0) |
|
+ (void) safe_atou64(value, &b); |
|
+ } |
|
+ |
|
+ return MIN3(TASKS_MAX, |
|
+ a <= 0 ? TASKS_MAX : a, |
|
+ b <= 0 ? TASKS_MAX : b); |
|
+} |
|
+ |
|
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { |
|
+ uint64_t t, m; |
|
+ |
|
+ assert(max > 0); |
|
+ |
|
+ /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages |
|
+ * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ |
|
+ |
|
+ t = system_tasks_max(); |
|
+ assert(t > 0); |
|
+ |
|
+ m = t * v; |
|
+ if (m / t != v) /* overflow? */ |
|
+ return UINT64_MAX; |
|
+ |
|
+ return m / max; |
|
+} |
|
diff --git a/src/shared/util.h b/src/shared/util.h |
|
index 12afcc342..f1b6c348f 100644 |
|
--- a/src/shared/util.h |
|
+++ b/src/shared/util.h |
|
@@ -1098,3 +1098,8 @@ typedef enum ExtractFlags { |
|
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags); |
|
int extract_first_word_and_warn(const char **p, char **ret, const char *separators, ExtractFlags flags, const char *unit, const char *filename, unsigned line, const char *rvalue); |
|
int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) _sentinel_; |
|
+int parse_percent_unbounded(const char *p); |
|
+int parse_percent(const char *p); |
|
+ |
|
+uint64_t system_tasks_max(void); |
|
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max); |
|
diff --git a/src/test/test-util.c b/src/test/test-util.c |
|
index 9ae347b43..971f97d7c 100644 |
|
--- a/src/test/test-util.c |
|
+++ b/src/test/test-util.c |
|
@@ -1530,6 +1530,43 @@ static void test_shell_maybe_quote(void) { |
|
test_shell_maybe_quote_one("foo$bar", "\"foo\\$bar\""); |
|
} |
|
|
|
+static void test_system_tasks_max(void) { |
|
+ uint64_t t; |
|
+ |
|
+ t = system_tasks_max(); |
|
+ assert_se(t > 0); |
|
+ assert_se(t < UINT64_MAX); |
|
+ |
|
+ log_info("Max tasks: %" PRIu64, t); |
|
+} |
|
+ |
|
+static void test_system_tasks_max_scale(void) { |
|
+ uint64_t t; |
|
+ |
|
+ t = system_tasks_max(); |
|
+ |
|
+ assert_se(system_tasks_max_scale(0, 100) == 0); |
|
+ assert_se(system_tasks_max_scale(100, 100) == t); |
|
+ |
|
+ assert_se(system_tasks_max_scale(0, 1) == 0); |
|
+ assert_se(system_tasks_max_scale(1, 1) == t); |
|
+ assert_se(system_tasks_max_scale(2, 1) == 2*t); |
|
+ |
|
+ assert_se(system_tasks_max_scale(0, 2) == 0); |
|
+ assert_se(system_tasks_max_scale(1, 2) == t/2); |
|
+ assert_se(system_tasks_max_scale(2, 2) == t); |
|
+ assert_se(system_tasks_max_scale(3, 2) == (3*t)/2); |
|
+ assert_se(system_tasks_max_scale(4, 2) == t*2); |
|
+ |
|
+ assert_se(system_tasks_max_scale(0, UINT32_MAX) == 0); |
|
+ assert_se(system_tasks_max_scale((UINT32_MAX-1)/2, UINT32_MAX-1) == t/2); |
|
+ assert_se(system_tasks_max_scale(UINT32_MAX, UINT32_MAX) == t); |
|
+ |
|
+ /* overflow */ |
|
+ |
|
+ assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX); |
|
+} |
|
+ |
|
int main(int argc, char *argv[]) { |
|
log_parse_environment(); |
|
log_open(); |
|
@@ -1608,6 +1645,8 @@ int main(int argc, char *argv[]) { |
|
test_uid_ptr(); |
|
test_sparse_write(); |
|
test_shell_maybe_quote(); |
|
+ test_system_tasks_max(); |
|
+ test_system_tasks_max_scale(); |
|
|
|
return 0; |
|
}
|
|
|