You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
6.6 KiB
169 lines
6.6 KiB
From db57bf73d3e5e650b261834a0c39c9d368f9eeea Mon Sep 17 00:00:00 2001 |
|
From: Kyle Walker <kwalker@redhat.com> |
|
Date: Thu, 14 Dec 2017 11:46:03 -0500 |
|
Subject: [PATCH] core: Implement sync_with_progress() |
|
|
|
In similar fashion to the previous change, sync() operations can stall |
|
endlessly if cache is unable to be written out. In order to avoid an |
|
unbounded hang, the sync takes place within a child process. Every 10 |
|
seconds (SYNC_TIMEOUT_USEC), the value of /proc/meminfo "Dirty" is checked |
|
to verify it is smaller than the last iteration. If the sync is not making |
|
progress for 3 successive iterations (SYNC_PROGRESS_ATTEMPTS), a SIGKILL is |
|
sent to the sync process and the shutdown continues. |
|
|
|
(cherry picked from commit 73ad712fcfea5d8ba475044698d31d2c15d4180d) |
|
|
|
Related: #1571098 |
|
--- |
|
src/core/shutdown.c | 116 ++++++++++++++++++++++++++++++++++++++++++-- |
|
1 file changed, 111 insertions(+), 5 deletions(-) |
|
|
|
diff --git a/src/core/shutdown.c b/src/core/shutdown.c |
|
index 71f001ac13..0b0a54a7de 100644 |
|
--- a/src/core/shutdown.c |
|
+++ b/src/core/shutdown.c |
|
@@ -53,6 +53,9 @@ |
|
|
|
#define FINALIZE_ATTEMPTS 50 |
|
|
|
+#define SYNC_PROGRESS_ATTEMPTS 3 |
|
+#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC) |
|
+ |
|
static char* arg_verb; |
|
|
|
static int parse_argv(int argc, char *argv[]) { |
|
@@ -152,6 +155,102 @@ static int switch_root_initramfs(void) { |
|
return switch_root("/run/initramfs", "/oldroot", false, MS_BIND); |
|
} |
|
|
|
+/* Read the following fields from /proc/meminfo: |
|
+ * |
|
+ * NFS_Unstable |
|
+ * Writeback |
|
+ * Dirty |
|
+ * |
|
+ * Return true if the sum of these fields is greater than the previous |
|
+ * value input. For all other issues, report the failure and indicate that |
|
+ * the sync is not making progress. |
|
+ */ |
|
+static bool sync_making_progress(unsigned long long *prev_dirty) { |
|
+ _cleanup_fclose_ FILE *f = NULL; |
|
+ char line[LINE_MAX]; |
|
+ bool r = false; |
|
+ unsigned long long val = 0; |
|
+ |
|
+ f = fopen("/proc/meminfo", "re"); |
|
+ if (!f) |
|
+ return log_warning_errno(errno, "Failed to open /proc/meminfo: %m"); |
|
+ |
|
+ FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) { |
|
+ unsigned long long ull = 0; |
|
+ |
|
+ if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:")) |
|
+ continue; |
|
+ |
|
+ errno = 0; |
|
+ if (sscanf(line, "%*s %llu %*s", &ull) != 1) { |
|
+ if (errno != 0) |
|
+ log_warning_errno(errno, "Failed to parse /proc/meminfo: %m"); |
|
+ else |
|
+ log_warning("Failed to parse /proc/meminfo"); |
|
+ |
|
+ return false; |
|
+ } |
|
+ |
|
+ val += ull; |
|
+ } |
|
+ |
|
+ r = *prev_dirty > val; |
|
+ |
|
+ *prev_dirty = val; |
|
+ |
|
+ return r; |
|
+} |
|
+ |
|
+static void sync_with_progress(void) { |
|
+ unsigned checks; |
|
+ pid_t pid; |
|
+ int r; |
|
+ unsigned long long dirty = ULONG_LONG_MAX; |
|
+ |
|
+ BLOCK_SIGNALS(SIGCHLD); |
|
+ |
|
+ /* Due to the possiblity of the sync operation hanging, we fork |
|
+ * a child process and monitor the progress. If the timeout |
|
+ * lapses, the assumption is that that particular sync stalled. */ |
|
+ pid = fork(); |
|
+ if (pid < 0) { |
|
+ log_error_errno(errno, "Failed to fork: %m"); |
|
+ return; |
|
+ } |
|
+ |
|
+ if (pid == 0) { |
|
+ /* Start the sync operation here in the child */ |
|
+ sync(); |
|
+ _exit(EXIT_SUCCESS); |
|
+ } |
|
+ |
|
+ log_info("Syncing filesystems and block devices."); |
|
+ |
|
+ /* Start monitoring the sync operation. If more than |
|
+ * SYNC_PROGRESS_ATTEMPTS lapse without progress being made, |
|
+ * we assume that the sync is stalled */ |
|
+ for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) { |
|
+ r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC); |
|
+ if (r == 0) |
|
+ /* Sync finished without error. |
|
+ * (The sync itself does not return an error code) */ |
|
+ return; |
|
+ else if (r == -ETIMEDOUT) { |
|
+ /* Reset the check counter if the "Dirty" value is |
|
+ * decreasing */ |
|
+ if (sync_making_progress(&dirty)) |
|
+ checks = 0; |
|
+ } else { |
|
+ log_error_errno(r, "Failed to sync filesystems and block devices: %m"); |
|
+ return; |
|
+ } |
|
+ } |
|
+ |
|
+ /* Only reached in the event of a timeout. We should issue a kill |
|
+ * to the stray process. */ |
|
+ log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid); |
|
+ (void) kill(pid, SIGKILL); |
|
+} |
|
|
|
int main(int argc, char *argv[]) { |
|
bool need_umount, need_swapoff, need_loop_detach, need_dm_detach; |
|
@@ -202,6 +301,13 @@ int main(int argc, char *argv[]) { |
|
/* lock us into memory */ |
|
mlockall(MCL_CURRENT|MCL_FUTURE); |
|
|
|
+ /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that |
|
+ * slow IO is processed here already and the final process killing spree is not impacted by processes |
|
+ * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will |
|
+ * result. */ |
|
+ if (!in_container) |
|
+ sync_with_progress(); |
|
+ |
|
log_info("Sending SIGTERM to remaining processes..."); |
|
broadcast_signal(SIGTERM, true, true); |
|
|
|
@@ -338,12 +444,12 @@ int main(int argc, char *argv[]) { |
|
need_loop_detach ? " loop devices," : "", |
|
need_dm_detach ? " DM devices," : ""); |
|
|
|
- /* The kernel will automaticall flush ATA disks and suchlike |
|
- * on reboot(), but the file systems need to be synce'd |
|
- * explicitly in advance. So let's do this here, but not |
|
- * needlessly slow down containers. */ |
|
+ /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be |
|
+ * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we |
|
+ * sync'ed things already once above, but we did some more work since then which might have caused IO, hence |
|
+ * let's do it once more. Do not remove this sync, data corruption will result. */ |
|
if (!in_container) |
|
- sync(); |
|
+ sync_with_progress(); |
|
|
|
switch (cmd) { |
|
|
|
|