You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
132 lines
3.7 KiB
132 lines
3.7 KiB
6 years ago
|
From hbathini at linux.vnet.ibm.com Thu Aug 17 05:31:51 2017
|
||
|
From: hbathini at linux.vnet.ibm.com (Hari Bathini)
|
||
|
Date: Thu, 17 Aug 2017 18:01:51 +0530
|
||
|
Subject: [PATCH] kexec-tools: ppc64: avoid adding coherent memory regions to
|
||
|
crash memory ranges
|
||
|
Message-ID: <150297311110.25328.11468130779639120510.stgit@hbathini.in.ibm.com>
|
||
|
Content-Length: 3407
|
||
|
Lines: 121
|
||
|
|
||
|
Accelerator devices like GPU and FPGA cards contain onboard memory. This
|
||
|
onboard memory is represented as a memory-only NUMA node, integrating it
|
||
|
with core memory subsystem. Since the link through which these devices
|
||
|
are integrated to core memory goes down after a system crash and they are
|
||
|
meant for user workloads, avoid adding coherent device memory regions to
|
||
|
crash memory ranges. Without this change, makedumpfile tool tries to save
|
||
|
inaccessible coherent device memory regions, crashing the system.
|
||
|
|
||
|
Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
|
||
|
---
|
||
|
kexec/arch/ppc64/crashdump-ppc64.c | 64 +++++++++++++++++++++++++++++++++++-
|
||
|
kexec/arch/ppc64/kexec-ppc64.h | 1 +
|
||
|
2 files changed, 63 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c
|
||
|
index 13995bf..7ea3983 100644
|
||
|
--- a/kexec/arch/ppc64/crashdump-ppc64.c
|
||
|
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
|
||
|
@@ -181,6 +181,53 @@ static int get_dyn_reconf_crash_memory_ranges(void)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/*
|
||
|
+ * For a given memory node, check if it is mapped to system RAM or
|
||
|
+ * to onboard memory on accelerator device like GPU card or such.
|
||
|
+ */
|
||
|
+static int is_coherent_device_mem(const char *fname)
|
||
|
+{
|
||
|
+ char fpath[PATH_LEN];
|
||
|
+ char buf[32];
|
||
|
+ DIR *dmem;
|
||
|
+ FILE *file;
|
||
|
+ struct dirent *mentry;
|
||
|
+ int cnt, ret = 0;
|
||
|
+
|
||
|
+ strcpy(fpath, fname);
|
||
|
+ if ((dmem = opendir(fpath)) == NULL) {
|
||
|
+ perror(fpath);
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ while ((mentry = readdir(dmem)) != NULL) {
|
||
|
+ if (strcmp(mentry->d_name, "compatible"))
|
||
|
+ continue;
|
||
|
+
|
||
|
+ strcat(fpath, "/compatible");
|
||
|
+ if ((file = fopen(fpath, "r")) == NULL) {
|
||
|
+ perror(fpath);
|
||
|
+ ret = -1;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ if ((cnt = fread(buf, 1, 32, file)) < 0) {
|
||
|
+ perror(fpath);
|
||
|
+ fclose(file);
|
||
|
+ ret = -1;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ if (!strncmp(buf, "ibm,coherent-device-memory", 26)) {
|
||
|
+ ret = 1;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ fclose(file);
|
||
|
+ }
|
||
|
+
|
||
|
+ closedir(dmem);
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
/* Reads the appropriate file and retrieves the SYSTEM RAM regions for whom to
|
||
|
* create Elf headers. Keeping it separate from get_memory_ranges() as
|
||
|
* requirements are different in the case of normal kexec and crashdumps.
|
||
|
@@ -196,12 +243,12 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
|
||
|
{
|
||
|
|
||
|
char device_tree[256] = "/proc/device-tree/";
|
||
|
- char fname[256];
|
||
|
+ char fname[PATH_LEN];
|
||
|
char buf[MAXBYTES];
|
||
|
DIR *dir, *dmem;
|
||
|
FILE *file;
|
||
|
struct dirent *dentry, *mentry;
|
||
|
- int n, crash_rng_len = 0;
|
||
|
+ int n, ret, crash_rng_len = 0;
|
||
|
unsigned long long start, end;
|
||
|
int page_size;
|
||
|
|
||
|
@@ -240,6 +287,19 @@ static int get_crash_memory_ranges(struct memory_range **range, int *ranges)
|
||
|
continue;
|
||
|
strcpy(fname, device_tree);
|
||
|
strcat(fname, dentry->d_name);
|
||
|
+
|
||
|
+ ret = is_coherent_device_mem(fname);
|
||
|
+ if (ret == -1) {
|
||
|
+ closedir(dir);
|
||
|
+ goto err;
|
||
|
+ } else if (ret == 1) {
|
||
|
+ /*
|
||
|
+ * Avoid adding this memory region as it is not
|
||
|
+ * mapped to system RAM.
|
||
|
+ */
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+
|
||
|
if ((dmem = opendir(fname)) == NULL) {
|
||
|
perror(fname);
|
||
|
closedir(dir);
|
||
|
diff --git a/kexec/arch/ppc64/kexec-ppc64.h b/kexec/arch/ppc64/kexec-ppc64.h
|
||
|
index 633ae77..434b4bf 100644
|
||
|
--- a/kexec/arch/ppc64/kexec-ppc64.h
|
||
|
+++ b/kexec/arch/ppc64/kexec-ppc64.h
|
||
|
@@ -1,6 +1,7 @@
|
||
|
#ifndef KEXEC_PPC64_H
|
||
|
#define KEXEC_PPC64_H
|
||
|
|
||
|
+#define PATH_LEN 256
|
||
|
#define MAXBYTES 128
|
||
|
#define MAX_LINE 160
|
||
|
#define CORE_TYPE_ELF32 1
|
||
|
|
||
|
|
||
|
|