You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
807 lines
25 KiB
807 lines
25 KiB
commit 38d48ed48f9d0baa20786d98abe2b4085fca7d5d |
|
Author: Luck, Tony <tony.luck@intel.com> |
|
Date: Mon Aug 4 13:29:01 2014 -0700 |
|
|
|
rasdaemon: Add support for extlog trace events |
|
|
|
Linux kernel 3.17 includes a new trace event to pick up extended |
|
error logs produced by BIOS in the Common Platform Error Record |
|
format described in appendix N of the UEFI standard. This patch |
|
adds support to collect that information and log it both in |
|
readable ASCII and into the sqlite3 database that rasdaemon |
|
uses to store all error information. In addition ras-mc-ctl |
|
is updated to query that database for both detailed and summary |
|
reports. |
|
|
|
Big thanks to Aristeu for pretty much all the sqlite3 pieces, |
|
plus testing and fixing miscellaneous issues elsewhere. |
|
|
|
Signed-off-by: Tony Luck <tony.luck@intel.com> |
|
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
|
|
|
diff --git a/Makefile.am b/Makefile.am |
|
index 0fa615f..117c970 100644 |
|
--- a/Makefile.am |
|
+++ b/Makefile.am |
|
@@ -30,13 +30,17 @@ if WITH_MCE |
|
mce-intel-dunnington.c mce-intel-tulsa.c \ |
|
mce-intel-sb.c mce-intel-ivb.c |
|
endif |
|
+if WITH_EXTLOG |
|
+ rasdaemon_SOURCES += ras-extlog-handler.c |
|
+endif |
|
if WITH_ABRT_REPORT |
|
rasdaemon_SOURCES += ras-report.c |
|
endif |
|
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a |
|
|
|
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ |
|
- ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h |
|
+ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ |
|
+ ras-extlog-handler.h |
|
|
|
# This rule can't be called with more than one Makefile job (like make -j8) |
|
# I can't figure out a way to fix that |
|
diff --git a/configure.ac b/configure.ac |
|
index 64a5b13..9495491 100644 |
|
--- a/configure.ac |
|
+++ b/configure.ac |
|
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [ |
|
]) |
|
AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes]) |
|
|
|
+AC_ARG_ENABLE([extlog], |
|
+ AS_HELP_STRING([--enable-extlog], [enable EXTLOG events (currently experimental)])) |
|
+ |
|
+AS_IF([test "x$enable_extlog" = "xyes"], [ |
|
+ AC_DEFINE(HAVE_EXTLOG,1,"have EXTLOG events collect") |
|
+ AC_SUBST([WITH_EXTLOG]) |
|
+]) |
|
+AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes]) |
|
+ |
|
AC_ARG_ENABLE([abrt_report], |
|
AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) |
|
|
|
diff --git a/ras-aer-handler.c b/ras-aer-handler.c |
|
index 50526af..bb7c0b9 100644 |
|
--- a/ras-aer-handler.c |
|
+++ b/ras-aer-handler.c |
|
@@ -70,7 +70,7 @@ int ras_aer_event_handler(struct trace_seq *s, |
|
*/ |
|
|
|
if (ras->use_uptime) |
|
- now = record->ts/1000000000L + ras->uptime_diff; |
|
+ now = record->ts/user_hz + ras->uptime_diff; |
|
else |
|
now = time(NULL); |
|
|
|
diff --git a/ras-events.c b/ras-events.c |
|
index ecbbd3a..0be7c3f 100644 |
|
--- a/ras-events.c |
|
+++ b/ras-events.c |
|
@@ -30,6 +30,7 @@ |
|
#include "ras-mc-handler.h" |
|
#include "ras-aer-handler.h" |
|
#include "ras-mce-handler.h" |
|
+#include "ras-extlog-handler.h" |
|
#include "ras-record.h" |
|
#include "ras-logger.h" |
|
|
|
@@ -203,6 +204,10 @@ int toggle_ras_mc_event(int enable) |
|
rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable); |
|
#endif |
|
|
|
+#ifdef HAVE_EXTLOG |
|
+ rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); |
|
+#endif |
|
+ |
|
free_ras: |
|
free(ras); |
|
return rc; |
|
@@ -688,6 +693,19 @@ int handle_ras_events(int record_events) |
|
"mce", "mce_record"); |
|
} |
|
#endif |
|
+ |
|
+#ifdef HAVE_EXTLOG |
|
+	/* Hook the extlog handler onto the ras:extlog_mem_event tracepoint. */ |
|
+	rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", |
|
+			       ras_extlog_mem_event_handler); |
|
+	if (!rc) { |
|
+		/* |
|
+		 * tell kernel we are listening, so don't printk to console. |
|
+		 * The descriptor returned by open() is deliberately discarded |
|
+		 * and never closed in this block. |
|
+		 */ |
|
+		(void)open("/sys/kernel/debug/ras/daemon_active", 0); |
|
+		num_events++; |
|
+	} else |
|
+		/* Report the event that actually failed (was "aer_event"). */ |
|
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", |
|
+		    "ras", "extlog_mem_event"); |
|
+#endif |
|
+ |
|
if (!num_events) { |
|
log(ALL, LOG_INFO, |
|
"Failed to trace all supported RAS events. Aborting.\n"); |
|
diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c |
|
new file mode 100644 |
|
index 0000000..5fd3580 |
|
--- /dev/null |
|
+++ b/ras-extlog-handler.c |
|
@@ -0,0 +1,246 @@ |
|
+/* |
|
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com> |
|
+ * |
|
+ * This program is free software; you can redistribute it and/or modify |
|
+ * it under the terms of the GNU General Public License as published by |
|
+ * the Free Software Foundation; either version 2 of the License, or |
|
+ * (at your option) any later version. |
|
+ * |
|
+ * This program is distributed in the hope that it will be useful, |
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
+ * GNU General Public License for more details. |
|
+ * |
|
+ * You should have received a copy of the GNU General Public License |
|
+ * along with this program; if not, write to the Free Software |
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
+*/ |
|
+#include <ctype.h> |
|
+#include <errno.h> |
|
+#include <stdio.h> |
|
+#include <stdlib.h> |
|
+#include <string.h> |
|
+#include <unistd.h> |
|
+#include <stdint.h> |
|
+#include "libtrace/kbuffer.h" |
|
+#include "ras-extlog-handler.h" |
|
+#include "ras-record.h" |
|
+#include "ras-logger.h" |
|
+#include "ras-report.h" |
|
+ |
|
+/* |
|
+ * Translate the numeric error type of an extlog memory event into a |
|
+ * human readable name. Values outside 0..15 yield "unknown-type". |
|
+ */ |
|
+static char *err_type(int etype) |
|
+{ |
|
+	static char *const type_names[] = { |
|
+		"unknown", |
|
+		"no error", |
|
+		"single-bit ECC", |
|
+		"multi-bit ECC", |
|
+		"single-symbol chipkill ECC", |
|
+		"multi-symbol chipkill ECC", |
|
+		"master abort", |
|
+		"target abort", |
|
+		"parity error", |
|
+		"watchdog timeout", |
|
+		"invalid address", |
|
+		"mirror Broken", |
|
+		"memory sparing", |
|
+		"scrub corrected error", |
|
+		"scrub uncorrected error", |
|
+		"physical memory map-out event", |
|
+	}; |
|
+	const int count = sizeof(type_names) / sizeof(type_names[0]); |
|
+ |
|
+	if (etype >= 0 && etype < count) |
|
+		return type_names[etype]; |
|
+	return "unknown-type"; |
|
+} |
|
+ |
|
+/* |
|
+ * Translate the numeric severity of an extlog memory event into a human |
|
+ * readable name. Values outside 0..3 yield "unknown-severity". |
|
+ */ |
|
+static char *err_severity(int severity) |
|
+{ |
|
+	static char *const severity_names[] = { |
|
+		"recoverable", |
|
+		"fatal", |
|
+		"corrected", |
|
+		"informational", |
|
+	}; |
|
+	const int count = sizeof(severity_names) / sizeof(severity_names[0]); |
|
+ |
|
+	if (severity >= 0 && severity < count) |
|
+		return severity_names[severity]; |
|
+	return "unknown-severity"; |
|
+} |
|
+ |
|
+/* |
|
+ * Build the physical address mask for an event: all bits from position |
|
+ * 'lsb' upwards are set, the 'lsb' low-order bits are cleared. |
|
+ * |
|
+ * Any value that is not a valid bit position (0..63) returns an all-ones |
|
+ * mask. That covers the 0xff sentinel and also the negative value the |
|
+ * sentinel turns into when it has been stored in a signed 8-bit field |
|
+ * before reaching this function. It also keeps the shift below well |
|
+ * defined: shifting a 64-bit value by a negative count or by 64 or more |
|
+ * is undefined behaviour in C. |
|
+ */ |
|
+static unsigned long long err_mask(int lsb) |
|
+{ |
|
+	if (lsb < 0 || lsb >= 64) |
|
+		return ~0ull; |
|
+	return ~((1ull << lsb) - 1); |
|
+} |
|
+ |
|
+/* |
|
+ * Bit flags tested against cper_mem_err_compact.validation_bits. |
|
+ * err_cper_data() prints a field only when its flag is set. |
|
+ */ |
|
+#define CPER_MEM_VALID_NODE 0x0008 |
|
+#define CPER_MEM_VALID_CARD 0x0010 |
|
+#define CPER_MEM_VALID_MODULE 0x0020 |
|
+#define CPER_MEM_VALID_BANK 0x0040 |
|
+#define CPER_MEM_VALID_DEVICE 0x0080 |
|
+#define CPER_MEM_VALID_ROW 0x0100 |
|
+#define CPER_MEM_VALID_COLUMN 0x0200 |
|
+#define CPER_MEM_VALID_BIT_POSITION 0x0400 |
|
+#define CPER_MEM_VALID_REQUESTOR_ID 0x0800 |
|
+#define CPER_MEM_VALID_RESPONDER_ID 0x1000 |
|
+#define CPER_MEM_VALID_TARGET_ID 0x2000 |
|
+#define CPER_MEM_VALID_RANK_NUMBER 0x8000 |
|
+#define CPER_MEM_VALID_CARD_HANDLE 0x10000 |
|
+#define CPER_MEM_VALID_MODULE_HANDLE 0x20000 |
|
+ |
|
+/* |
|
+ * Memory error location record. err_cper_data() overlays this layout on |
|
+ * the raw "data" field of the trace event, so field order and sizes must |
|
+ * not change. |
|
+ * NOTE(review): presumably mirrors the kernel structure of the same name; |
|
+ * confirm layout and packing against the kernel header. |
|
+ */ |
|
+struct cper_mem_err_compact { |
|
+ unsigned long long validation_bits; |
|
+ unsigned short node; |
|
+ unsigned short card; |
|
+ unsigned short module; |
|
+ unsigned short bank; |
|
+ unsigned short device; |
|
+ unsigned short row; |
|
+ unsigned short column; |
|
+ unsigned short bit_pos; |
|
+ unsigned long long requestor_id; |
|
+ unsigned long long responder_id; |
|
+ unsigned long long target_id; |
|
+ unsigned short rank; |
|
+ unsigned short mem_array_handle; |
|
+ unsigned short mem_dev_handle; |
|
+}; |
|
+ |
|
+/* |
|
+ * Format the location fields of a compact CPER memory error record as |
|
+ * " (name: value name: value ...)". |
|
+ * |
|
+ * c: raw record bytes, interpreted as struct cper_mem_err_compact. |
|
+ * |
|
+ * Returns "" when c is NULL, when no validation bit is set, or when none |
|
+ * of the set bits corresponds to a field printed here (the original code |
|
+ * produced the malformed string " )" in that last case). Otherwise |
|
+ * returns a pointer to a static buffer that is overwritten by the next |
|
+ * call. |
|
+ * |
|
+ * With every field present the text needs about 240 bytes, so the plain |
|
+ * sprintf() calls below stay inside the 256 byte buffer. |
|
+ */ |
|
+static char *err_cper_data(const char *c) |
|
+{ |
|
+	const struct cper_mem_err_compact *cpd = (const struct cper_mem_err_compact *)c; |
|
+	static char buf[256]; |
|
+	char *p = buf; |
|
+	char *fields; |
|
+ |
|
+	if (!cpd || cpd->validation_bits == 0) |
|
+		return ""; |
|
+	p += sprintf(p, " ("); |
|
+	fields = p;	/* remember where the first field would start */ |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_NODE) |
|
+		p += sprintf(p, "node: %d ", cpd->node); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_CARD) |
|
+		p += sprintf(p, "card: %d ", cpd->card); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_MODULE) |
|
+		p += sprintf(p, "module: %d ", cpd->module); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_BANK) |
|
+		p += sprintf(p, "bank: %d ", cpd->bank); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_DEVICE) |
|
+		p += sprintf(p, "device: %d ", cpd->device); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_ROW) |
|
+		p += sprintf(p, "row: %d ", cpd->row); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_COLUMN) |
|
+		p += sprintf(p, "column: %d ", cpd->column); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_BIT_POSITION) |
|
+		p += sprintf(p, "bit_pos: %d ", cpd->bit_pos); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) |
|
+		p += sprintf(p, "req_id: 0x%llx ", cpd->requestor_id); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_RESPONDER_ID) |
|
+		p += sprintf(p, "resp_id: 0x%llx ", cpd->responder_id); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_TARGET_ID) |
|
+		p += sprintf(p, "tgt_id: 0x%llx ", cpd->target_id); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_RANK_NUMBER) |
|
+		p += sprintf(p, "rank: %d ", cpd->rank); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_CARD_HANDLE) |
|
+		p += sprintf(p, "card_handle: %d ", cpd->mem_array_handle); |
|
+	if (cpd->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) |
|
+		p += sprintf(p, "module_handle: %d ", cpd->mem_dev_handle); |
|
+ |
|
+	/* Nothing printable: do not emit an empty pair of parentheses. */ |
|
+	if (p == fields) |
|
+		return ""; |
|
+ |
|
+	/* Replace the trailing space of the last field with ")". */ |
|
+	p[-1] = ')'; |
|
+ |
|
+	return buf; |
|
+} |
|
+ |
|
+/* |
|
+ * Render a 16 byte little-endian UUID as the usual |
|
+ * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" text form. |
|
+ * |
|
+ * uu: pointer to the 16 raw UUID bytes; NULL yields an empty string. |
|
+ * |
|
+ * Returns a pointer to a static buffer that is overwritten by the next |
|
+ * call. |
|
+ */ |
|
+static char *uuid_le(const char *uu) |
|
+{ |
|
+	static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; |
|
+	/* Byte order in which the raw bytes appear in the text form. */ |
|
+	static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; |
|
+	char *p = uuid; |
|
+	int i; |
|
+ |
|
+	if (!uu) { |
|
+		uuid[0] = 0; |
|
+		return uuid; |
|
+	} |
|
+ |
|
+	for (i = 0; i < 16; i++) { |
|
+		/* |
|
+		 * Convert through unsigned char: where plain char is signed, |
|
+		 * a byte >= 0x80 would be sign-extended, print as eight hex |
|
+		 * digits and overrun the buffer. |
|
+		 */ |
|
+		p += sprintf(p, "%.2x", (unsigned char)uu[le[i]]); |
|
+		switch (i) { |
|
+		case 3: |
|
+		case 5: |
|
+		case 7: |
|
+		case 9: |
|
+			*p++ = '-'; |
|
+			break; |
|
+		} |
|
+	} |
|
+ |
|
+	*p = 0; |
|
+ |
|
+	return uuid; |
|
+} |
|
+ |
|
+ |
|
+/* |
|
+ * Append the one-line human readable description of an extlog memory |
|
+ * event to the trace sequence 's'. 'ras' and 'record' are accepted but |
|
+ * not used here. |
|
+ */ |
|
+static void report_extlog_mem_event(struct ras_events *ras, |
|
+				    struct pevent_record *record, |
|
+				    struct trace_seq *s, |
|
+				    struct ras_extlog_event *ev) |
|
+{ |
|
+	const char *sev_name = err_severity(ev->severity); |
|
+	const char *type_name = err_type(ev->etype); |
|
+	unsigned long long pa_mask = err_mask(ev->pa_mask_lsb); |
|
+	const char *location = err_cper_data(ev->cper_data); |
|
+	const char *fru_uuid = uuid_le(ev->fru_id); |
|
+ |
|
+	trace_seq_printf(s, "%d %s error: %s physical addr: 0x%llx mask: 0x%llx%s %s %s", |
|
+			 ev->error_seq, sev_name, type_name, |
|
+			 ev->address, pa_mask, location, |
|
+			 ev->fru_text, fru_uuid); |
|
+} |
|
+ |
|
+/* |
|
+ * Trace callback for extlog memory events: decode the record, append a |
|
+ * readable description to 's' and store the event through |
|
+ * ras_store_extlog_mem_record(). |
|
+ * |
|
+ * s:       trace sequence receiving the formatted text |
|
+ * record:  raw trace record |
|
+ * event:   format description used to look up the fields |
|
+ * context: the struct ras_events of the daemon |
|
+ * |
|
+ * Returns 0 on success and -1 when any field of the event cannot be read. |
|
+ */ |
|
+int ras_extlog_mem_event_handler(struct trace_seq *s, |
|
+				 struct pevent_record *record, |
|
+				 struct event_format *event, void *context) |
|
+{ |
|
+	int len = 0; |
|
+	unsigned long long val; |
|
+	struct ras_events *ras = context; |
|
+	time_t now; |
|
+	struct tm *tm; |
|
+	struct ras_extlog_event ev; |
|
+ |
|
+	/* Start from a fully defined event so no field is read uninitialised. */ |
|
+	memset(&ev, 0, sizeof(ev)); |
|
+ |
|
+	/* |
|
+	 * Newer kernels (3.10-rc1 or later) provide an uptime clock. |
|
+	 * On previous kernels, the way to properly generate an event would |
|
+	 * be to inject a fake one, measure its timestamp and diff it against |
|
+	 * gettimeofday. We won't do it here. Instead, let's use uptime, |
|
+	 * falling back to the event report's time, if the "uptime" clock is |
|
+	 * not available (legacy kernels). |
|
+	 */ |
|
+ |
|
+	if (ras->use_uptime) |
|
+		now = record->ts/user_hz + ras->uptime_diff; |
|
+	else |
|
+		now = time(NULL); |
|
+ |
|
+	/* |
|
+	 * localtime() may return NULL and strftime() may produce nothing; |
|
+	 * use a fixed placeholder instead of printing an undefined buffer. |
|
+	 */ |
|
+	tm = localtime(&now); |
|
+	if (!tm || !strftime(ev.timestamp, sizeof(ev.timestamp), |
|
+			     "%Y-%m-%d %H:%M:%S %z", tm)) |
|
+		snprintf(ev.timestamp, sizeof(ev.timestamp), |
|
+			 "1970-01-01 00:00:00 +0000"); |
|
+	trace_seq_printf(s, "%s ", ev.timestamp); |
|
+ |
|
+	if (pevent_get_field_val(s, event, "etype", record, &val, 1) < 0) |
|
+		return -1; |
|
+	ev.etype = val; |
|
+	if (pevent_get_field_val(s, event, "err_seq", record, &val, 1) < 0) |
|
+		return -1; |
|
+	ev.error_seq = val; |
|
+	if (pevent_get_field_val(s, event, "sev", record, &val, 1) < 0) |
|
+		return -1; |
|
+	ev.severity = val; |
|
+	if (pevent_get_field_val(s, event, "pa", record, &val, 1) < 0) |
|
+		return -1; |
|
+	ev.address = val; |
|
+	if (pevent_get_field_val(s, event, "pa_mask_lsb", record, &val, 1) < 0) |
|
+		return -1; |
|
+	ev.pa_mask_lsb = val; |
|
+ |
|
+	/* |
|
+	 * The raw fields are dereferenced by the report and store code, so |
|
+	 * a failed lookup is treated like any other missing field. |
|
+	 */ |
|
+	ev.cper_data = pevent_get_field_raw(s, event, "data", |
|
+					    record, &len, 1); |
|
+	if (!ev.cper_data) |
|
+		return -1; |
|
+	ev.cper_data_length = len; |
|
+	ev.fru_text = pevent_get_field_raw(s, event, "fru_text", |
|
+					   record, &len, 1); |
|
+	if (!ev.fru_text) |
|
+		return -1; |
|
+	ev.fru_id = pevent_get_field_raw(s, event, "fru_id", |
|
+					 record, &len, 1); |
|
+	if (!ev.fru_id) |
|
+		return -1; |
|
+ |
|
+	report_extlog_mem_event(ras, record, s, &ev); |
|
+ |
|
+	ras_store_extlog_mem_record(ras, &ev); |
|
+ |
|
+	return 0; |
|
+} |
|
diff --git a/ras-extlog-handler.h b/ras-extlog-handler.h |
|
new file mode 100644 |
|
index 0000000..54e8cec |
|
--- /dev/null |
|
+++ b/ras-extlog-handler.h |
|
@@ -0,0 +1,31 @@ |
|
+/* |
|
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com> |
|
+ * |
|
+ * This program is free software; you can redistribute it and/or modify |
|
+ * it under the terms of the GNU General Public License as published by |
|
+ * the Free Software Foundation; either version 2 of the License, or |
|
+ * (at your option) any later version. |
|
+ * |
|
+ * This program is distributed in the hope that it will be useful, |
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
+ * GNU General Public License for more details. |
|
+ * |
|
+ * You should have received a copy of the GNU General Public License |
|
+ * along with this program; if not, write to the Free Software |
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
+*/ |
|
+ |
|
+#ifndef __RAS_EXTLOG_HANDLER_H |
|
+#define __RAS_EXTLOG_HANDLER_H |
|
+ |
|
+#include <stdint.h> |
|
+ |
|
+#include "ras-events.h" |
|
+#include "libtrace/event-parse.h" |
|
+ |
|
+/* |
|
+ * Handler registered for the "ras"/"extlog_mem_event" trace event. |
|
+ * 'context' is used as the struct ras_events pointer. Returns 0 after the |
|
+ * event has been reported and stored, -1 when a numeric field of the |
|
+ * event cannot be read. |
|
+ */ |
|
+extern int ras_extlog_mem_event_handler(struct trace_seq *s, |
|
+ struct pevent_record *record, |
|
+ struct event_format *event, void *context); |
|
+ |
|
+#endif |
|
diff --git a/ras-mc-handler.c b/ras-mc-handler.c |
|
index ffb3805..704a41c 100644 |
|
--- a/ras-mc-handler.c |
|
+++ b/ras-mc-handler.c |
|
@@ -47,7 +47,7 @@ int ras_mc_event_handler(struct trace_seq *s, |
|
*/ |
|
|
|
if (ras->use_uptime) |
|
- now = record->ts/1000000000L + ras->uptime_diff; |
|
+ now = record->ts/user_hz + ras->uptime_diff; |
|
else |
|
now = time(NULL); |
|
|
|
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
|
index 1431049..a1d0b5d 100644 |
|
--- a/ras-mce-handler.c |
|
+++ b/ras-mce-handler.c |
|
@@ -237,7 +237,7 @@ static void report_mce_event(struct ras_events *ras, |
|
*/ |
|
|
|
if (ras->use_uptime) |
|
- now = record->ts/1000000000L + ras->uptime_diff; |
|
+ now = record->ts/user_hz + ras->uptime_diff; |
|
else |
|
now = time(NULL); |
|
|
|
diff --git a/ras-record.c b/ras-record.c |
|
index e5150ad..3dc4493 100644 |
|
--- a/ras-record.c |
|
+++ b/ras-record.c |
|
@@ -157,6 +157,57 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
|
} |
|
#endif |
|
|
|
+#ifdef HAVE_EXTLOG |
|
+/* |
|
+ * Columns of the "extlog_event" table. |
|
+ * NOTE(review): ras_store_extlog_mem_record() binds ev->error_seq as |
|
+ * parameter 3; if parameters follow this table's order after "id", that |
|
+ * value lands in "error_count" - confirm the intended column name. |
|
+ * The pa_mask_lsb value of the event has no column here. |
|
+ */ |
|
+static const struct db_fields extlog_event_fields[] = { |
|
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
|
+ { .name="timestamp", .type="TEXT" }, |
|
+ { .name="etype", .type="INTEGER" }, |
|
+ { .name="error_count", .type="INTEGER" }, |
|
+ { .name="severity", .type="INTEGER" }, |
|
+ { .name="address", .type="INTEGER" }, |
|
+ { .name="fru_id", .type="BLOB" }, |
|
+ { .name="fru_text", .type="TEXT" }, |
|
+ { .name="cper_data", .type="BLOB" }, |
|
+}; |
|
+ |
|
+/* Table descriptor handed to ras_mc_create_table()/ras_mc_prepare_stmt(). */ |
|
+static const struct db_table_descriptor extlog_event_tab = { |
|
+ .name = "extlog_event", |
|
+ .fields = extlog_event_fields, |
|
+ .num_fields = ARRAY_SIZE(extlog_event_fields), |
|
+}; |
|
+ |
|
+/* |
|
+ * Insert one extlog memory event into the sqlite3 database. |
|
+ * |
|
+ * ras: daemon state; ras->db_priv holds the prepared statement |
|
+ * ev:  decoded event to store |
|
+ * |
|
+ * Returns 0 when no database or prepared statement is available, |
|
+ * otherwise the result of sqlite3_reset() on the statement. |
|
+ */ |
|
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) |
|
+{ |
|
+	int rc; |
|
+	int failed = 0; |
|
+	struct sqlite3_priv *priv = ras->db_priv; |
|
+	sqlite3_stmt *stmt; |
|
+ |
|
+	if (!priv || !priv->stmt_extlog_record) |
|
+		return 0; |
|
+	stmt = priv->stmt_extlog_record; |
|
+	log(TERM, LOG_INFO, "extlog_record store: %p\n", (void *)stmt); |
|
+ |
|
+	/* |
|
+	 * The NULL destructor means sqlite does not copy the buffers; they |
|
+	 * only have to stay valid until the step below has run. |
|
+	 */ |
|
+	sqlite3_bind_text  (stmt, 1, ev->timestamp, -1, NULL); |
|
+	sqlite3_bind_int   (stmt, 2, ev->etype); |
|
+	sqlite3_bind_int   (stmt, 3, ev->error_seq); |
|
+	sqlite3_bind_int   (stmt, 4, ev->severity); |
|
+	sqlite3_bind_int64 (stmt, 5, (sqlite3_int64)ev->address); |
|
+	sqlite3_bind_blob  (stmt, 6, ev->fru_id, 16, NULL); |
|
+	sqlite3_bind_text  (stmt, 7, ev->fru_text, -1, NULL); |
|
+	sqlite3_bind_blob  (stmt, 8, ev->cper_data, ev->cper_data_length, NULL); |
|
+ |
|
+	rc = sqlite3_step(stmt); |
|
+	if (rc != SQLITE_OK && rc != SQLITE_DONE) { |
|
+		failed = 1; |
|
+		log(TERM, LOG_ERR, |
|
+		    "Failed to do extlog_mem_record step on sqlite: error = %d\n", rc); |
|
+	} |
|
+	rc = sqlite3_reset(stmt); |
|
+	if (rc != SQLITE_OK && rc != SQLITE_DONE) { |
|
+		failed = 1; |
|
+		log(TERM, LOG_ERR, |
|
+		    "Failed reset extlog_mem_record on sqlite: error = %d\n", |
|
+		    rc); |
|
+	} |
|
+ |
|
+	/* Only claim success when neither the step nor the reset failed. */ |
|
+	if (!failed) |
|
+		log(TERM, LOG_INFO, "register inserted at db\n"); |
|
+ |
|
+	return rc; |
|
+} |
|
+#endif |
|
|
|
/* |
|
* Table and functions to handle mce:mce_record |
|
@@ -385,6 +436,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
|
&aer_event_tab); |
|
#endif |
|
|
|
+#ifdef HAVE_EXTLOG |
|
+ rc = ras_mc_create_table(priv, &extlog_event_tab); |
|
+ if (rc == SQLITE_OK) |
|
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_extlog_record, |
|
+ &extlog_event_tab); |
|
+#endif |
|
+ |
|
#ifdef HAVE_MCE |
|
rc = ras_mc_create_table(priv, &mce_record_tab); |
|
if (rc == SQLITE_OK) |
|
diff --git a/ras-record.h b/ras-record.h |
|
index 6f146a8..5d84297 100644 |
|
--- a/ras-record.h |
|
+++ b/ras-record.h |
|
@@ -19,8 +19,11 @@ |
|
#ifndef __RAS_RECORD_H |
|
#define __RAS_RECORD_H |
|
|
|
+#include <stdint.h> |
|
#include "config.h" |
|
|
|
+extern long user_hz; |
|
+ |
|
struct ras_events *ras; |
|
|
|
struct ras_mc_event { |
|
@@ -40,8 +43,22 @@ struct ras_aer_event { |
|
const char *msg; |
|
}; |
|
|
|
+/* |
|
+ * Decoded extlog memory event, filled by ras_extlog_mem_event_handler() |
|
+ * and consumed by the report and database code. |
|
+ * |
|
+ * pa_mask_lsb is unsigned: the value 0xff is used as a sentinel by |
|
+ * err_mask(), and a signed 8-bit field would turn it into -1. |
|
+ */ |
|
+struct ras_extlog_event { |
|
+	char timestamp[64];		/* formatted event time */ |
|
+	int32_t error_seq;		/* "err_seq" field of the event */ |
|
+	int8_t etype;			/* "etype" field, see err_type() */ |
|
+	int8_t severity;		/* "sev" field, see err_severity() */ |
|
+	unsigned long long address;	/* "pa" field */ |
|
+	uint8_t pa_mask_lsb;		/* "pa_mask_lsb" field */ |
|
+	const char *fru_id;		/* raw "fru_id" field, 16 bytes are stored */ |
|
+	const char *fru_text;		/* raw "fru_text" field */ |
|
+	const char *cper_data;		/* raw "data" field */ |
|
+	unsigned short cper_data_length;	/* length of cper_data in bytes */ |
|
+}; |
|
+ |
|
struct ras_mc_event; |
|
struct ras_aer_event; |
|
+struct ras_extlog_event; |
|
struct mce_event; |
|
|
|
#ifdef HAVE_SQLITE3 |
|
@@ -57,18 +74,23 @@ struct sqlite3_priv { |
|
#ifdef HAVE_MCE |
|
sqlite3_stmt *stmt_mce_record; |
|
#endif |
|
+#ifdef HAVE_EXTLOG |
|
+ sqlite3_stmt *stmt_extlog_record; |
|
+#endif |
|
}; |
|
|
|
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
|
int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
|
int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
|
int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev); |
|
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); |
|
|
|
#else |
|
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
|
static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
|
static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
|
static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
|
+static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; |
|
|
|
#endif |
|
|
|
diff --git a/rasdaemon.c b/rasdaemon.c |
|
index 85ac2d4..41022ef 100644 |
|
--- a/rasdaemon.c |
|
+++ b/rasdaemon.c |
|
@@ -68,6 +68,8 @@ static error_t parse_opt(int k, char *arg, struct argp_state *state) |
|
return 0; |
|
} |
|
|
|
+long user_hz; |
|
+ |
|
int main(int argc, char *argv[]) |
|
{ |
|
struct arguments args; |
|
@@ -91,6 +93,8 @@ int main(int argc, char *argv[]) |
|
}; |
|
memset (&args, 0, sizeof(args)); |
|
|
|
+ user_hz = sysconf(_SC_CLK_TCK); |
|
+ |
|
argp_parse(&argp, argc, argv, 0, &idx, &args); |
|
|
|
if (idx < 0) { |
|
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
|
index e9f9c59..110262f 100755 |
|
--- a/util/ras-mc-ctl.in |
|
+++ b/util/ras-mc-ctl.in |
|
@@ -842,11 +842,141 @@ sub find_prog |
|
return ""; |
|
} |
|
|
|
+# Map the numeric extlog error type to its display name. |
|
+# Values outside 0..15 give "unknown-type". |
|
+sub get_extlog_type |
|
+{ |
|
+    my $etype = $_[0]; |
|
+    my @types = ( |
|
+        "unknown", |
|
+        "no error", |
|
+        "single-bit ECC", |
|
+        "multi-bit ECC", |
|
+        "single-symbol chipkill ECC", |
|
+        "multi-symbol chipkill ECC", |
|
+        "master abort", |
|
+        "target abort", |
|
+        "parity error", |
|
+        "watchdog timeout", |
|
+        "invalid address", |
|
+        "mirror Broken", |
|
+        "memory sparing", |
|
+        "scrub corrected error", |
|
+        "scrub uncorrected error", |
|
+        "physical memory map-out event", |
|
+        "unknown-type", |
|
+    ); |
|
+ |
|
+    return "unknown-type" if ($etype < 0 || $etype > 15); |
|
+    return $types[$etype]; |
|
+} |
|
+ |
|
+# Map the numeric extlog severity to its display name. |
|
+# Values outside 0..3 give "unknown-severity". |
|
+sub get_extlog_severity |
|
+{ |
|
+    my $severity = $_[0]; |
|
+    my @sev = ( |
|
+        "recoverable", |
|
+        "fatal", |
|
+        "corrected", |
|
+        "informational", |
|
+        "unknown-severity", |
|
+    ); |
|
+ |
|
+    return "unknown-severity" if ($severity < 0 || $severity > 3); |
|
+    return $sev[$severity]; |
|
+} |
|
+ |
|
+# Bit flags tested against the validation bits unpacked from the |
|
+# cper_data blob in get_cper_data_text(). CPER_MEM_VALID_ERROR_TYPE is |
|
+# declared but not tested by that sub. |
|
+use constant { |
|
+ CPER_MEM_VALID_NODE => 0x0008, |
|
+ CPER_MEM_VALID_CARD => 0x0010, |
|
+ CPER_MEM_VALID_MODULE => 0x0020, |
|
+ CPER_MEM_VALID_BANK => 0x0040, |
|
+ CPER_MEM_VALID_DEVICE => 0x0080, |
|
+ CPER_MEM_VALID_ROW => 0x0100, |
|
+ CPER_MEM_VALID_COLUMN => 0x0200, |
|
+ CPER_MEM_VALID_BIT_POSITION => 0x0400, |
|
+ CPER_MEM_VALID_REQUESTOR_ID => 0x0800, |
|
+ CPER_MEM_VALID_RESPONDER_ID => 0x1000, |
|
+ CPER_MEM_VALID_TARGET_ID => 0x2000, |
|
+ CPER_MEM_VALID_ERROR_TYPE => 0x4000, |
|
+ CPER_MEM_VALID_RANK_NUMBER => 0x8000, |
|
+ CPER_MEM_VALID_CARD_HANDLE => 0x10000, |
|
+ CPER_MEM_VALID_MODULE_HANDLE => 0x20000, |
|
+}; |
|
+ |
|
+# Decode the binary cper_data blob of an extlog record and return the |
|
+# fields flagged as valid as a "name=value, name=value" string. |
|
+# |
|
+# Argument: the raw blob (validation bits followed by the location |
|
+# fields, unpacked with native 64-bit and 16-bit integers). |
|
+# Returns "" when the blob is undefined or too short to hold the |
|
+# validation bits. A field whose bytes are missing from a truncated |
|
+# blob is skipped. |
|
+# |
|
+# The requestor, responder and target ids used to be printed as bare hex |
|
+# numbers; they now carry a label like every other field so they can be |
|
+# told apart. |
|
+sub get_cper_data_text |
|
+{ |
|
+    my $cper_data = $_[0]; |
|
+    return "" unless defined $cper_data; |
|
+ |
|
+    my ($validation_bits, @values) = unpack 'QSSSSSSSSQQQSSS', $cper_data; |
|
+    return "" unless defined $validation_bits; |
|
+ |
|
+    # One entry per unpacked value, in the same order as the template. |
|
+    my @fields = ( |
|
+        [CPER_MEM_VALID_NODE,          "node=%d"], |
|
+        [CPER_MEM_VALID_CARD,          "card=%d"], |
|
+        [CPER_MEM_VALID_MODULE,        "module=%d"], |
|
+        [CPER_MEM_VALID_BANK,          "bank=%d"], |
|
+        [CPER_MEM_VALID_DEVICE,        "device=%d"], |
|
+        [CPER_MEM_VALID_ROW,           "row=%d"], |
|
+        [CPER_MEM_VALID_COLUMN,        "column=%d"], |
|
+        [CPER_MEM_VALID_BIT_POSITION,  "bit_position=%d"], |
|
+        [CPER_MEM_VALID_REQUESTOR_ID,  "requestor_id=0x%08x"], |
|
+        [CPER_MEM_VALID_RESPONDER_ID,  "responder_id=0x%08x"], |
|
+        [CPER_MEM_VALID_TARGET_ID,     "target_id=0x%08x"], |
|
+        [CPER_MEM_VALID_RANK_NUMBER,   "rank=%d"], |
|
+        [CPER_MEM_VALID_CARD_HANDLE,   "mem_array_handle=%d"], |
|
+        [CPER_MEM_VALID_MODULE_HANDLE, "mem_dev_handle=%d"], |
|
+    ); |
|
+ |
|
+    my @out; |
|
+    for my $i (0 .. $#fields) { |
|
+        my ($flag, $format) = @{ $fields[$i] }; |
|
+        next unless $validation_bits & $flag; |
|
+        next unless defined $values[$i]; |
|
+        push @out, sprintf($format, $values[$i]); |
|
+    } |
|
+ |
|
+    return join(", ", @out); |
|
+} |
|
+ |
|
+# Render a 16 byte little-endian UUID blob in the usual |
|
+# xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx text form. |
|
+sub get_uuid_le |
|
+{ |
|
+    my @bytes = unpack "C*", $_[0]; |
|
+ |
|
+    # Byte positions of each dash separated group, in output order. |
|
+    my @groups = ( |
|
+        [3, 2, 1, 0], |
|
+        [5, 4], |
|
+        [7, 6], |
|
+        [8, 9], |
|
+        [10, 11, 12, 13, 14, 15], |
|
+    ); |
|
+ |
|
+    my @parts; |
|
+    foreach my $group (@groups) { |
|
+        my $hex = ""; |
|
+        foreach my $pos (@$group) { |
|
+            $hex .= sprintf "%.2x", $bytes[$pos]; |
|
+        } |
|
+        push @parts, $hex; |
|
+    } |
|
+    return join("-", @parts); |
|
+} |
|
+ |
|
sub summary |
|
{ |
|
require DBI; |
|
my ($query, $query_handle, $out); |
|
my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); |
|
+ my ($etype, $severity, $etype_string, $severity_string); |
|
|
|
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
|
|
|
@@ -882,6 +1012,24 @@ sub summary |
|
} |
|
$query_handle->finish; |
|
|
|
+ # extlog errors |
|
+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; |
|
+ $query_handle = $dbh->prepare($query); |
|
+ $query_handle->execute(); |
|
+ $query_handle->bind_columns(\($etype, $severity, $count)); |
|
+ $out = ""; |
|
+ while($query_handle->fetch()) { |
|
+ $etype_string = get_extlog_type($etype); |
|
+ $severity_string = get_extlog_severity($severity); |
|
+ $out .= "\t$count $etype_string $severity_string errors\n"; |
|
+ } |
|
+ if ($out ne "") { |
|
+ print "Extlog records summary:\n$out"; |
|
+ } else { |
|
+ print "No Extlog errors.\n"; |
|
+ } |
|
+ $query_handle->finish; |
|
+ |
|
# MCE mce_record errors |
|
$query = "select error_msg, count(*) from mce_record group by error_msg"; |
|
$query_handle = $dbh->prepare($query); |
|
@@ -906,6 +1054,7 @@ sub errors |
|
require DBI; |
|
my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); |
|
my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); |
|
+ my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); |
|
|
|
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
|
|
|
@@ -945,6 +1094,31 @@ sub errors |
|
} |
|
$query_handle->finish; |
|
|
|
+ # Extlog errors |
|
+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; |
|
+ $query_handle = $dbh->prepare($query); |
|
+ $query_handle->execute(); |
|
+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); |
|
+ $out = ""; |
|
+ while($query_handle->fetch()) { |
|
+ $etype_string = get_extlog_type($etype); |
|
+ $severity_string = get_extlog_severity($severity); |
|
+ $out .= "$id $timestamp error: "; |
|
+ $out .= "type=$etype_string, "; |
|
+ $out .= "severity=$severity_string, "; |
|
+ $out .= sprintf "address=0x%08x, ", $addr; |
|
+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); |
|
+ $out .= "fru_text='$fru_text', "; |
|
+ $out .= get_cper_data_text($cper_data) if ($cper_data); |
|
+ $out .= "\n"; |
|
+ } |
|
+ if ($out ne "") { |
|
+ print "Extlog events:\n$out\n"; |
|
+ } else { |
|
+ print "No Extlog errors.\n\n"; |
|
+ } |
|
+ $query_handle->finish; |
|
+ |
|
# MCE mce_record errors |
|
$query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; |
|
$query_handle = $dbh->prepare($query);
|
|
|