basebuilder_pel7ppc64bebuilder0
7 years ago
62 changed files with 7968 additions and 0 deletions
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
From 5e8fb95e2f6dd3f427e0ae5d7d066aeb6d61fd0f Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Wed, 29 May 2013 21:53:58 -0300 |
||||
Subject: [PATCH 01/32] ras-mc-ctl: Improve error summary to show label and mc |
||||
|
||||
Both pieces of information are useful to users, even in the summary. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 6 +++--- |
||||
1 files changed, 3 insertions(+), 3 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 32c4edb..5b1ca4d 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -827,15 +827,15 @@ sub summary |
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
||||
|
||||
- my $query = "select top_layer,middle_layer,lower_layer, count(*) from mc_event group by top_layer,middle_layer,lower_layer"; |
||||
+ my $query = "select label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by label,mc,top_layer,middle_layer,lower_layer"; |
||||
my $query_handle = $dbh->prepare($query); |
||||
$query_handle->execute(); |
||||
|
||||
- $query_handle->bind_columns(\my($top, $mid, $low, $count)); |
||||
+ $query_handle->bind_columns(\my($label, $mc, $top, $mid, $low, $count)); |
||||
|
||||
print "Memory controller events summary:\n"; |
||||
while($query_handle->fetch()) { |
||||
- print "location: $top:$mid:$low errors: $count\n"; |
||||
+ print "DIMM Label(s): '$label' location: $mc:$top:$mid:$low errors: $count\n"; |
||||
} |
||||
|
||||
$query_handle->finish; |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,240 @@
@@ -0,0 +1,240 @@
|
||||
From 002238dff53b284c9455554f146176ee8de2de4a Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 12:41:01 -0300 |
||||
Subject: [PATCH 02/32] ras-record: make the code more generic |
||||
|
||||
Now that we're ready to add more tables to the database, make |
||||
the code that creates and inserts data into the table more |
||||
generic. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-record.c | 173 +++++++++++++++++++++++++++++++++++++--------------------- |
||||
1 files changed, 110 insertions(+), 63 deletions(-) |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 8995c9e..3af0791 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -28,80 +28,128 @@ |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-logger.h" |
||||
|
||||
+/* #define DEBUG_SQL 1 */ |
||||
+ |
||||
#define SQLITE_RAS_DB RASSTATEDIR "/" RAS_DB_FNAME |
||||
|
||||
-const char *mc_event_db = " mc_event "; |
||||
-const char *mc_event_db_create_fields = "(" |
||||
- "id INTEGER PRIMARY KEY" |
||||
- ", timestamp TEXT" |
||||
- ", err_count INTEGER" |
||||
- ", err_type TEXT" |
||||
- ", err_msg TEXT" /* 5 */ |
||||
- ", label TEXT" |
||||
- ", mc INTEGER" |
||||
- ", top_layer INTEGER" |
||||
- ", middle_layer INTEGER" |
||||
- ", lower_layer INTEGER" /* 10 */ |
||||
- ", address INTEGER" |
||||
- ", grain INTEGER" |
||||
- ", syndrome INTEGER" |
||||
- ", driver_detail TEXT" /* 14 */ |
||||
- ")"; |
||||
- |
||||
-const char *mc_event_db_fields = "(" |
||||
- "id" |
||||
- ", timestamp" |
||||
- ", err_count" |
||||
- ", err_type" |
||||
- ", err_msg" /* 5 */ |
||||
- ", label" |
||||
- ", mc" |
||||
- ", top_layer" |
||||
- ", middle_layer" |
||||
- ", lower_layer" /* 10 */ |
||||
- ", address" |
||||
- ", grain" |
||||
- ", syndrome" |
||||
- ", driver_detail" /* 14 */ |
||||
- ")"; |
||||
- |
||||
-#define NUM_MC_EVENT_DB_VALUES 14 |
||||
- |
||||
-const char *createdb = "CREATE TABLE IF NOT EXISTS"; |
||||
+ |
||||
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) |
||||
+ |
||||
+struct db_fields { |
||||
+ char *name; |
||||
+ char *type; |
||||
+}; |
||||
+ |
||||
+struct db_table_descriptor { |
||||
+ char *name; |
||||
+ const struct db_fields *fields; |
||||
+ size_t num_fields; |
||||
+}; |
||||
+ |
||||
+static const struct db_fields mc_event_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ { .name="err_count", .type="INTEGER" }, |
||||
+ { .name="err_type", .type="TEXT" }, |
||||
+ { .name="err_msg", .type="TEXT" }, |
||||
+ { .name="label", .type="TEXT" }, |
||||
+ { .name="mc", .type="INTEGER" }, |
||||
+ { .name="top_layer", .type="INTEGER" }, |
||||
+ { .name="middle_layer", .type="INTEGER" }, |
||||
+ { .name="lower_layer", .type="INTEGER" }, |
||||
+ { .name="address", .type="INTEGER" }, |
||||
+ { .name="grain", .type="INTEGER" }, |
||||
+ { .name="syndrome", .type="INTEGER" }, |
||||
+ { .name="driver_detail", .type="TEXT" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor mc_event_tab = { |
||||
+ .name = "mc_event", |
||||
+ .fields = mc_event_fields, |
||||
+ .num_fields = ARRAY_SIZE(mc_event_fields), |
||||
+}; |
||||
+ |
||||
const char *insertdb = "INSERT INTO"; |
||||
const char *valuesdb = " VALUES "; |
||||
|
||||
-static int ras_mc_prepare_stmt(struct sqlite3_priv *priv) |
||||
+static int ras_mc_prepare_stmt(struct sqlite3_priv *priv, |
||||
+ sqlite3_stmt **stmt, |
||||
+ const struct db_table_descriptor *db_tab) |
||||
+ |
||||
{ |
||||
int i, rc; |
||||
- char sql[1024]; |
||||
+ char sql[1024], *p = sql, *end = sql + sizeof(sql); |
||||
+ const struct db_fields *field; |
||||
+ |
||||
+ p += snprintf(p, end - p, "INSERT INTO %s (", |
||||
+ db_tab->name); |
||||
+ |
||||
+ for (i = 0; i < db_tab->num_fields; i++) { |
||||
+ field = &db_tab->fields[i]; |
||||
+ p += snprintf(p, end - p, "%s", field->name); |
||||
+ |
||||
+ if (i < db_tab->num_fields - 1) |
||||
+ p += snprintf(p, end - p, ", "); |
||||
+ } |
||||
|
||||
- strcpy(sql, insertdb); |
||||
- strcat(sql, mc_event_db); |
||||
- strcat(sql, mc_event_db_fields); |
||||
- strcat(sql, valuesdb); |
||||
+ p += snprintf(p, end - p, ") VALUES ( NULL, "); |
||||
|
||||
- strcat(sql, "(NULL, "); /* Auto-increment field */ |
||||
- for (i = 1; i < NUM_MC_EVENT_DB_VALUES; i++) { |
||||
- if (i < NUM_MC_EVENT_DB_VALUES - 1) |
||||
+ for (i = 1; i < db_tab->num_fields; i++) { |
||||
+ if (i < db_tab->num_fields - 1) |
||||
strcat(sql, "?, "); |
||||
else |
||||
strcat(sql, "?)"); |
||||
} |
||||
|
||||
- rc = sqlite3_prepare_v2(priv->db, sql, -1, &priv->stmt, NULL); |
||||
+#ifdef DEBUG_SQL |
||||
+ log(TERM, LOG_INFO, "SQL: %s\n", sql); |
||||
+#endif |
||||
+ |
||||
+ rc = sqlite3_prepare_v2(priv->db, sql, -1, stmt, NULL); |
||||
if (rc != SQLITE_OK) |
||||
- log(TERM, LOG_ERR, "Failed to prepare insert db on %s: error = %s\n", |
||||
- SQLITE_RAS_DB, sqlite3_errmsg(priv->db)); |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to prepare insert db at table %s (db %s): error = %s\n", |
||||
+ db_tab->name, SQLITE_RAS_DB, sqlite3_errmsg(priv->db)); |
||||
|
||||
return rc; |
||||
} |
||||
|
||||
+static int ras_mc_create_table(struct sqlite3_priv *priv, |
||||
+ const struct db_table_descriptor *db_tab) |
||||
+{ |
||||
+ const struct db_fields *field; |
||||
+ char sql[1024], *p = sql, *end = sql + sizeof(sql); |
||||
+ int i,rc; |
||||
+ |
||||
+ p += snprintf(p, end - p, "CREATE TABLE IF NOT EXISTS %s (", |
||||
+ db_tab->name); |
||||
+ |
||||
+ for (i = 0; i < db_tab->num_fields; i++) { |
||||
+ field = &db_tab->fields[i]; |
||||
+ p += snprintf(p, end - p, "%s %s", field->name, field->type); |
||||
+ |
||||
+ if (i < db_tab->num_fields - 1) |
||||
+ p += snprintf(p, end - p, ", "); |
||||
+ } |
||||
+ p += snprintf(p, end - p, ")"); |
||||
+ |
||||
+#ifdef DEBUG_SQL |
||||
+ log(TERM, LOG_INFO, "SQL: %s\n", sql); |
||||
+#endif |
||||
+ |
||||
+ rc = sqlite3_exec(priv->db, sql, NULL, NULL, NULL); |
||||
+ if (rc != SQLITE_OK) { |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to create table %s on %s: error = %d\n", |
||||
+ db_tab->name, SQLITE_RAS_DB, rc); |
||||
+ } |
||||
+ return rc; |
||||
+} |
||||
+ |
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
{ |
||||
int rc; |
||||
sqlite3 *db; |
||||
- char sql[1024]; |
||||
struct sqlite3_priv *priv; |
||||
|
||||
printf("Calling %s()\n", __FUNCTION__); |
||||
@@ -137,27 +185,26 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
free(priv); |
||||
return -1; |
||||
} |
||||
+ priv->db = db; |
||||
|
||||
- strcpy(sql, createdb); |
||||
- strcat(sql, mc_event_db); |
||||
- strcat(sql, mc_event_db_create_fields); |
||||
- rc = sqlite3_exec(db, sql, NULL, NULL, NULL); |
||||
+ rc = ras_mc_create_table(priv, &mc_event_tab); |
||||
if (rc != SQLITE_OK) { |
||||
- log(TERM, LOG_ERR, |
||||
- "cpu %u: Failed to create db on %s: error = %d\n", |
||||
- cpu, SQLITE_RAS_DB, rc); |
||||
+ sqlite3_close(db); |
||||
free(priv); |
||||
return -1; |
||||
} |
||||
|
||||
- priv->db = db; |
||||
- ras->db_priv = priv; |
||||
- |
||||
- rc = ras_mc_prepare_stmt(priv); |
||||
- if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt, &mc_event_tab); |
||||
+ if (rc == SQLITE_OK) { |
||||
log(TERM, LOG_INFO, |
||||
"cpu %u: Recording events at %s\n", |
||||
cpu, SQLITE_RAS_DB); |
||||
+ ras->db_priv = priv; |
||||
+ } else { |
||||
+ sqlite3_close(db); |
||||
+ free(priv); |
||||
+ return -1; |
||||
+ } |
||||
|
||||
return 0; |
||||
} |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,97 @@
@@ -0,0 +1,97 @@
|
||||
From 016802f4093e80971a52c590c661a04924cb9aa3 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 13:10:16 -0300 |
||||
Subject: [PATCH 03/32] ras-record: rename stmt to stmt_mc_event |
||||
|
||||
This stmt is used only for mc_event. So, rename it, as we'll be |
||||
adding other stmts for the other tables. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-record.c | 46 ++++++++++++++++++++++++---------------------- |
||||
ras-record.h | 2 +- |
||||
2 files changed, 25 insertions(+), 23 deletions(-) |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 3af0791..efcd78f 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -194,7 +194,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
return -1; |
||||
} |
||||
|
||||
- rc = ras_mc_prepare_stmt(priv, &priv->stmt, &mc_event_tab); |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mc_event, &mc_event_tab); |
||||
if (rc == SQLITE_OK) { |
||||
log(TERM, LOG_INFO, |
||||
"cpu %u: Recording events at %s\n", |
||||
@@ -214,30 +214,32 @@ int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) |
||||
int rc; |
||||
struct sqlite3_priv *priv = ras->db_priv; |
||||
|
||||
- if (!priv || !priv->stmt) |
||||
+ if (!priv || !priv->stmt_mc_event) |
||||
return 0; |
||||
- log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt); |
||||
- |
||||
- sqlite3_bind_text(priv->stmt, 1, ev->timestamp, -1, NULL); |
||||
- sqlite3_bind_int (priv->stmt, 2, ev->error_count); |
||||
- sqlite3_bind_text(priv->stmt, 3, ev->error_type, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt, 4, ev->msg, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt, 5, ev->label, -1, NULL); |
||||
- sqlite3_bind_int (priv->stmt, 6, ev->mc_index); |
||||
- sqlite3_bind_int (priv->stmt, 7, ev->top_layer); |
||||
- sqlite3_bind_int (priv->stmt, 8, ev->middle_layer); |
||||
- sqlite3_bind_int (priv->stmt, 9, ev->lower_layer); |
||||
- sqlite3_bind_int (priv->stmt, 10, ev->address); |
||||
- sqlite3_bind_int (priv->stmt, 11, ev->grain); |
||||
- sqlite3_bind_int (priv->stmt, 12, ev->syndrome); |
||||
- sqlite3_bind_text(priv->stmt, 13, ev->driver_detail, -1, NULL); |
||||
- rc = sqlite3_step(priv->stmt); |
||||
+ log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt_mc_event); |
||||
+ |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 2, ev->error_count); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 3, ev->error_type, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 4, ev->msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 5, ev->label, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 6, ev->mc_index); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 7, ev->top_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 8, ev->middle_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 9, ev->lower_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 10, ev->address); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 11, ev->grain); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 12, ev->syndrome); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 13, ev->driver_detail, -1, NULL); |
||||
+ rc = sqlite3_step(priv->stmt_mc_event); |
||||
if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
- log(TERM, LOG_ERR, "Failed to do mc_event step on sqlite: error = %d\n", rc); |
||||
- rc = sqlite3_reset(priv->stmt); |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do mc_event step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_mc_event); |
||||
if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
- log(TERM, LOG_ERR, "Failed reset mc_event on sqlite: error = %d\n", |
||||
- rc); |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset mc_event on sqlite: error = %d\n", |
||||
+ rc); |
||||
log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
|
||||
return rc; |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 20c327f..9791185 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -46,7 +46,7 @@ struct ras_aer_event { |
||||
|
||||
struct sqlite3_priv { |
||||
sqlite3 *db; |
||||
- sqlite3_stmt *stmt; |
||||
+ sqlite3_stmt *stmt_mc_event; |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,114 @@
@@ -0,0 +1,114 @@
|
||||
From 4474f696c9207ceb21d55a0047ab6871879afe5a Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 13:51:55 -0300 |
||||
Subject: [PATCH 04/32] ras-record: reorder functions |
||||
|
||||
No functional changes |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-record.c | 77 +++++++++++++++++++++++++++++---------------------------- |
||||
1 files changed, 39 insertions(+), 38 deletions(-) |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index efcd78f..298977e 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -46,6 +46,10 @@ struct db_table_descriptor { |
||||
size_t num_fields; |
||||
}; |
||||
|
||||
+/* |
||||
+ * Table and functions to handle ras:mc_event |
||||
+ */ |
||||
+ |
||||
static const struct db_fields mc_event_fields[] = { |
||||
{ .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
{ .name="timestamp", .type="TEXT" }, |
||||
@@ -69,8 +73,41 @@ static const struct db_table_descriptor mc_event_tab = { |
||||
.num_fields = ARRAY_SIZE(mc_event_fields), |
||||
}; |
||||
|
||||
-const char *insertdb = "INSERT INTO"; |
||||
-const char *valuesdb = " VALUES "; |
||||
+int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_mc_event) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt_mc_event); |
||||
+ |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 2, ev->error_count); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 3, ev->error_type, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 4, ev->msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 5, ev->label, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 6, ev->mc_index); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 7, ev->top_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 8, ev->middle_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 9, ev->lower_layer); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 10, ev->address); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 11, ev->grain); |
||||
+ sqlite3_bind_int (priv->stmt_mc_event, 12, ev->syndrome); |
||||
+ sqlite3_bind_text(priv->stmt_mc_event, 13, ev->driver_detail, -1, NULL); |
||||
+ rc = sqlite3_step(priv->stmt_mc_event); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do mc_event step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_mc_event); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset mc_event on sqlite: error = %d\n", |
||||
+ rc); |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
|
||||
static int ras_mc_prepare_stmt(struct sqlite3_priv *priv, |
||||
sqlite3_stmt **stmt, |
||||
@@ -208,39 +245,3 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
|
||||
return 0; |
||||
} |
||||
- |
||||
-int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) |
||||
-{ |
||||
- int rc; |
||||
- struct sqlite3_priv *priv = ras->db_priv; |
||||
- |
||||
- if (!priv || !priv->stmt_mc_event) |
||||
- return 0; |
||||
- log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt_mc_event); |
||||
- |
||||
- sqlite3_bind_text(priv->stmt_mc_event, 1, ev->timestamp, -1, NULL); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 2, ev->error_count); |
||||
- sqlite3_bind_text(priv->stmt_mc_event, 3, ev->error_type, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt_mc_event, 4, ev->msg, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt_mc_event, 5, ev->label, -1, NULL); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 6, ev->mc_index); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 7, ev->top_layer); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 8, ev->middle_layer); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 9, ev->lower_layer); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 10, ev->address); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 11, ev->grain); |
||||
- sqlite3_bind_int (priv->stmt_mc_event, 12, ev->syndrome); |
||||
- sqlite3_bind_text(priv->stmt_mc_event, 13, ev->driver_detail, -1, NULL); |
||||
- rc = sqlite3_step(priv->stmt_mc_event); |
||||
- if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
- log(TERM, LOG_ERR, |
||||
- "Failed to do mc_event step on sqlite: error = %d\n", rc); |
||||
- rc = sqlite3_reset(priv->stmt_mc_event); |
||||
- if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
- log(TERM, LOG_ERR, |
||||
- "Failed reset mc_event on sqlite: error = %d\n", |
||||
- rc); |
||||
- log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
- |
||||
- return rc; |
||||
-} |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,60 @@
@@ -0,0 +1,60 @@
|
||||
From 93217061a4b1dc7f287f2715aadc621d2c00425d Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 13:53:18 -0300 |
||||
Subject: [PATCH 05/32] ras-record: Make the code easier to add support for other tables |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-record.c | 25 ++++++++----------------- |
||||
1 files changed, 8 insertions(+), 17 deletions(-) |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 298977e..36b3373 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -143,10 +143,14 @@ static int ras_mc_prepare_stmt(struct sqlite3_priv *priv, |
||||
#endif |
||||
|
||||
rc = sqlite3_prepare_v2(priv->db, sql, -1, stmt, NULL); |
||||
- if (rc != SQLITE_OK) |
||||
+ if (rc != SQLITE_OK) { |
||||
log(TERM, LOG_ERR, |
||||
"Failed to prepare insert db at table %s (db %s): error = %s\n", |
||||
db_tab->name, SQLITE_RAS_DB, sqlite3_errmsg(priv->db)); |
||||
+ stmt = NULL; |
||||
+ } else { |
||||
+ log(TERM, LOG_INFO, "Recording %s events\n", db_tab->name); |
||||
+ } |
||||
|
||||
return rc; |
||||
} |
||||
@@ -225,23 +229,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
priv->db = db; |
||||
|
||||
rc = ras_mc_create_table(priv, &mc_event_tab); |
||||
- if (rc != SQLITE_OK) { |
||||
- sqlite3_close(db); |
||||
- free(priv); |
||||
- return -1; |
||||
- } |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mc_event, &mc_event_tab); |
||||
|
||||
- rc = ras_mc_prepare_stmt(priv, &priv->stmt_mc_event, &mc_event_tab); |
||||
- if (rc == SQLITE_OK) { |
||||
- log(TERM, LOG_INFO, |
||||
- "cpu %u: Recording events at %s\n", |
||||
- cpu, SQLITE_RAS_DB); |
||||
- ras->db_priv = priv; |
||||
- } else { |
||||
- sqlite3_close(db); |
||||
- free(priv); |
||||
- return -1; |
||||
- } |
||||
|
||||
+ ras->db_priv = priv; |
||||
return 0; |
||||
} |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,141 @@
@@ -0,0 +1,141 @@
|
||||
From 11004aaa98865dd7c0ee28b4af8d6ba6b6f11507 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 13:54:11 -0300 |
||||
Subject: [PATCH 06/32] Add support to record AER events |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-aer-handler.c | 4 ++- |
||||
ras-record.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++- |
||||
ras-record.h | 6 +++++ |
||||
3 files changed, 68 insertions(+), 2 deletions(-) |
||||
|
||||
diff --git a/ras-aer-handler.c b/ras-aer-handler.c |
||||
index ec63e2a..e5abaca 100644 |
||||
--- a/ras-aer-handler.c |
||||
+++ b/ras-aer-handler.c |
||||
@@ -111,7 +111,9 @@ int ras_aer_event_handler(struct trace_seq *s, |
||||
trace_seq_puts(s, ev.error_type); |
||||
|
||||
/* Insert data into the SGBD */ |
||||
-// ras_store_aer_event(ras, &ev); |
||||
+#ifdef HAVE_SQLITE3 |
||||
+ ras_store_aer_event(ras, &ev); |
||||
+#endif |
||||
|
||||
return 0; |
||||
} |
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 36b3373..cb302ce 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -26,6 +26,7 @@ |
||||
#include <unistd.h> |
||||
#include "ras-events.h" |
||||
#include "ras-mc-handler.h" |
||||
+#include "ras-aer-handler.h" |
||||
#include "ras-logger.h" |
||||
|
||||
/* #define DEBUG_SQL 1 */ |
||||
@@ -109,6 +110,56 @@ int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) |
||||
return rc; |
||||
} |
||||
|
||||
+/* |
||||
+ * Table and functions to handle ras:aer |
||||
+ */ |
||||
+ |
||||
+#ifdef HAVE_AER |
||||
+static const struct db_fields aer_event_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ { .name="err_type", .type="TEXT" }, |
||||
+ { .name="err_msg", .type="TEXT" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor aer_event_tab = { |
||||
+ .name = "aer_event", |
||||
+ .fields = aer_event_fields, |
||||
+ .num_fields = ARRAY_SIZE(aer_event_fields), |
||||
+}; |
||||
+ |
||||
+int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_aer_event) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt_aer_event); |
||||
+ |
||||
+ sqlite3_bind_text(priv->stmt_aer_event, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_aer_event, 3, ev->error_type, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_aer_event, 4, ev->msg, -1, NULL); |
||||
+ |
||||
+ rc = sqlite3_step(priv->stmt_aer_event); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do aer_event step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_aer_event); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset aer_event on sqlite: error = %d\n", |
||||
+ rc); |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+#endif |
||||
+ |
||||
+/* |
||||
+ * Generic code |
||||
+ */ |
||||
+ |
||||
static int ras_mc_prepare_stmt(struct sqlite3_priv *priv, |
||||
sqlite3_stmt **stmt, |
||||
const struct db_table_descriptor *db_tab) |
||||
@@ -230,8 +281,15 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
|
||||
rc = ras_mc_create_table(priv, &mc_event_tab); |
||||
if (rc == SQLITE_OK) |
||||
- rc = ras_mc_prepare_stmt(priv, &priv->stmt_mc_event, &mc_event_tab); |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mc_event, |
||||
+ &mc_event_tab); |
||||
|
||||
+#ifdef HAVE_AER |
||||
+ rc = ras_mc_create_table(priv, &aer_event_tab); |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_aer_event, |
||||
+ &aer_event_tab); |
||||
+#endif |
||||
|
||||
ras->db_priv = priv; |
||||
return 0; |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 9791185..5008906 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -47,14 +47,20 @@ struct ras_aer_event { |
||||
struct sqlite3_priv { |
||||
sqlite3 *db; |
||||
sqlite3_stmt *stmt_mc_event; |
||||
+#ifdef HAVE_AER |
||||
+ sqlite3_stmt *stmt_aer_event; |
||||
+#endif |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
+int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
|
||||
#else |
||||
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
||||
static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
||||
+static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
+ |
||||
#endif |
||||
|
||||
#endif |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,202 @@
@@ -0,0 +1,202 @@
|
||||
From 0a31d938cf29e065e96de1206a7d35042962e02a Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 14:18:24 -0300 |
||||
Subject: [PATCH 07/32] Add support to store MCE events at the database |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
ras-mce-handler.c | 5 +++ |
||||
ras-record.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++- |
||||
ras-record.h | 9 +++++ |
||||
3 files changed, 116 insertions(+), 2 deletions(-) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 614a0eb..59e8d05 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -396,5 +396,10 @@ int ras_mce_event_handler(struct trace_seq *s, |
||||
return rc; |
||||
|
||||
report_mce_event(ras, record, s, &e); |
||||
+ |
||||
+#ifdef HAVE_SQLITE3 |
||||
+ ras_store_mce_record(ras, &e); |
||||
+#endif |
||||
+ |
||||
return 0; |
||||
} |
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index cb302ce..daa3cb1 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -27,6 +27,7 @@ |
||||
#include "ras-events.h" |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-aer-handler.h" |
||||
+#include "ras-mce-handler.h" |
||||
#include "ras-logger.h" |
||||
|
||||
/* #define DEBUG_SQL 1 */ |
||||
@@ -135,7 +136,7 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
|
||||
if (!priv || !priv->stmt_aer_event) |
||||
return 0; |
||||
- log(TERM, LOG_INFO, "mc_event store: %p\n", priv->stmt_aer_event); |
||||
+ log(TERM, LOG_INFO, "aer_event store: %p\n", priv->stmt_aer_event); |
||||
|
||||
sqlite3_bind_text(priv->stmt_aer_event, 1, ev->timestamp, -1, NULL); |
||||
sqlite3_bind_text(priv->stmt_aer_event, 3, ev->error_type, -1, NULL); |
||||
@@ -156,6 +157,98 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
} |
||||
#endif |
||||
|
||||
+ |
||||
+/* |
||||
+ * Table and functions to handle mce:mce_record |
||||
+ */ |
||||
+ |
||||
+#ifdef HAVE_MCE |
||||
+static const struct db_fields mce_record_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ |
||||
+ /* MCE registers */ |
||||
+ { .name="mcgcap", .type="INTEGER" }, |
||||
+ { .name="mcgstatus", .type="INTEGER" }, |
||||
+ { .name="status", .type="INTEGER" }, |
||||
+ { .name="addr", .type="INTEGER" }, // 5 |
||||
+ { .name="misc", .type="INTEGER" }, |
||||
+ { .name="ip", .type="INTEGER" }, |
||||
+ { .name="tsc", .type="INTEGER" }, |
||||
+ { .name="walltime", .type="INTEGER" }, |
||||
+ { .name="cpu", .type="INTEGER" }, // 10 |
||||
+ { .name="cpuid", .type="INTEGER" }, |
||||
+ { .name="apicid", .type="INTEGER" }, |
||||
+ { .name="socketid", .type="INTEGER" }, |
||||
+ { .name="cs", .type="INTEGER" }, |
||||
+ { .name="bank", .type="INTEGER" }, //15 |
||||
+ { .name="cpuvendor", .type="INTEGER" }, |
||||
+ |
||||
+ /* Parsed data - will likely change */ |
||||
+ { .name="bank_name", .type="TEXT" }, |
||||
+ { .name="error_msg", .type="TEXT" }, |
||||
+ { .name="mcgstatus_msg", .type="TEXT" }, |
||||
+ { .name="mcistatus_msg", .type="TEXT" }, // 20 |
||||
+ { .name="user_action", .type="TEXT" }, |
||||
+ { .name="mc_location", .type="TEXT" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor mce_record_tab = { |
||||
+ .name = "mce_record", |
||||
+ .fields = mce_record_fields, |
||||
+ .num_fields = ARRAY_SIZE(mce_record_fields), |
||||
+}; |
||||
+ |
||||
+int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_mce_record) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "mce_record store: %p\n", priv->stmt_mce_record); |
||||
+ |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 2, ev->mcgcap); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 3, ev->mcgstatus); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 4, ev->status); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 5, ev->addr); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 6, ev->misc); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 7, ev->ip); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 8, ev->tsc); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 9, ev->walltime); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 10, ev->cpu); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpuid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 12, ev->apicid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 13, ev->socketid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 14, ev->cs); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 15, ev->bank); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 16, ev->cpuvendor); |
||||
+ |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 17, ev->bank_name, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 18, ev->error_msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 19, ev->mcgstatus_msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 20, ev->mcistatus_msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 21, ev->mcastatus_msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 22, ev->user_action, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_mce_record, 23, ev->mc_location, -1, NULL); |
||||
+ |
||||
+ rc = sqlite3_step(priv->stmt_mce_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do mce_record step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_mce_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset mce_record on sqlite: error = %d\n", |
||||
+ rc); |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+#endif |
||||
+ |
||||
+ |
||||
/* |
||||
* Generic code |
||||
*/ |
||||
@@ -291,6 +384,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
&aer_event_tab); |
||||
#endif |
||||
|
||||
- ras->db_priv = priv; |
||||
+#ifdef HAVE_MCE |
||||
+ rc = ras_mc_create_table(priv, &mce_record_tab); |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mce_record, |
||||
+ &mce_record_tab); |
||||
+#endif |
||||
+ |
||||
+ ras->db_priv = priv; |
||||
return 0; |
||||
} |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 5008906..6f146a8 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -40,6 +40,10 @@ struct ras_aer_event { |
||||
const char *msg; |
||||
}; |
||||
|
||||
+struct ras_mc_event; |
||||
+struct ras_aer_event; |
||||
+struct mce_event; |
||||
+ |
||||
#ifdef HAVE_SQLITE3 |
||||
|
||||
#include <sqlite3.h> |
||||
@@ -50,16 +54,21 @@ struct sqlite3_priv { |
||||
#ifdef HAVE_AER |
||||
sqlite3_stmt *stmt_aer_event; |
||||
#endif |
||||
+#ifdef HAVE_MCE |
||||
+ sqlite3_stmt *stmt_mce_record; |
||||
+#endif |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
+int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev); |
||||
|
||||
#else |
||||
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
||||
static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
||||
static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
+static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,85 @@
@@ -0,0 +1,85 @@
|
||||
From 2925cc92d73065dab3bbf7de83404d6e0e141dc6 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 14:57:54 -0300 |
||||
Subject: [PATCH 08/32] ras-mc-ctl: add summary for MCE and PCIe AER errors |
||||
|
||||
Also report the summary for MCE and PCIe AER errors. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 50 ++++++++++++++++++++++++++++++++++++++++++++------ |
||||
1 files changed, 44 insertions(+), 6 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 5b1ca4d..118af7b 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -824,21 +824,59 @@ sub find_prog |
||||
sub summary |
||||
{ |
||||
require DBI; |
||||
+ my ($query, $query_handle, $out); |
||||
+ my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); |
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
||||
|
||||
- my $query = "select label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by label,mc,top_layer,middle_layer,lower_layer"; |
||||
- my $query_handle = $dbh->prepare($query); |
||||
+ # Memory controller mc_event errors |
||||
+ $query = "select err_type, label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by err_type, label, mc, top_layer, middle_layer, lower_layer"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
$query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($err_type, $label, $mc, $top, $mid, $low, $count)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $out .= "\t$err_type on DIMM Label(s): '$label' location: $mc:$top:$mid:$low errors: $count\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "Memory controller events summary:\n$out\n"; |
||||
+ } else { |
||||
+ print "No Memory errors.\n\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
|
||||
- $query_handle->bind_columns(\my($label, $mc, $top, $mid, $low, $count)); |
||||
- |
||||
- print "Memory controller events summary:\n"; |
||||
+ # PCIe AER aer_event errors |
||||
+ $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($err_type, $msg, $count)); |
||||
+ $out = ""; |
||||
while($query_handle->fetch()) { |
||||
- print "DIMM Label(s): '$label' location: $mc:$top:$mid:$low errors: $count\n"; |
||||
+ $out .= "\t$count $err_type errors: $msg\n"; |
||||
} |
||||
+ if ($out ne "") { |
||||
+ print "PCIe AER events summary:\n$out\n"; |
||||
+ } else { |
||||
+ print "No PCIe AER errors.\n\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
|
||||
+ # MCE mce_record errors |
||||
+ $query = "select error_msg, count(*) from mce_record group by error_msg"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($msg, $count)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $out .= "\t$count $msg errors\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "MCE records summary:\n$out"; |
||||
+ } else { |
||||
+ print "No MCE errors.\n"; |
||||
+ } |
||||
$query_handle->finish; |
||||
+ |
||||
undef($dbh); |
||||
} |
||||
|
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,108 @@
@@ -0,0 +1,108 @@
|
||||
From 4b64649eb5740027f58377f6c29d1554d9792b97 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 16:16:44 -0300 |
||||
Subject: [PATCH 09/32] ras-mc-ctl: report errors also for PCIe AER and MCE |
||||
|
||||
Also show PCIe AER and MCE errors when used with the --errors parameter. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 73 +++++++++++++++++++++++++++++++++++++++++++++++----- |
||||
1 files changed, 66 insertions(+), 7 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 118af7b..30d3078 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -883,22 +883,81 @@ sub summary |
||||
sub errors |
||||
{ |
||||
require DBI; |
||||
+ my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); |
||||
+ my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); |
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
||||
|
||||
- my $query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id"; |
||||
- |
||||
- my $query_handle = $dbh->prepare($query); |
||||
+ # Memory controller mc_event errors |
||||
+ $query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
$query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $out .= "$id $time $count $type error(s): $msg at $label location: $mc:$top:$mid:$low, addr $addr, grain $grain, syndrome $syndrome $detail\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "PCIe AER events:\n$out\n"; |
||||
+ } else { |
||||
+ print "No PCIe AER errors.\n\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
|
||||
- $query_handle->bind_columns(\my($id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail)); |
||||
- |
||||
- print "Memory controller events:\n"; |
||||
+ # PCIe AER aer_event errors |
||||
+ $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($id, $time, $type, $msg)); |
||||
+ $out = ""; |
||||
while($query_handle->fetch()) { |
||||
- print "$id $time $count $type error(s): $msg at $label location: $mc:$top:$mid:$low, addr $addr, grain $grain, syndrome $syndrome $detail\n"; |
||||
+ $out .= "$id $time $type error: $msg\n"; |
||||
} |
||||
+ if ($out ne "") { |
||||
+ print "MCE events:\n$out\n"; |
||||
+ } else { |
||||
+ print "No MCE errors.\n\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
|
||||
+ # MCE mce_record errors |
||||
+ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $out .= "$id $time error: $msg"; |
||||
+ $out .= ", CPU $cpuvendor" if ($cpuvendor); |
||||
+ $out .= ", bank $bank_name" if ($bank_name); |
||||
+ $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); |
||||
+ $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); |
||||
+ $out .= ", $mc_location" if ($mc_location); |
||||
+ $out .= ", $user_action" if ($user_action); |
||||
+ $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); |
||||
+ $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); |
||||
+ $out .= sprintf ", status=0x%08x", $status if ($status); |
||||
+ $out .= sprintf ", addr=0x%08x", $addr if ($addr); |
||||
+ $out .= sprintf ", misc=0x%08x", $misc if ($misc); |
||||
+ $out .= sprintf ", ip=0x%08x", $ip if ($ip); |
||||
+ $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); |
||||
+ $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); |
||||
+ $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); |
||||
+ $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); |
||||
+ $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); |
||||
+ $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); |
||||
+ $out .= sprintf ", cs=0x%08x", $cs if ($cs); |
||||
+ $out .= sprintf ", bank=0x%08x", $bank if ($bank); |
||||
+ |
||||
+ $out .= "\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "Memory controller events:\n$out\n"; |
||||
+ } else { |
||||
+ print "No Memory errors.\n\n"; |
||||
+ } |
||||
$query_handle->finish; |
||||
+ |
||||
undef($dbh); |
||||
} |
||||
|
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,53 @@
@@ -0,0 +1,53 @@
|
||||
From dc811f88b1bd5ac33faa1606c3a3ce4d3bc0b7ed Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
Date: Fri, 31 May 2013 16:40:40 -0300 |
||||
Subject: [PATCH 10/32] ras-mc-ctl: Fix the name of the error table data |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 12 ++++++------ |
||||
1 files changed, 6 insertions(+), 6 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 30d3078..48d9b00 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -898,9 +898,9 @@ sub errors |
||||
$out .= "$id $time $count $type error(s): $msg at $label location: $mc:$top:$mid:$low, addr $addr, grain $grain, syndrome $syndrome $detail\n"; |
||||
} |
||||
if ($out ne "") { |
||||
- print "PCIe AER events:\n$out\n"; |
||||
+ print "Memory controller events:\n$out\n"; |
||||
} else { |
||||
- print "No PCIe AER errors.\n\n"; |
||||
+ print "No Memory errors.\n\n"; |
||||
} |
||||
$query_handle->finish; |
||||
|
||||
@@ -914,9 +914,9 @@ sub errors |
||||
$out .= "$id $time $type error: $msg\n"; |
||||
} |
||||
if ($out ne "") { |
||||
- print "MCE events:\n$out\n"; |
||||
+ print "PCIe AER events:\n$out\n"; |
||||
} else { |
||||
- print "No MCE errors.\n\n"; |
||||
+ print "No PCIe AER errors.\n\n"; |
||||
} |
||||
$query_handle->finish; |
||||
|
||||
@@ -952,9 +952,9 @@ sub errors |
||||
$out .= "\n"; |
||||
} |
||||
if ($out ne "") { |
||||
- print "Memory controller events:\n$out\n"; |
||||
+ print "MCE events:\n$out\n"; |
||||
} else { |
||||
- print "No Memory errors.\n\n"; |
||||
+ print "No MCE errors.\n\n"; |
||||
} |
||||
$query_handle->finish; |
||||
|
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,36 @@
@@ -0,0 +1,36 @@
|
||||
From 099af4056912faa28bf1385fffa77e7bbb468b93 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
Date: Thu, 15 Aug 2013 12:43:02 -0300 |
||||
Subject: [PATCH 13/32] ras-mc-ctl: Improve parser |
||||
|
||||
Accept either . or : as layers separator at config files. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 4 ++-- |
||||
1 files changed, 2 insertions(+), 2 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 48d9b00..f5a8ce5 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -481,14 +481,14 @@ sub parse_dimm_labels_file |
||||
|
||||
next unless (my ($label, $info) = ($str =~ /^(.*)\s*:\s*(.*)$/i)); |
||||
|
||||
- unless ($info =~ /\d+(?:\.\d+)*/) { |
||||
+ unless ($info =~ /\d+(?:[\.\:]\d+)*/) { |
||||
log_error ("$file: $line: Invalid syntax, ignoring: \"$_\"\n"); |
||||
next; |
||||
} |
||||
|
||||
for my $target (split (/[, ]+/, $info)) { |
||||
my $n; |
||||
- my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:\.(\d+)){0,1}(?:\.(\d+)){0,1}(?:\.(\d+)){0,1}(?:\.(\d+)){0,1}/); |
||||
+ my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}/); |
||||
|
||||
if (defined($extra)) { |
||||
die ("Error: Only up to 3 layers are currently supported on label db \"$file\"\n"); |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,77 @@
@@ -0,0 +1,77 @@
|
||||
From 0d53728f9cbdca5a1bd32c51a121dd1162f50e95 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
Date: Thu, 15 Aug 2013 12:45:18 -0300 |
||||
Subject: [PATCH 14/32] ras-mc-ctl: Fix label register with 2 layers |
||||
|
||||
When there aren't 3 layers, label print/register weren't working. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 19 +++++++++++++------ |
||||
1 files changed, 13 insertions(+), 6 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index f5a8ce5..a7137be 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -508,7 +508,6 @@ sub parse_dimm_labels_file |
||||
} |
||||
map { $lh->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } |
||||
@models; |
||||
- $n = 3; |
||||
} |
||||
if (!$num) { |
||||
$num = $n; |
||||
@@ -542,9 +541,13 @@ sub parse_dimm_labels |
||||
|
||||
sub read_dimm_label |
||||
{ |
||||
- my ($mc, $top, $mid, $low) = @_; |
||||
+ my ($num_layers, $mc, $top, $mid, $low) = @_; |
||||
my $sysfs = "/sys/devices/system/edac/mc"; |
||||
- my $pos = "$mc:$top:$mid:$low"; |
||||
+ my $pos; |
||||
+ |
||||
+ $pos = "$mc:$top:$mid:$low" if ($num_layers == 3); |
||||
+ $pos = "$mc:$top:$mid" if ($num_layers == 2); |
||||
+ $pos = "$mc:$top" if ($num_layers == 1); |
||||
|
||||
if (!defined($dimm_node{$pos})) { |
||||
my $label = "$pos missing"; |
||||
@@ -574,10 +577,14 @@ sub read_dimm_label |
||||
|
||||
sub get_dimm_label_node |
||||
{ |
||||
- my ($mc, $top, $mid, $low) = @_; |
||||
+ my ($num_layers, $mc, $top, $mid, $low) = @_; |
||||
my $sysfs = "/sys/devices/system/edac/mc"; |
||||
my $pos = "$mc:$top:$mid:$low"; |
||||
|
||||
+ $pos = "$mc:$top:$mid:$low" if ($num_layers == 3); |
||||
+ $pos = "$mc:$top:$mid" if ($num_layers == 2); |
||||
+ $pos = "$mc:$top" if ($num_layers == 1); |
||||
+ |
||||
return "" if (!defined($dimm_node{$pos})); |
||||
|
||||
my $dimm = $dimm_node{$pos}; |
||||
@@ -611,7 +618,7 @@ sub print_dimm_labels |
||||
for my $mid (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}}) { |
||||
for my $low (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}{$mid}}) { |
||||
my $label = $$lref{$vendor}{$model}{$mc}{$top}{$mid}{$low}; |
||||
- my ($rlabel,$loc) = read_dimm_label ($mc, $top, $mid, $low); |
||||
+ my ($rlabel,$loc) = read_dimm_label ($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low); |
||||
|
||||
printf $fh $format, $loc, $label, $rlabel; |
||||
} |
||||
@@ -645,7 +652,7 @@ sub register_dimm_labels |
||||
for my $mid (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}}) { |
||||
for my $low (sort keys %{$$lref{$vendor}{$model}{$mc}{$top}{$mid}}) { |
||||
|
||||
- my $file = get_dimm_label_node($mc, $top, $mid, $low); |
||||
+ my $file = get_dimm_label_node($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low); |
||||
|
||||
# Ignore sysfs files that don't exist. Might just be |
||||
# unpopulated bank. |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,44 @@
@@ -0,0 +1,44 @@
|
||||
From 74d84ba18f4f1d7097b47ce1c2e41e332d197dfb Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
Date: Thu, 15 Aug 2013 12:58:02 -0300 |
||||
Subject: [PATCH 15/32] Add an example of labels file |
||||
|
||||
This is an example of a labels file for a Dell PowerEdge T620. |
||||
|
||||
For now, only DIMMs A1 and B1 are tested here. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
labels/dell | 20 ++++++++++++++++++++ |
||||
1 files changed, 20 insertions(+), 0 deletions(-) |
||||
create mode 100644 labels/dell |
||||
|
||||
diff --git a/labels/dell b/labels/dell |
||||
new file mode 100644 |
||||
index 0000000..e1a09a7 |
||||
--- /dev/null |
||||
+++ b/labels/dell |
||||
@@ -0,0 +1,20 @@ |
||||
+# RASDAEMON Motherboard DIMM labels Database file. |
||||
+# |
||||
+# Vendor-name and model-name are found from the program 'dmidecode' |
||||
+# labels are found from the silk screen on the motherboard. |
||||
+# |
||||
+#Vendor: <vendor-name> |
||||
+# Model: <model-name> |
||||
+# <label>: <mc>.<top>.<mid>.<low> |
||||
+# |
||||
+ |
||||
+Vendor: Dell Inc. |
||||
+ |
||||
+ Model: 0F5XM3 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2; DIMM_A4: 0.0.3; |
||||
+ DIMM_A5: 0.1.0; DIMM_A6: 0.1.1; DIMM_A7: 0.1.2; DIMM_A8: 0.1.3; |
||||
+ DIMM_A9: 0.2.0; DIMM_A10: 0.2.1; DIMM_A11: 0.2.2; DIMM_A12: 0.2.3; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2; DIMM_B4: 1.0.3; |
||||
+ DIMM_B5: 1.1.0; DIMM_B6: 1.1.1; DIMM_B7: 1.1.2; DIMM_B8: 1.1.3; |
||||
+ DIMM_B9: 1.2.0; DIMM_B10: 1.2.1; DIMM_B11: 1.2.2; DIMM_B12: 1.2.3; |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,76 @@
@@ -0,0 +1,76 @@
|
||||
From b8bb2ed4a751516d32373e478e5c9ea9f16b524d Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
Date: Thu, 15 Aug 2013 17:13:43 -0300 |
||||
Subject: [PATCH 17/32] ras-mc-ctl: Fix the DIMM layout display |
||||
|
||||
The items weren't being presented in the right order. Fix it. |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 26 ++++++++++++++++++++------ |
||||
1 files changed, 20 insertions(+), 6 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index a7137be..196a643 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -673,15 +673,15 @@ sub register_dimm_labels |
||||
return 1; |
||||
} |
||||
|
||||
-sub dimm_display_layer($@); |
||||
+sub dimm_display_layer_rev($@); |
||||
|
||||
-sub dimm_display_layer($@) |
||||
+sub dimm_display_layer_rev($@) |
||||
{ |
||||
my $layer = shift; |
||||
my @pos = @_; |
||||
|
||||
- $layer--; |
||||
- if ($layer < 0) { |
||||
+ $layer++; |
||||
+ if ($layer >= scalar(@pos) - 1) { |
||||
my $str_loc = join(':', @pos); |
||||
my $size = $dimm_size{$str_loc}; |
||||
if (!$size) { |
||||
@@ -695,12 +695,26 @@ sub dimm_display_layer($@) |
||||
my $s; |
||||
for (my $i = 0; $i <= $max_pos[$layer]; $i++) { |
||||
$pos[$layer] = $i; |
||||
- $s .= dimm_display_layer($layer, @pos); |
||||
+ $s .= dimm_display_layer_rev($layer, @pos); |
||||
} |
||||
|
||||
return $s; |
||||
} |
||||
|
||||
+sub dimm_display_layer(@) |
||||
+{ |
||||
+ my @pos = @_; |
||||
+ |
||||
+ my $s; |
||||
+ for (my $i = 0; $i <= $max_pos[0]; $i++) { |
||||
+ $pos[0] = $i; |
||||
+ $s .= dimm_display_layer_rev(0, @pos); |
||||
+ } |
||||
+ |
||||
+ return $s; |
||||
+} |
||||
+ |
||||
+ |
||||
sub dimm_display_layer_header($$) |
||||
{ |
||||
my $n_items = 1; |
||||
@@ -753,7 +767,7 @@ sub dimm_display_mem() |
||||
my $p1 = length($s) - 1; |
||||
|
||||
$pos[scalar(@pos) - 1] = $d; |
||||
- $s .= dimm_display_layer(scalar(@pos) - 1, @pos); |
||||
+ $s .= dimm_display_layer(@pos); |
||||
$len += length($s); |
||||
|
||||
$sep = "-" x $p1; |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,29 @@
@@ -0,0 +1,29 @@
|
||||
From 2afbcd81173822014d6d73e98e9093a140bb1421 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Fri, 6 Dec 2013 09:45:14 -0500 |
||||
Subject: [PATCH 19/32] ras-mc-ctl: remove completely use of modprobe |
||||
|
||||
While verifying SELinux policies, this popped up. ras-mc-ctl inherited a |
||||
modprobe lookup that ends up never being used. This patch gets rid of |
||||
it. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 1 - |
||||
1 files changed, 0 insertions(+), 1 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 196a643..ef0d9bc 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -39,7 +39,6 @@ my $dbname = "@RASSTATEDIR@/@RAS_DB_FNAME@"; |
||||
my $prefix = "@prefix@"; |
||||
my $sysconfdir = "@sysconfdir@"; |
||||
my $dmidecode = find_prog ("dmidecode"); |
||||
-my $modprobe = find_prog ("modprobe") or exit (1); |
||||
|
||||
my %conf = (); |
||||
my %bus = (); |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
From 78465e5047b226011c1a4c916c79c63fb6e68f71 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
Date: Fri, 14 Feb 2014 05:11:26 +0900 |
||||
Subject: [PATCH 22/32] mce-amd-k8.c: fix a warning |
||||
MIME-Version: 1.0 |
||||
Content-Type: text/plain; charset=UTF-8 |
||||
Content-Transfer-Encoding: 8bit |
||||
|
||||
mce-amd-k8.c: In function ‘bank_name’: |
||||
mce-amd-k8.c:250:22: warning: argument to ‘sizeof’ in ‘snprintf’ call is the same expression as the destination; did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess] |
||||
snprintf(buf, sizeof(buf), "%s (bank=%d)", s, e->bank); |
||||
^ |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
mce-amd-k8.c | 3 +-- |
||||
1 files changed, 1 insertions(+), 2 deletions(-) |
||||
|
||||
diff --git a/mce-amd-k8.c b/mce-amd-k8.c |
||||
index 5e21b55..8179f74 100644 |
||||
--- a/mce-amd-k8.c |
||||
+++ b/mce-amd-k8.c |
||||
@@ -236,7 +236,6 @@ static void decode_k8_threashold(struct mce_event *e) |
||||
|
||||
static void bank_name(struct mce_event *e) |
||||
{ |
||||
- char *buf = e->bank_name; |
||||
const char *s; |
||||
|
||||
if (e->bank < ARRAY_SIZE(k8bank)) |
||||
@@ -247,7 +246,7 @@ static void bank_name(struct mce_event *e) |
||||
else |
||||
return; /* Use the generic parser for bank */ |
||||
|
||||
- snprintf(buf, sizeof(buf), "%s (bank=%d)", s, e->bank); |
||||
+ mce_snprintf(e->bank_name, "%s (bank=%d)", s, e->bank); |
||||
} |
||||
|
||||
int parse_amd_k8_event(struct ras_events *ras, struct mce_event *e) |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,641 @@
@@ -0,0 +1,641 @@
|
||||
From c6ed1e1af9356cdce1eaa652061dd6e4eb32d283 Mon Sep 17 00:00:00 2001 |
||||
From: Junliang Li <lijunliang.dna@gmail.com> |
||||
Date: Thu, 13 Feb 2014 10:39:53 +0800 |
||||
Subject: [PATCH 23/32] add abrt suppport for rasdaemon |
||||
|
||||
Adds ABRT as another error reporting mechanism for rasdaemon. |
||||
This patch does: |
||||
|
||||
1) read RAS events (mc, mce and aer) |
||||
|
||||
2) set up an abrt-server Unix socket |
||||
|
||||
3) write messages following the ABRT server protocol, setting event |
||||
info into the backtrace zone. |
||||
|
||||
4) commit report. |
||||
|
||||
For now, it relies on ABRT to limit report floods. |
||||
|
||||
Signed-off-by: Junliang Li <lijunliang.dna@gmail.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
Makefile.am | 5 +- |
||||
configure.ac | 9 + |
||||
ras-aer-handler.c | 6 + |
||||
ras-events.h | 3 + |
||||
ras-mc-handler.c | 7 + |
||||
ras-mce-handler.c | 6 + |
||||
ras-report.c | 429 +++++++++++++++++++++++++++++++++++++++++++++++++++++ |
||||
ras-report.h | 39 +++++ |
||||
8 files changed, 503 insertions(+), 1 deletions(-) |
||||
create mode 100644 ras-report.c |
||||
create mode 100644 ras-report.h |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index 473ce98..c1668b4 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -17,10 +17,13 @@ if WITH_MCE |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
mce-intel-sb.c mce-intel-ivb.c |
||||
endif |
||||
+if WITH_ABRT_REPORT |
||||
+ rasdaemon_SOURCES += ras-report.c |
||||
+endif |
||||
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a |
||||
|
||||
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ |
||||
- ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h |
||||
+ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h |
||||
|
||||
# This rule can't be called with more than one Makefile job (like make -j8) |
||||
# I can't figure out a way to fix that |
||||
diff --git a/configure.ac b/configure.ac |
||||
index 4fe6ef2..0ea962e 100644 |
||||
--- a/configure.ac |
||||
+++ b/configure.ac |
||||
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [ |
||||
]) |
||||
AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes]) |
||||
|
||||
+AC_ARG_ENABLE([abrt_report], |
||||
+ AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) |
||||
+ |
||||
+AS_IF([test "x$enable_abrt_report" = "xyes"], [ |
||||
+ AC_DEFINE(HAVE_ABRT_REPORT,1,"have report event to ABRT") |
||||
+ AC_SUBST([WITH_ABRT_REPORT]) |
||||
+]) |
||||
+AM_CONDITIONAL([WITH_ABRT_REPORT], [test x$enable_abrt_report = xyes]) |
||||
+ |
||||
test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc |
||||
|
||||
CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" |
||||
diff --git a/ras-aer-handler.c b/ras-aer-handler.c |
||||
index e5abaca..50526af 100644 |
||||
--- a/ras-aer-handler.c |
||||
+++ b/ras-aer-handler.c |
||||
@@ -24,6 +24,7 @@ |
||||
#include "ras-record.h" |
||||
#include "ras-logger.h" |
||||
#include "bitfield.h" |
||||
+#include "ras-report.h" |
||||
|
||||
static const char *aer_errors[32] = { |
||||
/* Correctable errors */ |
||||
@@ -115,5 +116,10 @@ int ras_aer_event_handler(struct trace_seq *s, |
||||
ras_store_aer_event(ras, &ev); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ /* Report event to ABRT */ |
||||
+ ras_report_aer_event(ras, &ev); |
||||
+#endif |
||||
+ |
||||
return 0; |
||||
} |
||||
diff --git a/ras-events.h b/ras-events.h |
||||
index 554a95e..64e045a 100644 |
||||
--- a/ras-events.h |
||||
+++ b/ras-events.h |
||||
@@ -47,6 +47,9 @@ struct ras_events { |
||||
|
||||
/* For the mce handler */ |
||||
struct mce_priv *mce_priv; |
||||
+ |
||||
+ /* For ABRT socket*/ |
||||
+ int socketfd; |
||||
}; |
||||
|
||||
struct pthread_data { |
||||
diff --git a/ras-mc-handler.c b/ras-mc-handler.c |
||||
index 5c24f65..ffb3805 100644 |
||||
--- a/ras-mc-handler.c |
||||
+++ b/ras-mc-handler.c |
||||
@@ -23,6 +23,7 @@ |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-record.h" |
||||
#include "ras-logger.h" |
||||
+#include "ras-report.h" |
||||
|
||||
int ras_mc_event_handler(struct trace_seq *s, |
||||
struct pevent_record *record, |
||||
@@ -189,6 +190,12 @@ int ras_mc_event_handler(struct trace_seq *s, |
||||
/* Insert data into the SGBD */ |
||||
|
||||
ras_store_mc_event(ras, &ev); |
||||
+ |
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ /* Report event to ABRT */ |
||||
+ ras_report_mc_event(ras, &ev); |
||||
+#endif |
||||
+ |
||||
return 0; |
||||
|
||||
parse_error: |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 59e8d05..1431049 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -26,6 +26,7 @@ |
||||
#include "ras-mce-handler.h" |
||||
#include "ras-record.h" |
||||
#include "ras-logger.h" |
||||
+#include "ras-report.h" |
||||
|
||||
/* |
||||
* The code below were adapted from Andi Kleen/Intel/SuSe mcelog code, |
||||
@@ -401,5 +402,10 @@ int ras_mce_event_handler(struct trace_seq *s, |
||||
ras_store_mce_record(ras, &e); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ /* Report event to ABRT */ |
||||
+ ras_report_mce_event(ras, &e); |
||||
+#endif |
||||
+ |
||||
return 0; |
||||
} |
||||
diff --git a/ras-report.c b/ras-report.c |
||||
new file mode 100644 |
||||
index 0000000..d3e4a79 |
||||
--- /dev/null |
||||
+++ b/ras-report.c |
||||
@@ -0,0 +1,429 @@ |
||||
+#include <stdio.h> |
||||
+#include <string.h> |
||||
+#include <unistd.h> |
||||
+#include <sys/types.h> |
||||
+#include <sys/utsname.h> |
||||
+#include <sys/socket.h> |
||||
+#include <sys/un.h> |
||||
+ |
||||
+#include "ras-report.h" |
||||
+ |
||||
+static int setup_report_socket(void){ |
||||
+ int sockfd = -1; |
||||
+ int rc = -1; |
||||
+ struct sockaddr_un addr; |
||||
+ |
||||
+ sockfd = socket(AF_UNIX, SOCK_STREAM, 0); |
||||
+ if (sockfd < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ memset(&addr, 0, sizeof(struct sockaddr_un)); |
||||
+ addr.sun_family = AF_UNIX; |
||||
+ strncpy(addr.sun_path, ABRT_SOCKET, strlen(ABRT_SOCKET)); |
||||
+ |
||||
+ rc = connect(sockfd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)); |
||||
+ if (rc < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ return sockfd; |
||||
+} |
||||
+ |
||||
+static int commit_report_basic(int sockfd){ |
||||
+ char buf[INPUT_BUFFER_SIZE]; |
||||
+ struct utsname un; |
||||
+ int rc = -1; |
||||
+ |
||||
+ if(sockfd < 0){ |
||||
+ return rc; |
||||
+ } |
||||
+ |
||||
+ memset(buf, 0, INPUT_BUFFER_SIZE); |
||||
+ memset(&un, 0, sizeof(struct utsname)); |
||||
+ |
||||
+ rc = uname(&un); |
||||
+ if(rc < 0){ |
||||
+ return rc; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * ABRT server protocol |
||||
+ */ |
||||
+ sprintf(buf, "PUT / HTTP/1.1\r\n\r\n"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "PID=%d", (int)getpid()); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "EXECUTABLE=/boot/vmlinuz-%s", un.release); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "BASENAME=%s", "rasdaemon"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * add "DONE" string to finish message. |
||||
+ */ |
||||
+static int commit_report_done(int sockfd){ |
||||
+ int rc = -1; |
||||
+ |
||||
+ if(sockfd < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ rc = write(sockfd, "DONE\0", strlen("DONE\0")); |
||||
+ if(rc < strlen("DONE\0")){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){ |
||||
+ char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
+ |
||||
+ if(!buf || !ev) |
||||
+ return -1; |
||||
+ |
||||
+ sprintf(bt_buf, "BACKTRACE= " \ |
||||
+ "timestamp=%s\n" \ |
||||
+ "error_count=%d\n" \ |
||||
+ "error_type=%s\n" \ |
||||
+ "msg=%s\n" \ |
||||
+ "label=%s\n" \ |
||||
+ "mc_index=%c\n" \ |
||||
+ "top_layer=%c\n" \ |
||||
+ "middle_layer=%c\n" \ |
||||
+ "lower_layer=%c\n" \ |
||||
+ "address=%llu\n" \ |
||||
+ "grain=%llu\n" \ |
||||
+ "syndrome=%llu\n" \ |
||||
+ "driver_detail=%s\n", \ |
||||
+ ev->timestamp, \ |
||||
+ ev->error_count, \ |
||||
+ ev->error_type, \ |
||||
+ ev->msg, \ |
||||
+ ev->label, \ |
||||
+ ev->mc_index, \ |
||||
+ ev->top_layer, \ |
||||
+ ev->middle_layer, \ |
||||
+ ev->lower_layer, \ |
||||
+ ev->address, \ |
||||
+ ev->grain, \ |
||||
+ ev->syndrome, \ |
||||
+ ev->driver_detail); |
||||
+ |
||||
+ strcat(buf, bt_buf); |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+static int set_mce_event_backtrace(char *buf, struct mce_event *ev){ |
||||
+ char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
+ |
||||
+ if(!buf || !ev) |
||||
+ return -1; |
||||
+ |
||||
+ sprintf(bt_buf, "BACKTRACE=" \ |
||||
+ "timestamp=%s\n" \ |
||||
+ "bank_name=%s\n" \ |
||||
+ "error_msg=%s\n" \ |
||||
+ "mcgstatus_msg=%s\n" \ |
||||
+ "mcistatus_msg=%s\n" \ |
||||
+ "mcastatus_msg=%s\n" \ |
||||
+ "user_action=%s\n" \ |
||||
+ "mc_location=%s\n" \ |
||||
+ "mcgcap=%lu\n" \ |
||||
+ "mcgstatus=%lu\n" \ |
||||
+ "status=%lu\n" \ |
||||
+ "addr=%lu\n" \ |
||||
+ "misc=%lu\n" \ |
||||
+ "ip=%lu\n" \ |
||||
+ "tsc=%lu\n" \ |
||||
+ "walltime=%lu\n" \ |
||||
+ "cpu=%u\n" \ |
||||
+ "cpuid=%u\n" \ |
||||
+ "apicid=%u\n" \ |
||||
+ "socketid=%u\n" \ |
||||
+ "cs=%d\n" \ |
||||
+ "bank=%d\n" \ |
||||
+ "cpuvendor=%d\n", \ |
||||
+ ev->timestamp, \ |
||||
+ ev->bank_name, \ |
||||
+ ev->error_msg, \ |
||||
+ ev->mcgstatus_msg, \ |
||||
+ ev->mcistatus_msg, \ |
||||
+ ev->mcastatus_msg, \ |
||||
+ ev->user_action, \ |
||||
+ ev->mc_location, \ |
||||
+ ev->mcgcap, \ |
||||
+ ev->mcgstatus, \ |
||||
+ ev->status, \ |
||||
+ ev->addr, \ |
||||
+ ev->misc, \ |
||||
+ ev->ip, \ |
||||
+ ev->tsc, \ |
||||
+ ev->walltime, \ |
||||
+ ev->cpu, \ |
||||
+ ev->cpuid, \ |
||||
+ ev->apicid, \ |
||||
+ ev->socketid, \ |
||||
+ ev->cs, \ |
||||
+ ev->bank, \ |
||||
+ ev->cpuvendor); |
||||
+ |
||||
+ strcat(buf, bt_buf); |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){ |
||||
+ char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
+ |
||||
+ if(!buf || !ev) |
||||
+ return -1; |
||||
+ |
||||
+ sprintf(bt_buf, "BACKTRACE=" \ |
||||
+ "timestamp=%s\n" \ |
||||
+ "error_type=%s\n" \ |
||||
+ "dev_name=%s\n" \ |
||||
+ "msg=%s\n", \ |
||||
+ ev->timestamp, \ |
||||
+ ev->error_type, \ |
||||
+ ev->dev_name, \ |
||||
+ ev->msg); |
||||
+ |
||||
+ strcat(buf, bt_buf); |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+static int commit_report_backtrace(int sockfd, int type, void *ev){ |
||||
+ char buf[MAX_BACKTRACE_SIZE]; |
||||
+ char *pbuf = buf; |
||||
+ int rc = -1; |
||||
+ int buf_len = 0; |
||||
+ |
||||
+ if(sockfd < 0 || !ev){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ memset(buf, 0, MAX_BACKTRACE_SIZE); |
||||
+ |
||||
+ switch(type){ |
||||
+ case MC_EVENT: |
||||
+ rc = set_mc_event_backtrace(buf, (struct ras_mc_event *)ev); |
||||
+ break; |
||||
+ case AER_EVENT: |
||||
+ rc = set_aer_event_backtrace(buf, (struct ras_aer_event *)ev); |
||||
+ break; |
||||
+ case MCE_EVENT: |
||||
+ rc = set_mce_event_backtrace(buf, (struct mce_event *)ev); |
||||
+ break; |
||||
+ default: |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ if(rc < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ buf_len = strlen(buf); |
||||
+ |
||||
+ for(;buf_len > INPUT_BUFFER_SIZE - 1; buf_len -= (INPUT_BUFFER_SIZE - 1)){ |
||||
+ rc = write(sockfd, pbuf, INPUT_BUFFER_SIZE - 1); |
||||
+ if(rc < INPUT_BUFFER_SIZE - 1){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ pbuf = pbuf + INPUT_BUFFER_SIZE - 1; |
||||
+ } |
||||
+ |
||||
+ rc = write(sockfd, pbuf, buf_len + 1); |
||||
+ if(rc < buf_len){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ |
||||
+ char buf[MAX_MESSAGE_SIZE]; |
||||
+ int sockfd = -1; |
||||
+ int done = 0; |
||||
+ int rc = -1; |
||||
+ |
||||
+ memset(buf, 0, sizeof(buf)); |
||||
+ |
||||
+ sockfd = setup_report_socket(); |
||||
+ if(sockfd < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_basic(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto mc_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_backtrace(sockfd, MC_EVENT, ev); |
||||
+ if(rc < 0){ |
||||
+ goto mc_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "ANALYZER=%s", "rasdaemon-mc"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto mc_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "REASON=%s", "EDAC driver report problem"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto mc_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_done(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto mc_fail; |
||||
+ } |
||||
+ |
||||
+ done = 1; |
||||
+ |
||||
+mc_fail: |
||||
+ |
||||
+ if(sockfd > 0){ |
||||
+ close(sockfd); |
||||
+ } |
||||
+ |
||||
+ if(done){ |
||||
+ return 0; |
||||
+ }else{ |
||||
+ return -1; |
||||
+ } |
||||
+} |
||||
+ |
||||
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ |
||||
+ char buf[MAX_MESSAGE_SIZE]; |
||||
+ int sockfd = 0; |
||||
+ int done = 0; |
||||
+ int rc = -1; |
||||
+ |
||||
+ memset(buf, 0, sizeof(buf)); |
||||
+ |
||||
+ sockfd = setup_report_socket(); |
||||
+ if(sockfd < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_basic(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto aer_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_backtrace(sockfd, AER_EVENT, ev); |
||||
+ if(rc < 0){ |
||||
+ goto aer_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "ANALYZER=%s", "rasdaemon-aer"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto aer_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "REASON=%s", "PCIe AER driver report problem"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto aer_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_done(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto aer_fail; |
||||
+ } |
||||
+ |
||||
+ done = 1; |
||||
+ |
||||
+aer_fail: |
||||
+ |
||||
+ if(sockfd > 0){ |
||||
+ close(sockfd); |
||||
+ } |
||||
+ |
||||
+ if(done){ |
||||
+ return 0; |
||||
+ }else{ |
||||
+ return -1; |
||||
+ } |
||||
+} |
||||
+ |
||||
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ |
||||
+ char buf[MAX_MESSAGE_SIZE]; |
||||
+ int sockfd = 0; |
||||
+ int done = 0; |
||||
+ int rc = -1; |
||||
+ |
||||
+ memset(buf, 0, sizeof(buf)); |
||||
+ |
||||
+ sockfd = setup_report_socket(); |
||||
+ if(sockfd < 0){ |
||||
+ return -1; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_basic(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto mce_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_backtrace(sockfd, MCE_EVENT, ev); |
||||
+ if(rc < 0){ |
||||
+ goto mce_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "ANALYZER=%s", "rasdaemon-mce"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto mce_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "REASON=%s", "Machine Check driver report problem"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto mce_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_done(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto mce_fail; |
||||
+ } |
||||
+ |
||||
+ done = 1; |
||||
+ |
||||
+mce_fail: |
||||
+ |
||||
+ if(sockfd > 0){ |
||||
+ close(sockfd); |
||||
+ } |
||||
+ |
||||
+ if(done){ |
||||
+ return 0; |
||||
+ }else{ |
||||
+ return -1; |
||||
+ } |
||||
+} |
||||
diff --git a/ras-report.h b/ras-report.h |
||||
new file mode 100644 |
||||
index 0000000..7920cdf |
||||
--- /dev/null |
||||
+++ b/ras-report.h |
||||
@@ -0,0 +1,39 @@ |
||||
+#ifndef __RAS_REPORT_H |
||||
+#define __RAS_REPORT_H |
||||
+ |
||||
+#include "ras-record.h" |
||||
+#include "ras-events.h" |
||||
+#include "ras-mc-handler.h" |
||||
+#include "ras-mce-handler.h" |
||||
+#include "ras-aer-handler.h" |
||||
+ |
||||
+/* Maximal length of backtrace. */ |
||||
+#define MAX_BACKTRACE_SIZE (1024*1024) |
||||
+/* Amount of data received from one client for a message before reporting error. */ |
||||
+#define MAX_MESSAGE_SIZE (4*MAX_BACKTRACE_SIZE) |
||||
+/* Maximal number of characters read from socket at once. */ |
||||
+#define INPUT_BUFFER_SIZE (8*1024) |
||||
+/* ABRT socket file */ |
||||
+#define ABRT_SOCKET "/var/run/abrt/abrt.socket" |
||||
+ |
||||
+enum { |
||||
+ MC_EVENT, |
||||
+ MCE_EVENT, |
||||
+ AER_EVENT |
||||
+}; |
||||
+ |
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ |
||||
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); |
||||
+ |
||||
+#else |
||||
+ |
||||
+static inline int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
||||
+static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
+static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
+ |
||||
+#endif |
||||
+ |
||||
+#endif |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
From d1b81490639f2608ecaf8fa50c24ac78c053fc2b Mon Sep 17 00:00:00 2001 |
||||
From: Betty Dall <betty.dall@hp.com> |
||||
Date: Wed, 19 Mar 2014 14:59:47 -0600 |
||||
Subject: [PATCH 26/32] rasdaemon: Add record option to rasdaemon man page |
||||
|
||||
Add the already existing rasdaemon option 'record' to the rasdaemon man |
||||
page. This option records events via sqlite3. |
||||
|
||||
Signed-off-by: Betty Dall <betty.dall@hp.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
man/rasdaemon.1.in | 14 +++++++++++--- |
||||
1 files changed, 11 insertions(+), 3 deletions(-) |
||||
|
||||
diff --git a/man/rasdaemon.1.in b/man/rasdaemon.1.in |
||||
index 5349fa3..7a8b60f 100644 |
||||
--- a/man/rasdaemon.1.in |
||||
+++ b/man/rasdaemon.1.in |
||||
@@ -29,8 +29,10 @@ rasdaemon \- RAS daemon to log the RAS events. |
||||
|
||||
.SH DESCRIPTION |
||||
|
||||
-The \fBrasdaemon\fR program is a daemon with monitors the RAS trace events |
||||
-from /sys/kernel/debug/tracing, reporting them via syslog/journald. |
||||
+The \fBrasdaemon\fR program is a daemon which monitors the platform |
||||
+Reliablity, Availability and Serviceability (RAS) reports from the |
||||
+Linux kernel trace events. These trace events are logged in |
||||
+/sys/kernel/debug/tracing, reporting them via syslog/journald. |
||||
|
||||
.SH OPTIONS |
||||
.TP |
||||
@@ -51,8 +53,14 @@ Executes in foreground, printing the events at console. Useful for testing it, |
||||
and to be used by systemd or Unix System V respan. |
||||
If not specified, the program runs in daemon mode. |
||||
.TP |
||||
+.BI "--record" |
||||
+Record RAS events via Sqlite3. The Sqlite3 database has the benefit of |
||||
+keeping a persistent record of the RAS events. This feature is used with |
||||
+the ras-mc-ctl utility. Note that rasdaemon may be compiled without this |
||||
+feature. |
||||
+.TP |
||||
.BI "--version" |
||||
-Prints the program version and exit. |
||||
+Print the program version and exit. |
||||
|
||||
.SH SEE ALSO |
||||
\fBras-mc-ctl\fR(8) |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,45 @@
@@ -0,0 +1,45 @@
|
||||
From caa44c3946ddc900896830297c28b90ce5b9034b Mon Sep 17 00:00:00 2001 |
||||
From: Betty Dall <betty.dall@hp.com> |
||||
Date: Wed, 19 Mar 2014 15:54:56 -0600 |
||||
Subject: [PATCH 27/32] ras-mc-ctl: Print useful message when run without rasdaemon -r |
||||
|
||||
The utility script ras-mc-ctl requires that rasdaemon --record be run |
||||
to create the mc_event table in the SQLite database. The current behaviour |
||||
is this: |
||||
[root@sa1 util]# ras-mc-ctl --errors |
||||
DBD::SQLite::db prepare failed: no such table: mc_event at |
||||
/usr/local/sbin/ras-mc-ctl line 914. |
||||
Can't call method "execute" on an undefined value at |
||||
/usr/local/sbin/ras-mc-ctl line 915. |
||||
|
||||
With this change, the user sees: |
||||
[root@sa1 util]# ras-mc-ctl --errors |
||||
DBD::SQLite::db prepare failed: no such table: mc_event at |
||||
/usr/local/sbin/ras-mc-ctl line 914. |
||||
ras-mc-ctl: Error: mc_event table missing from |
||||
/usr/local/var/lib/rasdaemon/ras-mc_event.db. Run 'rasdaemon --record'. |
||||
|
||||
Signed-off-by: Betty Dall <betty.dall@hp.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 4 ++++ |
||||
1 files changed, 4 insertions(+), 0 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 196a643..e9f9c59 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -912,6 +912,10 @@ sub errors |
||||
# Memory controller mc_event errors |
||||
$query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id"; |
||||
$query_handle = $dbh->prepare($query); |
||||
+ if (!$query_handle) { |
||||
+ log_error ("mc_event table missing from $dbname. Run 'rasdaemon --record'.\n"); |
||||
+ exit -1 |
||||
+ } |
||||
$query_handle->execute(); |
||||
$query_handle->bind_columns(\($id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail)); |
||||
$out = ""; |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,114 @@
@@ -0,0 +1,114 @@
|
||||
From 4bfa45f56e1500f1cfc8de3fd8d1228b11011e95 Mon Sep 17 00:00:00 2001 |
||||
From: Jakub Filak <jfilak@redhat.com> |
||||
Date: Fri, 21 Feb 2014 15:54:09 +0100 |
||||
Subject: [PATCH 28/32] Make paths in the systemd services configurable |
||||
|
||||
The path to a binary depends on configuration, therefore it is better to |
||||
not use hard coded strings. |
||||
|
||||
Signed-off-by: Jakub Filak <jfilak@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
Makefile.am | 15 ++++++++++++++- |
||||
misc/ras-mc-ctl.service | 10 ---------- |
||||
misc/ras-mc-ctl.service.in | 10 ++++++++++ |
||||
misc/rasdaemon.service | 10 ---------- |
||||
misc/rasdaemon.service.in | 10 ++++++++++ |
||||
5 files changed, 34 insertions(+), 21 deletions(-) |
||||
delete mode 100644 misc/ras-mc-ctl.service |
||||
create mode 100644 misc/ras-mc-ctl.service.in |
||||
delete mode 100644 misc/rasdaemon.service |
||||
create mode 100644 misc/rasdaemon.service.in |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index c1668b4..0fa615f 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -1,6 +1,19 @@ |
||||
ACLOCAL_AMFLAGS=-I m4 |
||||
SUBDIRS = libtrace util man |
||||
-EXTRA_DIST = misc/rasdaemon.service misc/ras-mc-ctl.service |
||||
+SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in |
||||
+SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) |
||||
+EXTRA_DIST = $(SYSTEMD_SERVICES_IN) |
||||
+ |
||||
+# This rule is needed because \@sbindir\@ is expanded to \${exec_prefix\}/sbin |
||||
+# during ./configure phase, therefore it is not possible to add .service.in |
||||
+# files to AC_CONFIG_FILES in configure.ac |
||||
+SUFFIXES = .service.in .service |
||||
+.service.in.service: |
||||
+ sed -e s,\@sbindir\@,$(sbindir),g $< > $@ |
||||
+ |
||||
+# This rule is needed because the service files must be generated on target |
||||
+# system after ./configure phase |
||||
+all-local: $(SYSTEMD_SERVICES) |
||||
|
||||
sbin_PROGRAMS = rasdaemon |
||||
rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \ |
||||
diff --git a/misc/ras-mc-ctl.service b/misc/ras-mc-ctl.service |
||||
deleted file mode 100644 |
||||
index 8a09508..0000000 |
||||
--- a/misc/ras-mc-ctl.service |
||||
+++ /dev/null |
||||
@@ -1,10 +0,0 @@ |
||||
-[Unit] |
||||
-Description=Initialize EDAC v3.0.0 Drivers For Machine Hardware |
||||
- |
||||
-[Service] |
||||
-Type=oneshot |
||||
-ExecStart=/usr/sbin/ras-mc-ctl --register-labels |
||||
-RemainAfterExit=yes |
||||
- |
||||
-[Install] |
||||
-WantedBy=multi-user.target |
||||
diff --git a/misc/ras-mc-ctl.service.in b/misc/ras-mc-ctl.service.in |
||||
new file mode 100644 |
||||
index 0000000..8cb3651 |
||||
--- /dev/null |
||||
+++ b/misc/ras-mc-ctl.service.in |
||||
@@ -0,0 +1,10 @@ |
||||
+[Unit] |
||||
+Description=Initialize EDAC v3.0.0 Drivers For Machine Hardware |
||||
+ |
||||
+[Service] |
||||
+Type=oneshot |
||||
+ExecStart=@sbindir@/ras-mc-ctl --register-labels |
||||
+RemainAfterExit=yes |
||||
+ |
||||
+[Install] |
||||
+WantedBy=multi-user.target |
||||
diff --git a/misc/rasdaemon.service b/misc/rasdaemon.service |
||||
deleted file mode 100644 |
||||
index 36cdef5..0000000 |
||||
--- a/misc/rasdaemon.service |
||||
+++ /dev/null |
||||
@@ -1,10 +0,0 @@ |
||||
-[Unit] |
||||
-Description=RAS daemon to log the RAS events |
||||
-After=syslog.target |
||||
- |
||||
-[Service] |
||||
-ExecStart=/usr/local/sbin/rasdaemon -f |
||||
-Restart=on-abort |
||||
- |
||||
-[Install] |
||||
-WantedBy=multi-user.target |
||||
diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in |
||||
new file mode 100644 |
||||
index 0000000..5e1f375 |
||||
--- /dev/null |
||||
+++ b/misc/rasdaemon.service.in |
||||
@@ -0,0 +1,10 @@ |
||||
+[Unit] |
||||
+Description=RAS daemon to log the RAS events |
||||
+After=syslog.target |
||||
+ |
||||
+[Service] |
||||
+ExecStart=@sbindir@/rasdaemon -f |
||||
+Restart=on-abort |
||||
+ |
||||
+[Install] |
||||
+WantedBy=multi-user.target |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,118 @@
@@ -0,0 +1,118 @@
|
||||
From d7453479e96693ebb5e17b285adf915b67095aad Mon Sep 17 00:00:00 2001 |
||||
From: Jakub Filak <jfilak@redhat.com> |
||||
Date: Wed, 2 Apr 2014 15:03:44 +0200 |
||||
Subject: [PATCH 31/32] Correct ABRT report data |
||||
|
||||
Remove '\0' byte from 'PUT' message because this was superfluous. |
||||
|
||||
Replaced 'BASENAME' item with 'TYPE' item because the first one is no |
||||
longer supported by abrtd and the second one is required. Basically the |
||||
latter is a substitute for the first one. |
||||
|
||||
Removed the closing message which is not supported by abrtd. abrtd |
||||
considers that message as a part of the problem report. |
||||
|
||||
Removed a superfluous space from 'Backtrace'. |
||||
|
||||
Signed-off-by: Jakub Filak <jfilak@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
ras-report.c | 41 ++++------------------------------------- |
||||
1 files changed, 4 insertions(+), 37 deletions(-) |
||||
|
||||
diff --git a/ras-report.c b/ras-report.c |
||||
index d3e4a79..0a05732 100644 |
||||
--- a/ras-report.c |
||||
+++ b/ras-report.c |
||||
@@ -51,8 +51,8 @@ static int commit_report_basic(int sockfd){ |
||||
* ABRT server protocol |
||||
*/ |
||||
sprintf(buf, "PUT / HTTP/1.1\r\n\r\n"); |
||||
- rc = write(sockfd, buf, strlen(buf) + 1); |
||||
- if(rc < strlen(buf) + 1){ |
||||
+ rc = write(sockfd, buf, strlen(buf)); |
||||
+ if(rc < strlen(buf)){ |
||||
return -1; |
||||
} |
||||
|
||||
@@ -68,7 +68,7 @@ static int commit_report_basic(int sockfd){ |
||||
return -1; |
||||
} |
||||
|
||||
- sprintf(buf, "BASENAME=%s", "rasdaemon"); |
||||
+ sprintf(buf, "TYPE=%s", "ras"); |
||||
rc = write(sockfd, buf, strlen(buf) + 1); |
||||
if(rc < strlen(buf) + 1){ |
||||
return -1; |
||||
@@ -77,31 +77,13 @@ static int commit_report_basic(int sockfd){ |
||||
return 0; |
||||
} |
||||
|
||||
-/* |
||||
- * add "DONE" string to finish message. |
||||
- */ |
||||
-static int commit_report_done(int sockfd){ |
||||
- int rc = -1; |
||||
- |
||||
- if(sockfd < 0){ |
||||
- return -1; |
||||
- } |
||||
- |
||||
- rc = write(sockfd, "DONE\0", strlen("DONE\0")); |
||||
- if(rc < strlen("DONE\0")){ |
||||
- return -1; |
||||
- } |
||||
- |
||||
- return 0; |
||||
-} |
||||
- |
||||
static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){ |
||||
char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
|
||||
if(!buf || !ev) |
||||
return -1; |
||||
|
||||
- sprintf(bt_buf, "BACKTRACE= " \ |
||||
+ sprintf(bt_buf, "BACKTRACE=" \ |
||||
"timestamp=%s\n" \ |
||||
"error_count=%d\n" \ |
||||
"error_type=%s\n" \ |
||||
@@ -298,11 +280,6 @@ int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ |
||||
goto mc_fail; |
||||
} |
||||
|
||||
- rc = commit_report_done(sockfd); |
||||
- if(rc < 0){ |
||||
- goto mc_fail; |
||||
- } |
||||
- |
||||
done = 1; |
||||
|
||||
mc_fail: |
||||
@@ -353,11 +330,6 @@ int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ |
||||
goto aer_fail; |
||||
} |
||||
|
||||
- rc = commit_report_done(sockfd); |
||||
- if(rc < 0){ |
||||
- goto aer_fail; |
||||
- } |
||||
- |
||||
done = 1; |
||||
|
||||
aer_fail: |
||||
@@ -408,11 +380,6 @@ int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ |
||||
goto mce_fail; |
||||
} |
||||
|
||||
- rc = commit_report_done(sockfd); |
||||
- if(rc < 0){ |
||||
- goto mce_fail; |
||||
- } |
||||
- |
||||
done = 1; |
||||
|
||||
mce_fail: |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
From 59f6c44864f914a189cb924dd8fea14cc314bf3f Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 23 Jun 2014 15:43:41 -0400 |
||||
Subject: [PATCH 1/2] rasdaemon: handle failures of snprintf() |
||||
|
||||
Florian Weimer found that in bitfield_msg() the return value of |
||||
snprintf() is used to calculate length ignoring that it can return a |
||||
negative number. This patch makes bitfield_msg() stop writing in such a |
||||
case. |
||||
|
||||
Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1035741 |
||||
|
||||
Reported-by: Florian Weimer <fweimer@redhat.com> |
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
--- |
||||
bitfield.c | 4 ++++ |
||||
1 files changed, 4 insertions(+), 0 deletions(-) |
||||
|
||||
diff --git a/bitfield.c b/bitfield.c |
||||
index b2895b4..1690f15 100644 |
||||
--- a/bitfield.c |
||||
+++ b/bitfield.c |
||||
@@ -41,6 +41,8 @@ unsigned bitfield_msg(char *buf, size_t len, const char **bitarray, |
||||
if (status & (1 << (i + bit_offset))) { |
||||
if (p != buf) { |
||||
n = snprintf(p, len, ", "); |
||||
+ if (n < 0) |
||||
+ break; |
||||
len -= n; |
||||
p += n; |
||||
} |
||||
@@ -48,6 +50,8 @@ unsigned bitfield_msg(char *buf, size_t len, const char **bitarray, |
||||
n = snprintf(p, len, "BIT%d", i + bit_offset); |
||||
else |
||||
n = snprintf(p, len, "%s", bitarray[i]); |
||||
+ if (n < 0) |
||||
+ break; |
||||
len -= n; |
||||
p += n; |
||||
} |
||||
-- |
||||
1.7.1 |
||||
|
@ -0,0 +1,46 @@
@@ -0,0 +1,46 @@
|
||||
From 5ba31285710e85c7d3688e536cd54180321964e4 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 23 Jun 2014 16:31:50 -0400 |
||||
Subject: [PATCH 2/2] rasdaemon: correct range while parsing top, middle and lower layers |
||||
|
||||
{top,middle,lower}_layer are signed char, therefore will never be 255. |
||||
|
||||
Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1035746 |
||||
|
||||
Reported-by: Florian Weimer <fweimer@redhat.com> |
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
--- |
||||
ras-mc-handler.c | 14 +++----------- |
||||
1 file changed, 3 insertions(+), 11 deletions(-) |
||||
|
||||
--- upstream.orig/ras-mc-handler.c 2014-06-26 16:09:30.000000000 -0400 |
||||
+++ upstream/ras-mc-handler.c 2014-06-26 16:09:32.000000000 -0400 |
||||
@@ -120,25 +120,17 @@ if (pevent_get_field_val(s, event, "mc_ |
||||
if (pevent_get_field_val(s, event, "top_layer", record, &val, 1) < 0) |
||||
goto parse_error; |
||||
parsed_fields++; |
||||
+ ev.top_layer = (signed char) val; |
||||
|
||||
- ev.top_layer = (int) val; |
||||
if (pevent_get_field_val(s, event, "middle_layer", record, &val, 1) < 0) |
||||
goto parse_error; |
||||
parsed_fields++; |
||||
+ ev.middle_layer = (signed char) val; |
||||
|
||||
- ev.middle_layer = (int) val; |
||||
if (pevent_get_field_val(s, event, "lower_layer", record, &val, 1) < 0) |
||||
goto parse_error; |
||||
parsed_fields++; |
||||
- |
||||
- ev.lower_layer = (int) val; |
||||
- |
||||
- if (ev.top_layer == 255) |
||||
- ev.top_layer = -1; |
||||
- if (ev.middle_layer == 255) |
||||
- ev.middle_layer = -1; |
||||
- if (ev.lower_layer == 255) |
||||
- ev.lower_layer = -1; |
||||
+ ev.lower_layer = (signed char) val; |
||||
|
||||
if (ev.top_layer >= 0 || ev.middle_layer >= 0 || ev.lower_layer >= 0) { |
||||
if (ev.lower_layer >= 0) |
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
--- |
||||
misc/rasdaemon.service.in | 4 +++- |
||||
1 file changed, 3 insertions(+), 1 deletion(-) |
||||
|
||||
--- upstream.orig/misc/rasdaemon.service.in 2014-06-04 14:25:13.000000000 -0400 |
||||
+++ upstream/misc/rasdaemon.service.in 2014-07-08 14:37:26.421395520 -0400 |
||||
@@ -3,7 +3,9 @@ Description=RAS daemon to log the RAS ev |
||||
After=syslog.target |
||||
|
||||
[Service] |
||||
-ExecStart=@sbindir@/rasdaemon -f |
||||
+ExecStart=@sbindir@/rasdaemon -f -r |
||||
+ExecStartPost=@sbindir@/rasdaemon --enable |
||||
+ExecStop=@sbindir@/rasdaemon --disable |
||||
Restart=on-abort |
||||
|
||||
[Install] |
@ -0,0 +1,807 @@
@@ -0,0 +1,807 @@
|
||||
commit 38d48ed48f9d0baa20786d98abe2b4085fca7d5d |
||||
Author: Luck, Tony <tony.luck@intel.com> |
||||
Date: Mon Aug 4 13:29:01 2014 -0700 |
||||
|
||||
rasdaemon: Add support for extlog trace events |
||||
|
||||
Linux kernel 3.17 includes a new trace event to pick up extended |
||||
error logs produced by BIOS in the Common Platform Error Record |
||||
format described in appendix N of the UEFI standard. This patch |
||||
adds support to collect that information and log it both in |
||||
readable ASCII and into the sqlite3 database that rasdaemon |
||||
uses to store all error information. In addition ras-mc-ctl |
||||
is updated to query that database for both detailed and summary |
||||
reports. |
||||
|
||||
Big thanks to Aristeu for pretty much all the sqlite3 pieces, |
||||
plus testing and fixing miscellaneous issues elsewhere. |
||||
|
||||
Signed-off-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index 0fa615f..117c970 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -30,13 +30,17 @@ if WITH_MCE |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
mce-intel-sb.c mce-intel-ivb.c |
||||
endif |
||||
+if WITH_EXTLOG |
||||
+ rasdaemon_SOURCES += ras-extlog-handler.c |
||||
+endif |
||||
if WITH_ABRT_REPORT |
||||
rasdaemon_SOURCES += ras-report.c |
||||
endif |
||||
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a |
||||
|
||||
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ |
||||
- ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h |
||||
+ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ |
||||
+ ras-extlog-handler.h |
||||
|
||||
# This rule can't be called with more than one Makefile job (like make -j8) |
||||
# I can't figure out a way to fix that |
||||
diff --git a/configure.ac b/configure.ac |
||||
index 64a5b13..9495491 100644 |
||||
--- a/configure.ac |
||||
+++ b/configure.ac |
||||
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [ |
||||
]) |
||||
AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes]) |
||||
|
||||
+AC_ARG_ENABLE([extlog], |
||||
+ AS_HELP_STRING([--enable-extlog], [enable EXTLOG events (currently experimental)])) |
||||
+ |
||||
+AS_IF([test "x$enable_extlog" = "xyes"], [ |
||||
+ AC_DEFINE(HAVE_EXTLOG,1,"have EXTLOG events collect") |
||||
+ AC_SUBST([WITH_EXTLOG]) |
||||
+]) |
||||
+AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes]) |
||||
+ |
||||
AC_ARG_ENABLE([abrt_report], |
||||
AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) |
||||
|
||||
diff --git a/ras-aer-handler.c b/ras-aer-handler.c |
||||
index 50526af..bb7c0b9 100644 |
||||
--- a/ras-aer-handler.c |
||||
+++ b/ras-aer-handler.c |
||||
@@ -70,7 +70,7 @@ int ras_aer_event_handler(struct trace_seq *s, |
||||
*/ |
||||
|
||||
if (ras->use_uptime) |
||||
- now = record->ts/1000000000L + ras->uptime_diff; |
||||
+ now = record->ts/user_hz + ras->uptime_diff; |
||||
else |
||||
now = time(NULL); |
||||
|
||||
diff --git a/ras-events.c b/ras-events.c |
||||
index ecbbd3a..0be7c3f 100644 |
||||
--- a/ras-events.c |
||||
+++ b/ras-events.c |
||||
@@ -30,6 +30,7 @@ |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-aer-handler.h" |
||||
#include "ras-mce-handler.h" |
||||
+#include "ras-extlog-handler.h" |
||||
#include "ras-record.h" |
||||
#include "ras-logger.h" |
||||
|
||||
@@ -203,6 +204,10 @@ int toggle_ras_mc_event(int enable) |
||||
rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_EXTLOG |
||||
+ rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); |
||||
+#endif |
||||
+ |
||||
free_ras: |
||||
free(ras); |
||||
return rc; |
||||
@@ -688,6 +693,19 @@ int handle_ras_events(int record_events) |
||||
"mce", "mce_record"); |
||||
} |
||||
#endif |
||||
+ |
||||
+#ifdef HAVE_EXTLOG |
||||
+	/* Register the handler for the ras:extlog_mem_event tracepoint. */ |
||||
+	rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", |
||||
+			       ras_extlog_mem_event_handler); |
||||
+	if (!rc) { |
||||
+		/* |
||||
+		 * Tell the kernel we are listening, so it doesn't printk to |
||||
+		 * the console.  The returned descriptor is deliberately |
||||
+		 * discarded. |
||||
+		 */ |
||||
+		(void)open("/sys/kernel/debug/ras/daemon_active", 0); |
||||
+		num_events++; |
||||
+	} else |
||||
+		/* Report the event that actually failed (was "aer_event"). */ |
||||
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", |
||||
+		    "ras", "extlog_mem_event"); |
||||
+#endif |
||||
+ |
||||
if (!num_events) { |
||||
log(ALL, LOG_INFO, |
||||
"Failed to trace all supported RAS events. Aborting.\n"); |
||||
diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c |
||||
new file mode 100644 |
||||
index 0000000..5fd3580 |
||||
--- /dev/null |
||||
+++ b/ras-extlog-handler.c |
||||
@@ -0,0 +1,246 @@ |
||||
+/* |
||||
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com> |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+#include <ctype.h> |
||||
+#include <errno.h> |
||||
+#include <stdio.h> |
||||
+#include <stdlib.h> |
||||
+#include <string.h> |
||||
+#include <unistd.h> |
||||
+#include <stdint.h> |
||||
+#include "libtrace/kbuffer.h" |
||||
+#include "ras-extlog-handler.h" |
||||
+#include "ras-record.h" |
||||
+#include "ras-logger.h" |
||||
+#include "ras-report.h" |
||||
+ |
||||
+/* |
||||
+ * Map a memory error type code to a printable name. |
||||
+ * Codes 0..15 have a name; anything else yields "unknown-type". |
||||
+ */ |
||||
+static char *err_type(int etype) |
||||
+{ |
||||
+	/* Names indexed by error type code. */ |
||||
+	static char *const names[] = { |
||||
+		"unknown", |
||||
+		"no error", |
||||
+		"single-bit ECC", |
||||
+		"multi-bit ECC", |
||||
+		"single-symbol chipkill ECC", |
||||
+		"multi-symbol chipkill ECC", |
||||
+		"master abort", |
||||
+		"target abort", |
||||
+		"parity error", |
||||
+		"watchdog timeout", |
||||
+		"invalid address", |
||||
+		"mirror Broken", |
||||
+		"memory sparing", |
||||
+		"scrub corrected error", |
||||
+		"scrub uncorrected error", |
||||
+		"physical memory map-out event", |
||||
+	}; |
||||
+	const int count = sizeof(names) / sizeof(names[0]); |
||||
+ |
||||
+	if (etype < 0 || etype >= count) |
||||
+		return "unknown-type"; |
||||
+	return names[etype]; |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * Map a severity code to a printable name. |
||||
+ * Codes 0..3 have a name; anything else yields "unknown-severity". |
||||
+ */ |
||||
+static char *err_severity(int severity) |
||||
+{ |
||||
+	/* Names indexed by severity code. */ |
||||
+	static char *const names[] = { |
||||
+		"recoverable", |
||||
+		"fatal", |
||||
+		"corrected", |
||||
+		"informational", |
||||
+	}; |
||||
+	const int count = sizeof(names) / sizeof(names[0]); |
||||
+ |
||||
+	if (severity < 0 || severity >= count) |
||||
+		return "unknown-severity"; |
||||
+	return names[severity]; |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * Build the physical address mask for an error whose lowest valid |
||||
+ * address bit is 'lsb': every bit below 'lsb' is cleared. |
||||
+ * |
||||
+ * The value 0xff yields an all-ones mask.  The caller stores the field |
||||
+ * in an int8_t, so 0xff reaches this function as -1; only the low byte |
||||
+ * is considered so that both spellings are treated alike. |
||||
+ */ |
||||
+static unsigned long long err_mask(int lsb) |
||||
+{ |
||||
+	lsb &= 0xff; |
||||
+	if (lsb == 0xff) |
||||
+		return ~0ull; |
||||
+	/* Shifting a 64-bit value by 64 or more is undefined behaviour. */ |
||||
+	if (lsb >= 64) |
||||
+		return 0; |
||||
+	return ~((1ull << lsb) - 1); |
||||
+} |
||||
+ |
||||
+#define CPER_MEM_VALID_NODE 0x0008 |
||||
+#define CPER_MEM_VALID_CARD 0x0010 |
||||
+#define CPER_MEM_VALID_MODULE 0x0020 |
||||
+#define CPER_MEM_VALID_BANK 0x0040 |
||||
+#define CPER_MEM_VALID_DEVICE 0x0080 |
||||
+#define CPER_MEM_VALID_ROW 0x0100 |
||||
+#define CPER_MEM_VALID_COLUMN 0x0200 |
||||
+#define CPER_MEM_VALID_BIT_POSITION 0x0400 |
||||
+#define CPER_MEM_VALID_REQUESTOR_ID 0x0800 |
||||
+#define CPER_MEM_VALID_RESPONDER_ID 0x1000 |
||||
+#define CPER_MEM_VALID_TARGET_ID 0x2000 |
||||
+#define CPER_MEM_VALID_RANK_NUMBER 0x8000 |
||||
+#define CPER_MEM_VALID_CARD_HANDLE 0x10000 |
||||
+#define CPER_MEM_VALID_MODULE_HANDLE 0x20000 |
||||
+ |
||||
+struct cper_mem_err_compact { |
||||
+ unsigned long long validation_bits; |
||||
+ unsigned short node; |
||||
+ unsigned short card; |
||||
+ unsigned short module; |
||||
+ unsigned short bank; |
||||
+ unsigned short device; |
||||
+ unsigned short row; |
||||
+ unsigned short column; |
||||
+ unsigned short bit_pos; |
||||
+ unsigned long long requestor_id; |
||||
+ unsigned long long responder_id; |
||||
+ unsigned long long target_id; |
||||
+ unsigned short rank; |
||||
+ unsigned short mem_array_handle; |
||||
+ unsigned short mem_dev_handle; |
||||
+}; |
||||
+ |
||||
+/* |
||||
+ * Format the fields of a compact CPER memory error record that are |
||||
+ * flagged in validation_bits as " (name: value name: value ...)". |
||||
+ * |
||||
+ * Returns "" when 'c' is NULL or no validation bit is set.  Otherwise the |
||||
+ * result lives in a static buffer that the next call overwrites. |
||||
+ */ |
||||
+static char *err_cper_data(const char *c) |
||||
+{ |
||||
+	const struct cper_mem_err_compact *cpd = (struct cper_mem_err_compact *)c; |
||||
+	static char buf[256]; |
||||
+	char *p = buf; |
||||
+ |
||||
+	/* The caller passes a raw field pointer that may be NULL. */ |
||||
+	if (!cpd || cpd->validation_bits == 0) |
||||
+		return ""; |
||||
+	p += sprintf(p, " ("); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_NODE) |
||||
+		p += sprintf(p, "node: %d ", cpd->node); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_CARD) |
||||
+		p += sprintf(p, "card: %d ", cpd->card); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_MODULE) |
||||
+		p += sprintf(p, "module: %d ", cpd->module); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_BANK) |
||||
+		p += sprintf(p, "bank: %d ", cpd->bank); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_DEVICE) |
||||
+		p += sprintf(p, "device: %d ", cpd->device); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_ROW) |
||||
+		p += sprintf(p, "row: %d ", cpd->row); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_COLUMN) |
||||
+		p += sprintf(p, "column: %d ", cpd->column); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_BIT_POSITION) |
||||
+		p += sprintf(p, "bit_pos: %d ", cpd->bit_pos); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) |
||||
+		p += sprintf(p, "req_id: 0x%llx ", cpd->requestor_id); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_RESPONDER_ID) |
||||
+		p += sprintf(p, "resp_id: 0x%llx ", cpd->responder_id); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_TARGET_ID) |
||||
+		p += sprintf(p, "tgt_id: 0x%llx ", cpd->target_id); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_RANK_NUMBER) |
||||
+		p += sprintf(p, "rank: %d ", cpd->rank); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_CARD_HANDLE) |
||||
+		p += sprintf(p, "card_handle: %d ", cpd->mem_array_handle); |
||||
+	if (cpd->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) |
||||
+		p += sprintf(p, "module_handle: %d ", cpd->mem_dev_handle); |
||||
+ |
||||
+	/* |
||||
+	 * Every printed field leaves a trailing blank; the closing paren |
||||
+	 * replaces the last one.  When only bits unknown to this function |
||||
+	 * are set nothing was printed, so the paren must be appended rather |
||||
+	 * than written over the opening one. |
||||
+	 */ |
||||
+	if (p[-1] == ' ') |
||||
+		p--; |
||||
+	sprintf(p, ")"); |
||||
+ |
||||
+	return buf; |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * Render a 16-byte little-endian UUID as the usual |
||||
+ * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" text form. |
||||
+ * |
||||
+ * 'uu' must point to at least 16 bytes.  The result lives in a static |
||||
+ * buffer that the next call overwrites. |
||||
+ */ |
||||
+static char *uuid_le(const char *uu) |
||||
+{ |
||||
+	static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; |
||||
+	char *p = uuid; |
||||
+	int i; |
||||
+	/* Byte order in which the input is printed. */ |
||||
+	static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; |
||||
+ |
||||
+	for (i = 0; i < 16; i++) { |
||||
+		/* |
||||
+		 * Cast to unsigned char: where plain char is signed, a byte |
||||
+		 * >= 0x80 would be sign-extended and printed as eight hex |
||||
+		 * digits, overrunning the static buffer. |
||||
+		 */ |
||||
+		p += sprintf(p, "%.2x", (unsigned char)uu[le[i]]); |
||||
+		switch (i) { |
||||
+		case 3: |
||||
+		case 5: |
||||
+		case 7: |
||||
+		case 9: |
||||
+			*p++ = '-'; |
||||
+			break; |
||||
+		} |
||||
+	} |
||||
+ |
||||
+	*p = 0; |
||||
+ |
||||
+	return uuid; |
||||
+} |
||||
+ |
||||
+ |
||||
+/* |
||||
+ * Append a one-line description of an extlog memory event to the trace |
||||
+ * sequence 's': sequence number, severity name, error type name, |
||||
+ * physical address, address mask, the decoded CPER data, the FRU text |
||||
+ * and the FRU id rendered as a UUID. |
||||
+ * |
||||
+ * 'ras' and 'record' are not used by this function. |
||||
+ */ |
||||
+static void report_extlog_mem_event(struct ras_events *ras, |
||||
+ struct pevent_record *record, |
||||
+ struct trace_seq *s, |
||||
+ struct ras_extlog_event *ev) |
||||
+{ |
||||
+ /* err_cper_data() supplies its own leading blank, hence "%llx%s". */ |
||||
+ trace_seq_printf(s, "%d %s error: %s physical addr: 0x%llx mask: 0x%llx%s %s %s", |
||||
+ ev->error_seq, err_severity(ev->severity), |
||||
+ err_type(ev->etype), ev->address, |
||||
+ err_mask(ev->pa_mask_lsb), |
||||
+ err_cper_data(ev->cper_data), |
||||
+ ev->fru_text, |
||||
+ uuid_le(ev->fru_id)); |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * Trace handler for the ras:extlog_mem_event tracepoint. |
||||
+ * |
||||
+ * Decodes the event fields into a struct ras_extlog_event, prints a |
||||
+ * human readable line into 's' and hands the record to the database |
||||
+ * layer.  'context' is the struct ras_events of the daemon. |
||||
+ * |
||||
+ * Returns 0 on success, -1 when a field cannot be read from the record. |
||||
+ */ |
||||
+int ras_extlog_mem_event_handler(struct trace_seq *s, |
||||
+				 struct pevent_record *record, |
||||
+				 struct event_format *event, void *context) |
||||
+{ |
||||
+	int len = 0; |
||||
+	unsigned long long val; |
||||
+	struct ras_events *ras = context; |
||||
+	time_t now; |
||||
+	struct tm *tm; |
||||
+	struct ras_extlog_event ev; |
||||
+ |
||||
+	/* |
||||
+	 * Start from a fully defined record: the timestamp is printed even |
||||
+	 * when localtime() fails, so it must at least be an empty string. |
||||
+	 */ |
||||
+	memset(&ev, 0, sizeof(ev)); |
||||
+ |
||||
+	/* |
||||
+	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock. |
||||
+	 * On previous kernels, the way to properly generate an event would |
||||
+	 * be to inject a fake one, measure its timestamp and diff it against |
||||
+	 * gettimeofday. We won't do it here. Instead, let's use uptime, |
||||
+	 * falling-back to the event report's time, if "uptime" clock is |
||||
+	 * not available (legacy kernels). |
||||
+	 */ |
||||
+ |
||||
+	if (ras->use_uptime) |
||||
+		now = record->ts/user_hz + ras->uptime_diff; |
||||
+	else |
||||
+		now = time(NULL); |
||||
+ |
||||
+	tm = localtime(&now); |
||||
+	if (tm) |
||||
+		strftime(ev.timestamp, sizeof(ev.timestamp), |
||||
+			 "%Y-%m-%d %H:%M:%S %z", tm); |
||||
+	trace_seq_printf(s, "%s ", ev.timestamp); |
||||
+ |
||||
+	if (pevent_get_field_val(s, event, "etype", record, &val, 1) < 0) |
||||
+		return -1; |
||||
+	ev.etype = val; |
||||
+	if (pevent_get_field_val(s, event, "err_seq", record, &val, 1) < 0) |
||||
+		return -1; |
||||
+	ev.error_seq = val; |
||||
+	if (pevent_get_field_val(s, event, "sev", record, &val, 1) < 0) |
||||
+		return -1; |
||||
+	ev.severity = val; |
||||
+	if (pevent_get_field_val(s, event, "pa", record, &val, 1) < 0) |
||||
+		return -1; |
||||
+	ev.address = val; |
||||
+	if (pevent_get_field_val(s, event, "pa_mask_lsb", record, &val, 1) < 0) |
||||
+		return -1; |
||||
+	ev.pa_mask_lsb = val; |
||||
+ |
||||
+	/* |
||||
+	 * The raw fields are dereferenced by the report and store code, |
||||
+	 * so treat a missing one like any other unreadable field. |
||||
+	 */ |
||||
+	ev.cper_data = pevent_get_field_raw(s, event, "data", |
||||
+					    record, &len, 1); |
||||
+	if (!ev.cper_data) |
||||
+		return -1; |
||||
+	ev.cper_data_length = len; |
||||
+	ev.fru_text = pevent_get_field_raw(s, event, "fru_text", |
||||
+					   record, &len, 1); |
||||
+	if (!ev.fru_text) |
||||
+		return -1; |
||||
+	ev.fru_id = pevent_get_field_raw(s, event, "fru_id", |
||||
+					 record, &len, 1); |
||||
+	if (!ev.fru_id) |
||||
+		return -1; |
||||
+ |
||||
+	report_extlog_mem_event(ras, record, s, &ev); |
||||
+ |
||||
+	ras_store_extlog_mem_record(ras, &ev); |
||||
+ |
||||
+	return 0; |
||||
+} |
||||
diff --git a/ras-extlog-handler.h b/ras-extlog-handler.h |
||||
new file mode 100644 |
||||
index 0000000..54e8cec |
||||
--- /dev/null |
||||
+++ b/ras-extlog-handler.h |
||||
@@ -0,0 +1,31 @@ |
||||
+/* |
||||
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com> |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#ifndef __RAS_EXTLOG_HANDLER_H |
||||
+#define __RAS_EXTLOG_HANDLER_H |
||||
+ |
||||
+#include <stdint.h> |
||||
+ |
||||
+#include "ras-events.h" |
||||
+#include "libtrace/event-parse.h" |
||||
+ |
||||
+extern int ras_extlog_mem_event_handler(struct trace_seq *s, |
||||
+ struct pevent_record *record, |
||||
+ struct event_format *event, void *context); |
||||
+ |
||||
+#endif |
||||
diff --git a/ras-mc-handler.c b/ras-mc-handler.c |
||||
index ffb3805..704a41c 100644 |
||||
--- a/ras-mc-handler.c |
||||
+++ b/ras-mc-handler.c |
||||
@@ -47,7 +47,7 @@ int ras_mc_event_handler(struct trace_seq *s, |
||||
*/ |
||||
|
||||
if (ras->use_uptime) |
||||
- now = record->ts/1000000000L + ras->uptime_diff; |
||||
+ now = record->ts/user_hz + ras->uptime_diff; |
||||
else |
||||
now = time(NULL); |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 1431049..a1d0b5d 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -237,7 +237,7 @@ static void report_mce_event(struct ras_events *ras, |
||||
*/ |
||||
|
||||
if (ras->use_uptime) |
||||
- now = record->ts/1000000000L + ras->uptime_diff; |
||||
+ now = record->ts/user_hz + ras->uptime_diff; |
||||
else |
||||
now = time(NULL); |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index e5150ad..3dc4493 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -157,6 +157,57 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
} |
||||
#endif |
||||
|
||||
+#ifdef HAVE_EXTLOG |
||||
+static const struct db_fields extlog_event_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ { .name="etype", .type="INTEGER" }, |
||||
+ { .name="error_count", .type="INTEGER" }, |
||||
+ { .name="severity", .type="INTEGER" }, |
||||
+ { .name="address", .type="INTEGER" }, |
||||
+ { .name="fru_id", .type="BLOB" }, |
||||
+ { .name="fru_text", .type="TEXT" }, |
||||
+ { .name="cper_data", .type="BLOB" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor extlog_event_tab = { |
||||
+ .name = "extlog_event", |
||||
+ .fields = extlog_event_fields, |
||||
+ .num_fields = ARRAY_SIZE(extlog_event_fields), |
||||
+}; |
||||
+ |
||||
+/* |
||||
+ * Store one extlog memory event through the prepared statement |
||||
+ * priv->stmt_extlog_record. |
||||
+ * |
||||
+ * Returns 0 without doing anything when there is no database state or no |
||||
+ * prepared statement; otherwise returns the result of sqlite3_reset(). |
||||
+ * A failing sqlite3_step() is only logged. |
||||
+ */ |
||||
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_extlog_record) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "extlog_record store: %p\n", priv->stmt_extlog_record); |
||||
+ |
||||
+ /* |
||||
+ * Parameters 1..8 presumably map to the extlog_event columns after |
||||
+ * "id", in table order (error_seq goes to "error_count") - TODO |
||||
+ * confirm against ras_mc_prepare_stmt(). |
||||
+ */ |
||||
+ sqlite3_bind_text (priv->stmt_extlog_record, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_extlog_record, 2, ev->etype); |
||||
+ sqlite3_bind_int (priv->stmt_extlog_record, 3, ev->error_seq); |
||||
+ sqlite3_bind_int (priv->stmt_extlog_record, 4, ev->severity); |
||||
+ sqlite3_bind_int64 (priv->stmt_extlog_record, 5, ev->address); |
||||
+ /* The FRU id is bound as a fixed 16-byte blob. */ |
||||
+ sqlite3_bind_blob (priv->stmt_extlog_record, 6, ev->fru_id, 16, NULL); |
||||
+ sqlite3_bind_text (priv->stmt_extlog_record, 7, ev->fru_text, -1, NULL); |
||||
+ sqlite3_bind_blob (priv->stmt_extlog_record, 8, ev->cper_data, ev->cper_data_length, NULL); |
||||
+ |
||||
+ rc = sqlite3_step(priv->stmt_extlog_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do extlog_mem_record step on sqlite: error = %d\n", rc); |
||||
+ /* Reset so the statement can be reused; rc now holds the reset result. */ |
||||
+ rc = sqlite3_reset(priv->stmt_extlog_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset extlog_mem_record on sqlite: error = %d\n", |
||||
+ rc); |
||||
+ /* NOTE(review): this is logged even when the step or reset failed. */ |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+#endif |
||||
|
||||
/* |
||||
* Table and functions to handle mce:mce_record |
||||
@@ -385,6 +436,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
&aer_event_tab); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_EXTLOG |
||||
+ rc = ras_mc_create_table(priv, &extlog_event_tab); |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_extlog_record, |
||||
+ &extlog_event_tab); |
||||
+#endif |
||||
+ |
||||
#ifdef HAVE_MCE |
||||
rc = ras_mc_create_table(priv, &mce_record_tab); |
||||
if (rc == SQLITE_OK) |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 6f146a8..5d84297 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -19,8 +19,11 @@ |
||||
#ifndef __RAS_RECORD_H |
||||
#define __RAS_RECORD_H |
||||
|
||||
+#include <stdint.h> |
||||
#include "config.h" |
||||
|
||||
+extern long user_hz; |
||||
+ |
||||
struct ras_events *ras; |
||||
|
||||
struct ras_mc_event { |
||||
@@ -40,8 +43,22 @@ struct ras_aer_event { |
||||
const char *msg; |
||||
}; |
||||
|
||||
+struct ras_extlog_event { |
||||
+ char timestamp[64]; |
||||
+ int32_t error_seq; |
||||
+ int8_t etype; |
||||
+ int8_t severity; |
||||
+ unsigned long long address; |
||||
+ int8_t pa_mask_lsb; |
||||
+ const char *fru_id; |
||||
+ const char *fru_text; |
||||
+ const char *cper_data; |
||||
+ unsigned short cper_data_length; |
||||
+}; |
||||
+ |
||||
struct ras_mc_event; |
||||
struct ras_aer_event; |
||||
+struct ras_extlog_event; |
||||
struct mce_event; |
||||
|
||||
#ifdef HAVE_SQLITE3 |
||||
@@ -57,18 +74,23 @@ struct sqlite3_priv { |
||||
#ifdef HAVE_MCE |
||||
sqlite3_stmt *stmt_mce_record; |
||||
#endif |
||||
+#ifdef HAVE_EXTLOG |
||||
+ sqlite3_stmt *stmt_extlog_record; |
||||
+#endif |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev); |
||||
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); |
||||
|
||||
#else |
||||
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
||||
static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
||||
static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
+static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
diff --git a/rasdaemon.c b/rasdaemon.c |
||||
index 85ac2d4..41022ef 100644 |
||||
--- a/rasdaemon.c |
||||
+++ b/rasdaemon.c |
||||
@@ -68,6 +68,8 @@ static error_t parse_opt(int k, char *arg, struct argp_state *state) |
||||
return 0; |
||||
} |
||||
|
||||
+long user_hz; |
||||
+ |
||||
int main(int argc, char *argv[]) |
||||
{ |
||||
struct arguments args; |
||||
@@ -91,6 +93,8 @@ int main(int argc, char *argv[]) |
||||
}; |
||||
memset (&args, 0, sizeof(args)); |
||||
|
||||
+ user_hz = sysconf(_SC_CLK_TCK); |
||||
+ |
||||
argp_parse(&argp, argc, argv, 0, &idx, &args); |
||||
|
||||
if (idx < 0) { |
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index e9f9c59..110262f 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -842,11 +842,141 @@ sub find_prog |
||||
return ""; |
||||
} |
||||
|
||||
+# Translate an extlog error type code into its printable name. |
||||
+# Codes outside 0..15 yield "unknown-type". |
||||
+sub get_extlog_type |
||||
+{ |
||||
+    my ($code) = @_; |
||||
+ |
||||
+    return "unknown-type" if ($code < 0 || $code > 15); |
||||
+ |
||||
+    # Names indexed by error type code. |
||||
+    return ("unknown", |
||||
+            "no error", |
||||
+            "single-bit ECC", |
||||
+            "multi-bit ECC", |
||||
+            "single-symbol chipkill ECC", |
||||
+            "multi-symbol chipkill ECC", |
||||
+            "master abort", |
||||
+            "target abort", |
||||
+            "parity error", |
||||
+            "watchdog timeout", |
||||
+            "invalid address", |
||||
+            "mirror Broken", |
||||
+            "memory sparing", |
||||
+            "scrub corrected error", |
||||
+            "scrub uncorrected error", |
||||
+            "physical memory map-out event", |
||||
+            "unknown-type")[$code]; |
||||
+} |
||||
+ |
||||
+# Translate an extlog severity code into its printable name. |
||||
+# Codes outside 0..3 yield "unknown-severity". |
||||
+sub get_extlog_severity |
||||
+{ |
||||
+    my ($code) = @_; |
||||
+ |
||||
+    return "unknown-severity" if ($code < 0 || $code > 3); |
||||
+ |
||||
+    # Names indexed by severity code. |
||||
+    return ("recoverable", |
||||
+            "fatal", |
||||
+            "corrected", |
||||
+            "informational", |
||||
+            "unknown-severity")[$code]; |
||||
+} |
||||
+ |
||||
+use constant { |
||||
+ CPER_MEM_VALID_NODE => 0x0008, |
||||
+ CPER_MEM_VALID_CARD => 0x0010, |
||||
+ CPER_MEM_VALID_MODULE => 0x0020, |
||||
+ CPER_MEM_VALID_BANK => 0x0040, |
||||
+ CPER_MEM_VALID_DEVICE => 0x0080, |
||||
+ CPER_MEM_VALID_ROW => 0x0100, |
||||
+ CPER_MEM_VALID_COLUMN => 0x0200, |
||||
+ CPER_MEM_VALID_BIT_POSITION => 0x0400, |
||||
+ CPER_MEM_VALID_REQUESTOR_ID => 0x0800, |
||||
+ CPER_MEM_VALID_RESPONDER_ID => 0x1000, |
||||
+ CPER_MEM_VALID_TARGET_ID => 0x2000, |
||||
+ CPER_MEM_VALID_ERROR_TYPE => 0x4000, |
||||
+ CPER_MEM_VALID_RANK_NUMBER => 0x8000, |
||||
+ CPER_MEM_VALID_CARD_HANDLE => 0x10000, |
||||
+ CPER_MEM_VALID_MODULE_HANDLE => 0x20000, |
||||
+}; |
||||
+ |
||||
+# Decode the cper_data blob stored with an extlog event and return the |
||||
+# fields flagged as valid as a "name=value, name=value" string. |
||||
+# |
||||
+# The blob is unpacked as one 64-bit validation mask, eight 16-bit values, |
||||
+# three 64-bit ids and three more 16-bit values, all in native byte order. |
||||
+# Returns "" when the blob is too short to hold the validation mask; any |
||||
+# field missing from a short blob is skipped. |
||||
+sub get_cper_data_text |
||||
+{ |
||||
+    my $cper_data = $_[0]; |
||||
+    my ($validation_bits, $node, $card, $module, $bank, $device, $row, $column, $bit_pos, $requestor_id, $responder_id, $target_id, $rank, $mem_array_handle, $mem_dev_handle) = unpack 'QSSSSSSSSQQQSSS', $cper_data; |
||||
+    my @out; |
||||
+ |
||||
+    return "" unless (defined $validation_bits); |
||||
+ |
||||
+    # One entry per printable field: validity bit, output format, value. |
||||
+    # The three ids carry a label like every other field, so a lone hex |
||||
+    # number in the output is never ambiguous. |
||||
+    my @fields = ( |
||||
+        [CPER_MEM_VALID_NODE,          "node=%d",              $node], |
||||
+        [CPER_MEM_VALID_CARD,          "card=%d",              $card], |
||||
+        [CPER_MEM_VALID_MODULE,        "module=%d",            $module], |
||||
+        [CPER_MEM_VALID_BANK,          "bank=%d",              $bank], |
||||
+        [CPER_MEM_VALID_DEVICE,        "device=%d",            $device], |
||||
+        [CPER_MEM_VALID_ROW,           "row=%d",               $row], |
||||
+        [CPER_MEM_VALID_COLUMN,        "column=%d",            $column], |
||||
+        [CPER_MEM_VALID_BIT_POSITION,  "bit_position=%d",      $bit_pos], |
||||
+        [CPER_MEM_VALID_REQUESTOR_ID,  "requestor_id=0x%08x",  $requestor_id], |
||||
+        [CPER_MEM_VALID_RESPONDER_ID,  "responder_id=0x%08x",  $responder_id], |
||||
+        [CPER_MEM_VALID_TARGET_ID,     "target_id=0x%08x",     $target_id], |
||||
+        [CPER_MEM_VALID_RANK_NUMBER,   "rank=%d",              $rank], |
||||
+        [CPER_MEM_VALID_CARD_HANDLE,   "mem_array_handle=%d",  $mem_array_handle], |
||||
+        [CPER_MEM_VALID_MODULE_HANDLE, "mem_dev_handle=%d",    $mem_dev_handle], |
||||
+    ); |
||||
+ |
||||
+    foreach my $field (@fields) { |
||||
+        my ($bit, $format, $value) = @$field; |
||||
+ |
||||
+        next unless ($validation_bits & $bit); |
||||
+        next unless (defined $value); |
||||
+        push @out, (sprintf $format, $value); |
||||
+    } |
||||
+ |
||||
+    return join (", ", @out); |
||||
+} |
||||
+ |
||||
+# Render a 16-byte little-endian UUID blob in the usual |
||||
+# "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" text form. |
||||
+# |
||||
+# An undefined or short blob is padded with zero bytes, so the result |
||||
+# always has the full length and no undefined byte is ever formatted. |
||||
+sub get_uuid_le |
||||
+{ |
||||
+    my $out = ""; |
||||
+    my $raw = defined $_[0] ? $_[0] : ""; |
||||
+    # "a16" pads with NUL bytes (or truncates) to exactly 16 bytes. |
||||
+    my @bytes = unpack "C16", pack("a16", $raw); |
||||
+    # Byte order in which the blob is printed. |
||||
+    my @le16_table = (3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15); |
||||
+ |
||||
+    for (my $i = 0; $i < 16; $i++) { |
||||
+        $out .= sprintf "%.2x", $bytes[$le16_table[$i]]; |
||||
+        if ($i == 3 or $i == 5 or $i == 7 or $i == 9) { |
||||
+            $out .= "-"; |
||||
+        } |
||||
+    } |
||||
+    return $out; |
||||
+} |
||||
+ |
||||
sub summary |
||||
{ |
||||
require DBI; |
||||
my ($query, $query_handle, $out); |
||||
my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); |
||||
+ my ($etype, $severity, $etype_string, $severity_string); |
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
||||
|
||||
@@ -882,6 +1012,24 @@ sub summary |
||||
} |
||||
$query_handle->finish; |
||||
|
||||
+ # extlog errors |
||||
+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($etype, $severity, $count)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $etype_string = get_extlog_type($etype); |
||||
+ $severity_string = get_extlog_severity($severity); |
||||
+ $out .= "\t$count $etype_string $severity_string errors\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "Extlog records summary:\n$out"; |
||||
+ } else { |
||||
+ print "No Extlog errors.\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
+ |
||||
# MCE mce_record errors |
||||
$query = "select error_msg, count(*) from mce_record group by error_msg"; |
||||
$query_handle = $dbh->prepare($query); |
||||
@@ -906,6 +1054,7 @@ sub errors |
||||
require DBI; |
||||
my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); |
||||
my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); |
||||
+ my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); |
||||
|
||||
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); |
||||
|
||||
@@ -945,6 +1094,31 @@ sub errors |
||||
} |
||||
$query_handle->finish; |
||||
|
||||
+ # Extlog errors |
||||
+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; |
||||
+ $query_handle = $dbh->prepare($query); |
||||
+ $query_handle->execute(); |
||||
+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); |
||||
+ $out = ""; |
||||
+ while($query_handle->fetch()) { |
||||
+ $etype_string = get_extlog_type($etype); |
||||
+ $severity_string = get_extlog_severity($severity); |
||||
+ $out .= "$id $timestamp error: "; |
||||
+ $out .= "type=$etype_string, "; |
||||
+ $out .= "severity=$severity_string, "; |
||||
+ $out .= sprintf "address=0x%08x, ", $addr; |
||||
+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); |
||||
+ $out .= "fru_text='$fru_text', "; |
||||
+ $out .= get_cper_data_text($cper_data) if ($cper_data); |
||||
+ $out .= "\n"; |
||||
+ } |
||||
+ if ($out ne "") { |
||||
+ print "Extlog events:\n$out\n"; |
||||
+ } else { |
||||
+ print "No Extlog errors.\n\n"; |
||||
+ } |
||||
+ $query_handle->finish; |
||||
+ |
||||
# MCE mce_record errors |
||||
$query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; |
||||
$query_handle = $dbh->prepare($query); |
@ -0,0 +1,37 @@
@@ -0,0 +1,37 @@
|
||||
commit d3d336471119f16368e40b68643d9dd928be5385 |
||||
Author: Luck, Tony <tony.luck@intel.com> |
||||
Date: Mon Apr 7 12:23:25 2014 -0700 |
||||
|
||||
rasdaemon: fix some typos and cut/paste errors in sqlite bits |
||||
|
||||
aer event has the error_type as field 2 and msg as field 3 - but the calls |
||||
to sqlite3_bind_text use 3 and 4. |
||||
|
||||
mce event forgot to declare the "mcastatus_msg" |
||||
|
||||
Signed-off-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index daa3cb1..e602edb 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -139,8 +139,8 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
log(TERM, LOG_INFO, "aer_event store: %p\n", priv->stmt_aer_event); |
||||
|
||||
sqlite3_bind_text(priv->stmt_aer_event, 1, ev->timestamp, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt_aer_event, 3, ev->error_type, -1, NULL); |
||||
- sqlite3_bind_text(priv->stmt_aer_event, 4, ev->msg, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_aer_event, 2, ev->error_type, -1, NULL); |
||||
+ sqlite3_bind_text(priv->stmt_aer_event, 3, ev->msg, -1, NULL); |
||||
|
||||
rc = sqlite3_step(priv->stmt_aer_event); |
||||
if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
@@ -189,6 +189,7 @@ static const struct db_fields mce_record_fields[] = { |
||||
{ .name="error_msg", .type="TEXT" }, |
||||
{ .name="mcgstatus_msg", .type="TEXT" }, |
||||
{ .name="mcistatus_msg", .type="TEXT" }, // 20 |
||||
+ { .name="mcastatus_msg", .type="TEXT" }, |
||||
{ .name="user_action", .type="TEXT" }, |
||||
{ .name="mc_location", .type="TEXT" }, |
||||
}; |
@ -0,0 +1,73 @@
@@ -0,0 +1,73 @@
|
||||
commit 52e60e3050105a55e1ff2382979d5f370f398200 |
||||
Author: Luck, Tony <tony.luck@intel.com> |
||||
Date: Mon Apr 7 11:27:47 2014 -0700 |
||||
|
||||
rasdaemon: sqlite truncates some MCE fields to 32-bit |
||||
|
||||
The sqlite3_bind_int() function takes an "int" as the argument value to |
||||
save to the database. But some fields are wider than 32-bits. Use |
||||
sqlite3_bind_int64() for the fields where we know values can exceed |
||||
4G. |
||||
|
||||
Before: |
||||
|
||||
# ./rasdaemon/util/ras-mc-ctl --errors |
||||
... |
||||
MCE events: |
||||
1 2014-04-04 08:50:32 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x00010090, addr=0x35fcb9c0, misc=0x5026a686, walltime=0x5342e4f9, cpu=0x0000000e, cpuid=0x000306f1, apicid=0x00000020, socketid=0x00000001, bank=0x00000008 |
||||
2 2014-04-04 08:50:35 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x00010090, addr=0x4187adc0, misc=0x4274f486, walltime=0x5342e4fc, cpu=0x0000000e, cpuid=0x000306f1, apicid=0x00000020, socketid=0x00000001, bank=0x00000007 |
||||
3 2014-04-04 08:50:37 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x00010090, addr=0x52efc600, misc=0x50028286, walltime=0x5342e4fd, cpu=0x0000000e, cpuid=0x000306f1, apicid=0x00000020, socketid=0x00000001, bank=0x00000008 |
||||
|
||||
After: |
||||
./rasdaemon/util/ras-mc-ctl --errors |
||||
... |
||||
1 2014-04-04 09:00:07 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x8c00004000010090, addr=0x45340a180, misc=0x140686886, walltime=0x5342e736, cpuid=0x000306f1, bank=0x00000008 |
||||
2 2014-04-04 09:00:08 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x8c00004000010090, addr=0x44d6e4780, misc=0x15060e086, walltime=0x5342e737, cpuid=0x000306f1, bank=0x00000007 |
||||
3 2014-04-04 09:00:10 -0700 error: MEMORY CONTROLLER RD_CHANNEL0_ERR Transaction: Memory read error, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x8c00004000010090, addr=0x44cb64640, misc=0x140505086, walltime=0x5342e739, cpuid=0x000306f1, bank=0x00000008 |
||||
|
||||
Signed-off-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
|
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index e602edb..e5150ad 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -209,22 +209,22 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) |
||||
return 0; |
||||
log(TERM, LOG_INFO, "mce_record store: %p\n", priv->stmt_mce_record); |
||||
|
||||
- sqlite3_bind_text(priv->stmt_mce_record, 1, ev->timestamp, -1, NULL); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 2, ev->mcgcap); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 3, ev->mcgstatus); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 4, ev->status); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 5, ev->addr); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 6, ev->misc); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 7, ev->ip); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 8, ev->tsc); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 9, ev->walltime); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 10, ev->cpu); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpuid); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 12, ev->apicid); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 13, ev->socketid); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 14, ev->cs); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 15, ev->bank); |
||||
- sqlite3_bind_int (priv->stmt_mce_record, 16, ev->cpuvendor); |
||||
+ sqlite3_bind_text (priv->stmt_mce_record, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 2, ev->mcgcap); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 3, ev->mcgstatus); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 4, ev->status); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 5, ev->addr); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 6, ev->misc); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 7, ev->ip); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 8, ev->tsc); |
||||
+ sqlite3_bind_int64 (priv->stmt_mce_record, 9, ev->walltime); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 10, ev->cpu); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 11, ev->cpuid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 12, ev->apicid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 13, ev->socketid); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 14, ev->cs); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 15, ev->bank); |
||||
+ sqlite3_bind_int (priv->stmt_mce_record, 16, ev->cpuvendor); |
||||
|
||||
sqlite3_bind_text(priv->stmt_mce_record, 17, ev->bank_name, -1, NULL); |
||||
sqlite3_bind_text(priv->stmt_mce_record, 18, ev->error_msg, -1, NULL); |
@ -0,0 +1,44 @@
@@ -0,0 +1,44 @@
|
||||
commit f20a366a9b7a32a1be6fc89e7546cc2b4cb690bf |
||||
Author: Xie XiuQi <xiexiuqi@huawei.com> |
||||
Date: Thu May 8 20:07:19 2014 +0800 |
||||
|
||||
rasdaemon: fix mce numfield decoded error |
||||
|
||||
Some fields are missing in mce decode information, as below: |
||||
... |
||||
rasdaemon: register inserted at db |
||||
<...>-31568 [000] 4023.214080: mce_record: |
||||
2014-05-07 15:51:16 +0800 bank=2, status= bd000000000000c0, MEMORY |
||||
CONTROLLER MS_CHANNEL0_ERR Transaction: Memory scrubbing error %s: %Lu |
||||
%s: %Lx |
||||
%s: %Lx |
||||
%s: %Lu |
||||
%s: %Lu |
||||
%s: %Lx |
||||
, mci=Uncorrected_error Error_enabled SRAO, n_errors=0 channel=0, |
||||
dimm=0, cpu_type= Intel Xeon 5500 series / Core i3/5/7 |
||||
("Nehalem/Westmere"), cpu= 0, socketid= 0, ip= 1eadbabe (INEXACT), cs= |
||||
73, misc= 8c, addr= 62b000, mcgstatus= 5 RIPV MCIP, mcgcap= 1c09, |
||||
apicid= 0 |
||||
|
||||
"f->name" and "v" are not passed to mce_snprintf() in decode_numfield(), so fix it. |
||||
|
||||
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
|
||||
diff --git a/bitfield.c b/bitfield.c |
||||
index b2895b4..07795a9 100644 |
||||
--- a/bitfield.c |
||||
+++ b/bitfield.c |
||||
@@ -92,8 +92,9 @@ void decode_numfield(struct mce_event *e, uint64_t status, |
||||
uint64_t mask = (1ULL << (f->end - f->start + 1)) - 1; |
||||
uint64_t v = (status >> f->start) & mask; |
||||
if (v > 0 || f->force) { |
||||
- mce_snprintf(e->error_msg, "%%s: %s\n", |
||||
- f->fmt ? f->fmt : "%Lu"); |
||||
+ char fmt[32] = {0}; |
||||
+ snprintf(fmt, 32, "%%s: %s\n", f->fmt ? f->fmt : "%Lu"); |
||||
+ mce_snprintf(e->error_msg, fmt, f->name, v); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,84 @@
@@ -0,0 +1,84 @@
|
||||
From 7e79fa94dc6c294cd731c0c684b277dd4811c5db Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <aris@redhat.com> |
||||
Date: Fri, 15 Aug 2014 13:50:58 -0400 |
||||
Subject: [PATCH 3/4] rasdaemon: do not assume dimmX/ directories will be |
||||
present |
||||
|
||||
While finding the labels, size and location, ras-mc-ctl will search /sys for |
||||
the files and calculate the location. When it uses the location trying to map |
||||
back to files to print labels or write labels, it'll just assume dimm* |
||||
directories exist which is not correct while using drivers like amd64_edac. |
||||
This patch adds two new hashes to store the location and the label file path |
||||
so it can be used later. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 21 +++++++++++++-------- |
||||
1 file changed, 13 insertions(+), 8 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 110262f..7b6d798 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -45,6 +45,8 @@ my %conf = (); |
||||
my %bus = (); |
||||
my %dimm_size = (); |
||||
my %dimm_node = (); |
||||
+my %dimm_label_file = (); |
||||
+my %dimm_location = (); |
||||
my %csrow_size = (); |
||||
my %rank_size = (); |
||||
my %csrow_ranks = (); |
||||
@@ -278,6 +280,9 @@ sub parse_dimm_nodes |
||||
my $str_loc = join(':', $mc, @pos); |
||||
$dimm_size{$str_loc} = $size; |
||||
$dimm_node{$str_loc} = $dimm; |
||||
+ $file =~ s/size/dimm_label/; |
||||
+ $dimm_label_file{$str_loc} = $file; |
||||
+ $dimm_location{$str_loc} = $location; |
||||
|
||||
return; |
||||
} |
||||
@@ -557,12 +562,14 @@ sub read_dimm_label |
||||
|
||||
my $dimm = $dimm_node{$pos}; |
||||
|
||||
- my $file = "$sysfs/mc$mc/dimm$dimm/dimm_label"; |
||||
+ my $dimm_label_file = $dimm_label_file{$pos}; |
||||
|
||||
- return ("$pos missing") unless -f $file; |
||||
+ my $location = $dimm_location{$pos}; |
||||
|
||||
- if (!open (LABEL, "$file")) { |
||||
- warn "Failed to open $file: $!\n"; |
||||
+ return ("label missing", "$pos missing") unless -f $dimm_label_file; |
||||
+ |
||||
+ if (!open (LABEL, "$dimm_label_file")) { |
||||
+ warn "Failed to open $dimm_label_file: $!\n"; |
||||
return ("Error"); |
||||
} |
||||
|
||||
@@ -570,7 +577,7 @@ sub read_dimm_label |
||||
|
||||
close (LABEL); |
||||
|
||||
- $pos = "mc$mc " . qx(cat $sysfs/mc$mc/dimm$dimm/dimm_location); |
||||
+ $pos = "mc$mc $location"; |
||||
|
||||
return ($label, $pos); |
||||
} |
||||
@@ -587,9 +594,7 @@ sub get_dimm_label_node |
||||
|
||||
return "" if (!defined($dimm_node{$pos})); |
||||
|
||||
- my $dimm = $dimm_node{$pos}; |
||||
- |
||||
- return "$sysfs/mc$mc/dimm$dimm/dimm_label"; |
||||
+ return "$dimm_label_file{$pos}"; |
||||
} |
||||
|
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,119 @@
@@ -0,0 +1,119 @@
|
||||
Hello, |
||||
|
||||
This patch adds labels for these Dell PowerEdge Servers: |
||||
|
||||
R610,R/T710, R220, R/T620, R720/xd, R730/xd, M520, M620 and M820. |
||||
|
||||
The current T610 (0F5XM3) mapping is incorrect. This patch fixes it. |
||||
|
||||
Request review and inclusion to git repo. |
||||
|
||||
Acked-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Charles Rose <charles.rose.linux@gmail.com> |
||||
--- |
||||
Changes in v2: |
||||
- Include T110 II, T20, R/T320, M420, R/T420, R/T630, FC620, FC420 |
||||
- Include additional model numbers for M820 and some 2-socket systems. |
||||
- Consolidate systems with similar maps. |
||||
--- |
||||
labels/dell | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
||||
1 file changed, 79 insertions(+), 7 deletions(-) |
||||
|
||||
diff --git a/labels/dell b/labels/dell |
||||
index e1a09a7..d7e797b 100644 |
||||
--- a/labels/dell |
||||
+++ b/labels/dell |
||||
@@ -9,12 +9,84 @@ |
||||
# |
||||
|
||||
Vendor: Dell Inc. |
||||
+#### 11G #### |
||||
+# 2-socket |
||||
+# PowerEdge R610 |
||||
+ Model: 0K399H, 0F0XJ6 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2; |
||||
+ DIMM_A4: 0.1.0; DIMM_A5: 0.1.1; DIMM_A6: 0.1.2; |
||||
|
||||
- Model: 0F5XM3 |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2; DIMM_A4: 0.0.3; |
||||
- DIMM_A5: 0.1.0; DIMM_A6: 0.1.1; DIMM_A7: 0.1.2; DIMM_A8: 0.1.3; |
||||
- DIMM_A9: 0.2.0; DIMM_A10: 0.2.1; DIMM_A11: 0.2.2; DIMM_A12: 0.2.3; |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2; |
||||
+ DIMM_B4: 1.1.0; DIMM_B5: 1.1.1; DIMM_B6: 1.1.2; |
||||
|
||||
- DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2; DIMM_B4: 1.0.3; |
||||
- DIMM_B5: 1.1.0; DIMM_B6: 1.1.1; DIMM_B7: 1.1.2; DIMM_B8: 1.1.3; |
||||
- DIMM_B9: 1.2.0; DIMM_B10: 1.2.1; DIMM_B11: 1.2.2; DIMM_B12: 1.2.3; |
||||
+# PowerEdge T710 R710 |
||||
+ Model: 01CTXG, 0N0H4P, 0MD99X, 0N047H, 0PV9DG |
||||
+ DIMM_A3: 0.0.0; DIMM_A2: 0.1.0; DIMM_A1: 0.2.0; |
||||
+ DIMM_A6: 0.0.1; DIMM_A5: 0.1.1; DIMM_A4: 0.2.1; |
||||
+ DIMM_A9: 0.0.2; DIMM_A8: 0.1.2; DIMM_A7: 0.2.2; |
||||
+ |
||||
+ DIMM_B3: 1.0.0; DIMM_B2: 1.1.0; DIMM_B1: 1.2.0; |
||||
+ DIMM_B6: 1.0.1; DIMM_B5: 1.1.1; DIMM_B4: 1.2.1; |
||||
+ DIMM_B9: 1.0.2; DIMM_B8: 1.1.2; DIMM_B7: 1.2.2; |
||||
+ |
||||
+#### 12/13G #### |
||||
+# 1-socket |
||||
+# PowerEdge R220 |
||||
+ Model: 081N4V |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; |
||||
+ DIMM_A3: 0.1.0; DIMM_A4: 0.1.1; |
||||
+ |
||||
+#PowerEdge T110 II, T20 |
||||
+ Model: 0PC2WT, 0PM2CW, 015TH9, 0MDHN4, 0VD5HY |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; |
||||
+ |
||||
+ DIMM_B1: 0.0.1; DIMM_B2: 0.1.1; |
||||
+ |
||||
+#PowerEdge R320 T320 |
||||
+ Model: 0YCV59, 0Y97HY, 07DKYR, 0VJ84C, 07MYHN, 04DMNN, 0W7H8C, 0K20G5, 0V719V, 0FDT3J |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1; |
||||
+ |
||||
+# 2-socket |
||||
+# PowerEdge R620/T620 R720/xd R730/xd T630 R730 R630 T620 M620, FC620 |
||||
+ Model: 0VWT90, 07NDJ2, 0F5XM3, 0PXXHP, 0X3D66, 061P35, 0H5J4J, 00W9X3, 0599V5, 0W9WXC, 0599V5, 0H21J3, 0CNCJW, 02CD1V, 0T5TFW, 0F5XM3, 0G1CNH, 05YV77, 0PDCCX, 093MW8, 0NJVT7 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
+ DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
+ DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2; |
||||
+ |
||||
+# PowerEdge M520 R420 T420 |
||||
+ Model: 0NRG83, 0DW6GX, 03WPHJ, 06HTRX, 0H1Y24, 02T9N6, 0TT5P2, 0CPKXG, 03015M, 061VPC, 0PC9H0, 0K3G34, 0PC0V5, 08NVYK |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; |
||||
+ DIMM_B4: 1.0.1; DIMM_B5: 1.1.1; DIMM_B6: 1.2.1; |
||||
+ |
||||
+#PowerEdge FC420, M420 |
||||
+ Model: 0DPJGD, 068CTP, 0MN3VC, 0417VP |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; |
||||
+ |
||||
+# 4-socket |
||||
+# # PowerEdge M820 |
||||
+ Model: 0RN9TC, 0YWR73, 066N7P, 0PFG1N, 0JC2W3 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
+ DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
+ DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2; |
||||
+ |
||||
+ DIMM_C1: 2.0.0; DIMM_C2: 2.1.0; DIMM_C3: 2.2.0; DIMM_C4: 2.3.0; |
||||
+ DIMM_C5: 2.0.1; DIMM_C6: 2.1.1; DIMM_C7: 2.2.1; DIMM_C8: 2.3.1; |
||||
+ DIMM_C9: 2.0.2; DIMM_C10: 2.1.2; DIMM_C11: 2.2.2; DIMM_C12: 2.3.2; |
||||
+ |
||||
+ DIMM_D1: 3.0.0; DIMM_D2: 3.1.0; DIMM_D3: 3.2.0; DIMM_D4: 3.3.0; |
||||
+ DIMM_D5: 3.0.1; DIMM_D6: 3.1.1; DIMM_D7: 3.2.1; DIMM_D8: 3.3.1; |
||||
+ DIMM_D9: 3.0.2; DIMM_D10: 3.1.2; DIMM_D11: 3.2.2; DIMM_D12: 3.3.2; |
||||
-- |
||||
1.9.3 |
@ -0,0 +1,295 @@
@@ -0,0 +1,295 @@
|
||||
From 108b124a09512d44cd810d1ef6b823c9d029d5d6 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:28 -0300 |
||||
Subject: [PATCH 01/13] rasdaemon: add support for Haswell |
||||
|
||||
Based on mcelog code. |
||||
|
||||
Acked-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
Makefile.am | 2 +- |
||||
mce-intel-haswell.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++ |
||||
mce-intel.c | 2 + |
||||
ras-mce-handler.c | 8 +++ |
||||
ras-mce-handler.h | 3 + |
||||
5 files changed, 208 insertions(+), 1 deletion(-) |
||||
create mode 100644 mce-intel-haswell.c |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index 9c5f007..a6bf18f 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -28,7 +28,7 @@ if WITH_MCE |
||||
rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c \ |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
- mce-intel-sb.c mce-intel-ivb.c |
||||
+ mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c |
||||
endif |
||||
if WITH_EXTLOG |
||||
rasdaemon_SOURCES += ras-extlog-handler.c |
||||
diff --git a/mce-intel-haswell.c b/mce-intel-haswell.c |
||||
new file mode 100644 |
||||
index 0000000..c32704c |
||||
--- /dev/null |
||||
+++ b/mce-intel-haswell.c |
||||
@@ -0,0 +1,194 @@ |
||||
+/* |
||||
+ * The code below came from Tony Luck mcelog code, |
||||
+ * released under GNU Public General License, v.2 |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#include <string.h> |
||||
+#include <stdio.h> |
||||
+ |
||||
+#include "ras-mce-handler.h" |
||||
+#include "bitfield.h" |
||||
+ |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-20 */ |
||||
+ |
||||
+static char *pcu_1[] = { |
||||
+ [0x00] = "No Error", |
||||
+ [0x09] = "MC_MESSAGE_CHANNEL_TIMEOUT", |
||||
+ [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", |
||||
+ [0x0E] = "MC_CPD_UNCPD_SD_TIMEOUT", |
||||
+ [0x13] = "MC_DMI_TRAINING_TIMEOUT", |
||||
+ [0x15] = "MC_DMI_CPU_RESET_ACK_TIMEOUT", |
||||
+ [0x1E] = "MC_VR_ICC_MAX_LT_FUSED_ICC_MAX", |
||||
+ [0x25] = "MC_SVID_COMMAN_TIMEOUT", |
||||
+ [0x29] = "MC_VR_VOUT_MAC_LT_FUSED_SVID", |
||||
+ [0x2B] = "MC_PKGC_WATCHDOG_HANG_CBZ_DOWN", |
||||
+ [0x2C] = "MC_PKGC_WATCHDOG_HANG_CBZ_UP", |
||||
+ [0x39] = "MC_PKGC_WATCHDOG_HANG_C3_UP_SF", |
||||
+ [0x44] = "MC_CRITICAL_VR_FAILED", |
||||
+ [0x45] = "MC_ICC_MAX_NOTSUPPORTED", |
||||
+ [0x46] = "MC_VID_RAMP_DOWN_FAILED", |
||||
+ [0x47] = "MC_EXCL_MODE_NO_PMREQ_CMP", |
||||
+ [0x48] = "MC_SVID_READ_REG_ICC_MAX_FAILED", |
||||
+ [0x49] = "MC_SVID_WRITE_REG_VOUT_MAX_FAILED", |
||||
+ [0x4B] = "MC_BOOT_VID_TIMEOUT_DRAM_0", |
||||
+ [0x4C] = "MC_BOOT_VID_TIMEOUT_DRAM_1", |
||||
+ [0x4D] = "MC_BOOT_VID_TIMEOUT_DRAM_2", |
||||
+ [0x4E] = "MC_BOOT_VID_TIMEOUT_DRAM_3", |
||||
+ [0x4F] = "MC_SVID_COMMAND_ERROR", |
||||
+ [0x52] = "MC_FIVR_CATAS_OVERVOL_FAULT", |
||||
+ [0x53] = "MC_FIVR_CATAS_OVERCUR_FAULT", |
||||
+ [0x57] = "MC_SVID_PKGC_REQUEST_FAILED", |
||||
+ [0x58] = "MC_SVID_IMON_REQUEST_FAILED", |
||||
+ [0x59] = "MC_SVID_ALERT_REQUEST_FAILED", |
||||
+ [0x60] = "MC_INVALID_PKGS_REQ_PCH", |
||||
+ [0x61] = "MC_INVALID_PKGS_REQ_QPI", |
||||
+ [0x62] = "MC_INVALID_PKGS_RSP_QPI", |
||||
+ [0x63] = "MC_INVALID_PKGS_RSP_PCH", |
||||
+ [0x64] = "MC_INVALID_PKG_STATE_CONFIG", |
||||
+ [0x67] = "MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT", |
||||
+ [0x68] = "MC_IMC_RW_SMBUS_TIMEOUT", |
||||
+ [0x69] = "MC_HA_FAILSTS_CHANGE_DETECTED", |
||||
+ [0x6A] = "MC_MSGCH_PMREQ_CMP_TIMEOUT", |
||||
+ [0x70] = "MC_WATCHDOG_TIMEOUT_PKGC_SLAVE", |
||||
+ [0x71] = "MC_WATCHDOG_TIMEOUT_PKGC_MASTER", |
||||
+ [0x72] = "MC_WATCHDOG_TIMEOUT_PKGS_MASTER", |
||||
+ [0x7C] = "MC_BIOS_RST_CPL_INVALID_SEQ", |
||||
+ [0x7D] = "MC_MORE_THAN_ONE_TXT_AGENT", |
||||
+ [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT" |
||||
+}; |
||||
+ |
||||
+static struct field pcu_mc4[] = { |
||||
+ FIELD(24, pcu_1), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-21 */ |
||||
+ |
||||
+static char *qpi[] = { |
||||
+ [0x02] = "Intel QPI physical layer detected drift buffer alarm", |
||||
+ [0x03] = "Intel QPI physical layer detected latency buffer rollover", |
||||
+ [0x10] = "Intel QPI link layer detected control error from R3QPI", |
||||
+ [0x11] = "Rx entered LLR abort state on CRC error", |
||||
+ [0x12] = "Unsupported or undefined packet", |
||||
+ [0x13] = "Intel QPI link layer control error", |
||||
+ [0x15] = "RBT used un-initialized value", |
||||
+ [0x20] = "Intel QPI physical layer detected a QPI in-band reset but aborted initialization", |
||||
+ [0x21] = "Link failover data self healing", |
||||
+ [0x22] = "Phy detected in-band reset (no width change)", |
||||
+ [0x23] = "Link failover clock failover", |
||||
+ [0x30] = "Rx detected CRC error - successful LLR after Phy re-init", |
||||
+ [0x31] = "Rx detected CRC error - successful LLR wihout Phy re-init", |
||||
+}; |
||||
+ |
||||
+static struct field qpi_mc[] = { |
||||
+ FIELD(16, qpi), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-22 */ |
||||
+ |
||||
+static struct field memctrl_mc9[] = { |
||||
+ SBITFIELD(16, "DDR3 address parity error"), |
||||
+ SBITFIELD(17, "Uncorrected HA write data error"), |
||||
+ SBITFIELD(18, "Uncorrected HA data byte enable error"), |
||||
+ SBITFIELD(19, "Corrected patrol scrub error"), |
||||
+ SBITFIELD(20, "Uncorrected patrol scrub error"), |
||||
+ SBITFIELD(21, "Corrected spare error"), |
||||
+ SBITFIELD(22, "Uncorrected spare error"), |
||||
+ SBITFIELD(23, "Corrected memory read error"), |
||||
+ SBITFIELD(24, "iMC write data buffer parity error"), |
||||
+ SBITFIELD(25, "DDR4 command address parity error"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+void hsw_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
+{ |
||||
+ uint64_t status = e->status; |
||||
+ uint32_t mca = status & 0xffff; |
||||
+ unsigned rank0 = -1, rank1 = -1, chan; |
||||
+ |
||||
+ switch (e->bank) { |
||||
+ case 4: |
||||
+ switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { |
||||
+ case 0x402: case 0x403: |
||||
+ /* Internal errors */ |
||||
+ break; |
||||
+ case 0x406: |
||||
+ /* Intel TXT errors */ |
||||
+ break; |
||||
+ case 0x407: |
||||
+ /* Other UBOX Internal errors */ |
||||
+ break; |
||||
+ } |
||||
+ if (EXTRACT(status, 16, 19)) |
||||
+ /* PCU internal error */ |
||||
+ decode_bitfield(e, status, pcu_mc4); |
||||
+ break; |
||||
+ case 5: |
||||
+ case 20: |
||||
+ case 21: |
||||
+ decode_bitfield(e, status, qpi_mc); |
||||
+ break; |
||||
+ case 9: case 10: case 11: case 12: |
||||
+ case 13: case 14: case 15: case 16: |
||||
+ decode_bitfield(e, status, memctrl_mc9); |
||||
+ break; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * Memory error specific code. Returns if the error is not a MC one |
||||
+ */ |
||||
+ |
||||
+ /* Check if the error is at the memory controller */ |
||||
+ if ((mca >> 7) != 1) |
||||
+ return; |
||||
+ |
||||
+ /* Ignore unless this is an corrected extended error from an iMC bank */ |
||||
+ if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
+ return; |
||||
+ |
||||
+ /* |
||||
+ * Parse the reported channel and ranks |
||||
+ */ |
||||
+ |
||||
+ chan = EXTRACT(status, 0, 3); |
||||
+ if (chan == 0xf) |
||||
+ return; |
||||
+ |
||||
+ mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 62, 62)) |
||||
+ rank0 = EXTRACT(e->misc, 46, 50); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ |
||||
+ /* |
||||
+ * FIXME: The conversion from rank to dimm requires to parse the |
||||
+ * DMI tables and call failrank2dimm(). |
||||
+ */ |
||||
+ if (rank0 >= 0 && rank1 >= 0) |
||||
+ mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
+ rank0, rank1); |
||||
+ else if (rank0 >= 0) |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
+ else |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank1); |
||||
+} |
||||
+ |
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 427b98e..1546a1d 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -392,6 +392,8 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
ivb_decode_model(ras, e); |
||||
break; |
||||
+ case CPU_HASWELL_EPEX: |
||||
+ hsw_decode_model(ras, e); |
||||
default: |
||||
break; |
||||
} |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index a1d0b5d..d2de096 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -47,6 +47,8 @@ static char *cputype_name[] = { |
||||
[CPU_SANDY_BRIDGE_EP] = "Sandy Bridge EP", /* Fill in better name */ |
||||
[CPU_IVY_BRIDGE] = "Ivy Bridge", /* Fill in better name */ |
||||
[CPU_IVY_BRIDGE_EPEX] = "Ivy Bridge EP/EX", /* Fill in better name */ |
||||
+ [CPU_HASWELL] = "Haswell", |
||||
+ [CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", |
||||
}; |
||||
|
||||
static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
@@ -81,6 +83,12 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_IVY_BRIDGE; |
||||
else if (mce->model == 0x3e) |
||||
return CPU_IVY_BRIDGE_EPEX; |
||||
+ else if (mce->model == 0x3c || mce->model == 0x45 || |
||||
+ mce->model == 0x46) |
||||
+ return CPU_HASWELL; |
||||
+ else if (mce->model == 0x3f) |
||||
+ return CPU_HASWELL_EPEX; |
||||
+ |
||||
if (mce->model > 0x1a) { |
||||
log(ALL, LOG_INFO, |
||||
"Family 6 Model %x CPU: only decoding architectural errors\n", |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index 80e9769..b8b3d4f 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -42,6 +42,8 @@ enum cputype { |
||||
CPU_SANDY_BRIDGE_EP, |
||||
CPU_IVY_BRIDGE, |
||||
CPU_IVY_BRIDGE_EPEX, |
||||
+ CPU_HASWELL, |
||||
+ CPU_HASWELL_EPEX, |
||||
}; |
||||
|
||||
struct mce_event { |
||||
@@ -114,6 +116,7 @@ void xeon75xx_decode_model(struct mce_event *e); |
||||
void dunnington_decode_model(struct mce_event *e); |
||||
void snb_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void ivb_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
+void hsw_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void tulsa_decode_model(struct mce_event *e); |
||||
|
||||
/* Software defined banks */ |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,40 @@
@@ -0,0 +1,40 @@
|
||||
From 85a2ead8f2d6e380be8d8234ba752a558e8027ed Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:29 -0300 |
||||
Subject: [PATCH 02/13] rasdaemon: decode new simple error code number 6 |
||||
|
||||
This patch was based on fa313dd0144596dfa140bd66805367250d6eae9b |
||||
(mcelog) |
||||
|
||||
mcelog: Decode new simple error code number 6 |
||||
|
||||
Edition 050 of the Intel SDM released in late February 2014 |
||||
includes a new simple error code in "Table 15-8. IA32_MCi_Status |
||||
[15:0] Simple Error Code Encoding". Code 6 (0000 0000 0000 0110) |
||||
has been allocated for the reporting of cases where the BIOS SMM |
||||
code attempts to execute code outside of the protected SMRR area. |
||||
|
||||
Signed-off-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Andi Kleen <ak@linux.intel.com> |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel.c | 1 + |
||||
1 file changed, 1 insertion(+) |
||||
|
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 1546a1d..69ea00e 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -115,6 +115,7 @@ static char *mca_msg[] = { |
||||
[3] = "External error", |
||||
[4] = "FRC error", |
||||
[5] = "Internal parity error", |
||||
+ [6] = "SMM Handler Code Access Violation", |
||||
}; |
||||
|
||||
static char *tracking_msg[] = { |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
From 064a74b1202e529b5e16a54218fc17974906af2d Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:30 -0300 |
||||
Subject: [PATCH 03/13] rasdaemon: Add missing entry to Ivy Bridge memory |
||||
controller decode table |
||||
|
||||
This patch is based on 2577aeb662374cb87169ee675b2e37c06f1aed99 (mcelog) |
||||
|
||||
mcelog: Add missing entry to Ivy Bridge memory controller decode table |
||||
|
||||
September 2013 edition of the software developer manual added an |
||||
entry that had been inadvertently omitted from earlier editions. |
||||
Add the 0x80 entry for "Corrected memory read error". |
||||
|
||||
Signed-off-by: Tony Luck <tony.luck@intel.com> |
||||
Signed-off-by: Andi Kleen <ak@linux.intel.com> |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel-ivb.c | 1 + |
||||
1 file changed, 1 insertion(+) |
||||
|
||||
diff --git a/mce-intel-ivb.c b/mce-intel-ivb.c |
||||
index f2a133a..0c5bebc 100644 |
||||
--- a/mce-intel-ivb.c |
||||
+++ b/mce-intel-ivb.c |
||||
@@ -76,6 +76,7 @@ static char *memctrl_1[] = { |
||||
[0x010] = "Uncorrected patrol scrub error", |
||||
[0x020] = "Corrected spare error", |
||||
[0x040] = "Uncorrected spare error", |
||||
+ [0x080] = "Corrected memory read error", |
||||
[0x100] = "iMC, WDB, parity errors", |
||||
}; |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
From 66021c20c92b5df16b5c8dae4fb664788fa40376 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:31 -0300 |
||||
Subject: [PATCH 04/13] rasdaemon: Identify Ivy Bridge properly |
||||
|
||||
This patch is based on b29cc4d615cead87cbc163ada0645b10c5b1217d (mcelog) |
||||
mcelog: Identify Ivy Bridge properly |
||||
|
||||
Uniquely identify Ivy Bridge even though the machine checks are the same |
||||
for Sandy Bridge and Ivy Bridge. This makes the output for the processor |
||||
display "Ivy Bridge". |
||||
|
||||
Signed-off-by: Prarit Bhargava <prarit@redhat.com> |
||||
Cc: tony.luck@intel.com |
||||
Signed-off-by: Andi Kleen <ak@linux.intel.com> |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
ras-mce-handler.c | 2 +- |
||||
1 file changed, 1 insertion(+), 1 deletion(-) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index d2de096..07e298f 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -75,7 +75,7 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_NEHALEM; |
||||
else if (mce->model == 0x2e || mce->model == 0x2f) |
||||
return CPU_XEON75XX; |
||||
- else if (mce->model == 0x2a || mce->model == 0x3a) |
||||
+ else if (mce->model == 0x2a) |
||||
return CPU_SANDY_BRIDGE; |
||||
else if (mce->model == 0x2d) |
||||
return CPU_SANDY_BRIDGE_EP; |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,52 @@
@@ -0,0 +1,52 @@
|
||||
From a9810094cf838e03102f95333db7ddfe810ccabd Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:32 -0300 |
||||
Subject: [PATCH 05/13] rasdaemon: add support for Broadwell |
||||
|
||||
Only basic support for now. |
||||
|
||||
Based on mcelog code. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
ras-mce-handler.c | 3 +++ |
||||
ras-mce-handler.h | 1 + |
||||
2 files changed, 4 insertions(+) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 07e298f..e059b92 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -49,6 +49,7 @@ static char *cputype_name[] = { |
||||
[CPU_IVY_BRIDGE_EPEX] = "Ivy Bridge EP/EX", /* Fill in better name */ |
||||
[CPU_HASWELL] = "Haswell", |
||||
[CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", |
||||
+ [CPU_BROADWELL] = "Broadwell", |
||||
}; |
||||
|
||||
static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
@@ -88,6 +89,8 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_HASWELL; |
||||
else if (mce->model == 0x3f) |
||||
return CPU_HASWELL_EPEX; |
||||
+ else if (mce->model == 0x3d) |
||||
+ return CPU_BROADWELL; |
||||
|
||||
if (mce->model > 0x1a) { |
||||
log(ALL, LOG_INFO, |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index b8b3d4f..ba01f55 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -44,6 +44,7 @@ enum cputype { |
||||
CPU_IVY_BRIDGE_EPEX, |
||||
CPU_HASWELL, |
||||
CPU_HASWELL_EPEX, |
||||
+ CPU_BROADWELL, |
||||
}; |
||||
|
||||
struct mce_event { |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
From bd6c78d89f4e934fafb1136a15efc0d6df4635ed Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon, 18 May 2015 14:19:33 -0300 |
||||
Subject: [PATCH 06/13] rasdaemon: add support for Knights Landing |
||||
|
||||
Patch based on mcelog. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
ras-mce-handler.c | 3 +++ |
||||
ras-mce-handler.h | 1 + |
||||
2 files changed, 4 insertions(+) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index e059b92..63f14fd 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -50,6 +50,7 @@ static char *cputype_name[] = { |
||||
[CPU_HASWELL] = "Haswell", |
||||
[CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", |
||||
[CPU_BROADWELL] = "Broadwell", |
||||
+ [CPU_KNIGHTS_LANDING] = "Knights Landing", |
||||
}; |
||||
|
||||
static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
@@ -91,6 +92,8 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_HASWELL_EPEX; |
||||
else if (mce->model == 0x3d) |
||||
return CPU_BROADWELL; |
||||
+ else if (mce->model == 0x57) |
||||
+ return CPU_KNIGHTS_LANDING; |
||||
|
||||
if (mce->model > 0x1a) { |
||||
log(ALL, LOG_INFO, |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index ba01f55..28aad00 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -45,6 +45,7 @@ enum cputype { |
||||
CPU_HASWELL, |
||||
CPU_HASWELL_EPEX, |
||||
CPU_BROADWELL, |
||||
+ CPU_KNIGHTS_LANDING, |
||||
}; |
||||
|
||||
struct mce_event { |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
From 5dd11c60b84294a3c6ce5ccb0db726b3dce35b10 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Tue, 26 May 2015 11:59:36 -0300 |
||||
Subject: [PATCH 07/13] rasdaemon: properly print message strings in |
||||
decode_bitfield() |
||||
|
||||
Fix decode_bitfield() so that it does print message strings from the struct |
||||
field table. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
bitfield.c | 3 ++- |
||||
1 file changed, 2 insertions(+), 1 deletion(-) |
||||
|
||||
diff --git a/bitfield.c b/bitfield.c |
||||
index 1dda30d..d6931c9 100644 |
||||
--- a/bitfield.c |
||||
+++ b/bitfield.c |
||||
@@ -84,7 +84,8 @@ void decode_bitfield(struct mce_event *e, uint64_t status, |
||||
continue; |
||||
mce_snprintf(e->error_msg, "<%u:%llx>", |
||||
f->start_bit, (long long)v); |
||||
- } |
||||
+ } else |
||||
+ mce_snprintf(e->error_msg, "%s", s); |
||||
} |
||||
} |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,31 @@
@@ -0,0 +1,31 @@
|
||||
From abf36efe909c4022260cb4016c54d1ec3ec18cb8 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Tue, 26 May 2015 11:59:37 -0300 |
||||
Subject: [PATCH 08/13] rasdaemon: add missing semicolon in hsw_decode_model() |
||||
|
||||
hsw_decode_model() tries to skip decode_bitfield() if IA32_MC4_STATUS indicates |
||||
some internal errors. Unfortunately, the code here behaves opposite to the intention |
||||
because a semicolon is missing. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel-haswell.c | 1 + |
||||
1 file changed, 1 insertion(+) |
||||
|
||||
diff --git a/mce-intel-haswell.c b/mce-intel-haswell.c |
||||
index c32704c..3ac12f2 100644 |
||||
--- a/mce-intel-haswell.c |
||||
+++ b/mce-intel-haswell.c |
||||
@@ -137,6 +137,7 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
} |
||||
if (EXTRACT(status, 16, 19)) |
||||
/* PCU internal error */ |
||||
+ ; |
||||
decode_bitfield(e, status, pcu_mc4); |
||||
break; |
||||
case 5: |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
From f892a390c55c0b350c57cda9d166a9cf331aa36f Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Tue, 26 May 2015 11:59:38 -0300 |
||||
Subject: [PATCH 09/13] rasdaemon: enable IMC status usage for Haswell-E |
||||
|
||||
Enable IMC status bank for Haswell-E, as described in Intel SDM Vol.3C |
||||
Table 35-27. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel.c | 1 + |
||||
ras-mce-handler.c | 1 + |
||||
2 files changed, 2 insertions(+) |
||||
|
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 69ea00e..3684602 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -457,6 +457,7 @@ int set_intel_imc_log(enum cputype cputype, unsigned ncpus) |
||||
switch (cputype) { |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
+ case CPU_HASWELL_EPEX: |
||||
msr = 0x17f; /* MSR_ERROR_CONTROL */ |
||||
bit = 0x2; /* MemError Log Enable */ |
||||
break; |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 63f14fd..fb6db8a 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -221,6 +221,7 @@ int register_mce_handler(struct ras_events *ras, unsigned ncpus) |
||||
switch (mce->cputype) { |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
+ case CPU_HASWELL_EPEX: |
||||
set_intel_imc_log(mce->cputype, ncpus); |
||||
default: |
||||
break; |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,54 @@
@@ -0,0 +1,54 @@
|
||||
From 56913e2f2a5a6ddf8ab684c8d528e9ef1d55cfba Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Tue, 26 May 2015 11:59:39 -0300 |
||||
Subject: [PATCH 10/13] rasdaemon: make sure the error is valid before handling |
||||
ranks |
||||
|
||||
Fix "rank" handling according to the Bit 63 description in Intel SDM Vol.3C |
||||
Table 16-23, that says "... Use this information only after there is valid |
||||
first error info indicated by bit 62". |
||||
Also fix invalid comparisons of unsigned variables "rank0" and "rank1". |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel-haswell.c | 14 ++++++-------- |
||||
1 file changed, 6 insertions(+), 8 deletions(-) |
||||
|
||||
diff --git a/mce-intel-haswell.c b/mce-intel-haswell.c |
||||
index 3ac12f2..0a817bf 100644 |
||||
--- a/mce-intel-haswell.c |
||||
+++ b/mce-intel-haswell.c |
||||
@@ -174,22 +174,20 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
|
||||
mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
|
||||
- if (EXTRACT(e->misc, 62, 62)) |
||||
+ if (EXTRACT(e->misc, 62, 62)) { |
||||
rank0 = EXTRACT(e->misc, 46, 50); |
||||
- |
||||
- if (EXTRACT(e->misc, 63, 63)) |
||||
- rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ } |
||||
|
||||
/* |
||||
* FIXME: The conversion from rank to dimm requires to parse the |
||||
* DMI tables and call failrank2dimm(). |
||||
*/ |
||||
- if (rank0 >= 0 && rank1 >= 0) |
||||
+ if (rank0 != -1 && rank1 != -1) |
||||
mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
rank0, rank1); |
||||
- else if (rank0 >= 0) |
||||
+ else if (rank0 != -1) |
||||
mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
- else |
||||
- mce_snprintf(e->mc_location, "rank=%d", rank1); |
||||
} |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,261 @@
@@ -0,0 +1,261 @@
|
||||
From 3a38f8e66a2aa5c477cea152e1acc9a781834b83 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <aris@redhat.com> |
||||
Date: Mon, 1 Jun 2015 17:04:00 -0300 |
||||
Subject: [PATCH 11/13] rasdaemon: add support to match the machine by system's |
||||
product name |
||||
|
||||
In some cases the motherboard names will change but the mapping won't |
||||
across a line of products. This patch adds support for "Product:" to be |
||||
specified in the label files instead of Model:. |
||||
|
||||
An example: |
||||
Vendor: Dell Inc. |
||||
Product: PowerEdge R610 |
||||
DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2; |
||||
DIMM_A4: 0.1.0; DIMM_A5: 0.1.1; DIMM_A6: 0.1.2; |
||||
|
||||
DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2; |
||||
DIMM_B4: 1.1.0; DIMM_B5: 1.1.1; DIMM_B6: 1.1.2; |
||||
|
||||
Would match all 'PowerEdge R610' machines. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
util/ras-mc-ctl.in | 127 +++++++++++++++++++++++++++++++++++++++++------------ |
||||
1 file changed, 98 insertions(+), 29 deletions(-) |
||||
|
||||
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in |
||||
index 7b6d798..6350f62 100755 |
||||
--- a/util/ras-mc-ctl.in |
||||
+++ b/util/ras-mc-ctl.in |
||||
@@ -288,8 +288,27 @@ sub parse_dimm_nodes |
||||
} |
||||
} |
||||
|
||||
+sub guess_product { |
||||
+ my $pvendor = undef; |
||||
+ my $pname = undef; |
||||
+ |
||||
+ if (open (VENDOR, "/sys/class/dmi/id/product_vendor")) { |
||||
+ $pvendor = <VENDOR>; |
||||
+ close VENDOR; |
||||
+ chomp($pvendor); |
||||
+ } |
||||
+ if (open (NAME, "/sys/class/dmi/id/product_name")) { |
||||
+ $pname = <NAME>; |
||||
+ close NAME; |
||||
+ chomp($pname); |
||||
+ } |
||||
+ |
||||
+ return ($pvendor, $pname); |
||||
+} |
||||
+ |
||||
sub get_mainboard_info { |
||||
my ($vendor, $model); |
||||
+ my ($pvendor, $pname); |
||||
|
||||
if ($conf{opt}{mainboard} && $conf{opt}{mainboard} ne "report") { |
||||
($vendor, $model) = split (/[: ]/, $conf{opt}{mainboard}, 2); |
||||
@@ -301,6 +320,15 @@ sub get_mainboard_info { |
||||
|
||||
$conf{mainboard}{vendor} = $vendor; |
||||
$conf{mainboard}{model} = $model; |
||||
+ |
||||
+ ($pvendor, $pname) = guess_product (); |
||||
+ # since product vendor is rare, use mainboard's vendor |
||||
+ if ($pvendor) { |
||||
+ $conf{mainboard}{product_vendor} = $pvendor; |
||||
+ } else { |
||||
+ $conf{mainboard}{product_vendor} = $vendor; |
||||
+ } |
||||
+ $conf{mainboard}{product_name} = $pname if $pname; |
||||
} |
||||
|
||||
sub guess_vendor_model_dmidecode { |
||||
@@ -449,10 +477,11 @@ sub guess_dimm_label { |
||||
|
||||
sub parse_dimm_labels_file |
||||
{ |
||||
- my ($lh, $num_layers, $file) = (@_); |
||||
+ my ($lh, $num_layers, $lh_prod, $num_layers_prod, $file) = (@_); |
||||
my $line = -1; |
||||
my $vendor = ""; |
||||
my @models = (); |
||||
+ my @products = (); |
||||
my $num; |
||||
|
||||
open (LABELS, "$file") |
||||
@@ -469,12 +498,21 @@ sub parse_dimm_labels_file |
||||
if (/vendor\s*:\s*(.*\S)\s*/i) { |
||||
$vendor = lc $1; |
||||
@models = (); |
||||
+ @products = (); |
||||
$num = 0; |
||||
next; |
||||
} |
||||
if (/(model|board)\s*:\s*(.*)$/i) { |
||||
!$vendor && die "$file: line $line: MB model without vendor\n"; |
||||
@models = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); |
||||
+ @products = (); |
||||
+ $num = 0; |
||||
+ next; |
||||
+ } |
||||
+ if (/(product)\s*:\s*(.*)$/i) { |
||||
+ !$vendor && die "$file: line $line: product without vendor\n"; |
||||
+ @models = (); |
||||
+ @products = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); |
||||
$num = 0; |
||||
next; |
||||
} |
||||
@@ -513,10 +551,13 @@ sub parse_dimm_labels_file |
||||
} |
||||
map { $lh->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } |
||||
@models; |
||||
+ map { $lh_prod->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } |
||||
+ @products; |
||||
} |
||||
if (!$num) { |
||||
$num = $n; |
||||
map { $num_layers->{$vendor}{lc $_} = $num } @models; |
||||
+ map { $num_layers_prod->{$vendor}{lc $_} = $num } @products; |
||||
} elsif ($num != $n) { |
||||
die ("Error: Inconsistent number of layers at label db \"$file\"\n"); |
||||
} |
||||
@@ -531,6 +572,8 @@ sub parse_dimm_labels |
||||
{ |
||||
my %labels = (); |
||||
my %num_layers = (); |
||||
+ my %labels_prod = (); |
||||
+ my %num_layers_prod = (); |
||||
|
||||
# |
||||
# Accrue all DIMM labels from the labels.db file, as |
||||
@@ -538,10 +581,10 @@ sub parse_dimm_labels |
||||
# |
||||
for my $file ($conf{labeldb}, <$conf{labeldir}/*>) { |
||||
next unless -r $file; |
||||
- parse_dimm_labels_file (\%labels, \%num_layers, $file); |
||||
+ parse_dimm_labels_file (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod, $file); |
||||
} |
||||
|
||||
- return (\%labels, \%num_layers); |
||||
+ return (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod); |
||||
} |
||||
|
||||
sub read_dimm_label |
||||
@@ -598,25 +641,9 @@ sub get_dimm_label_node |
||||
} |
||||
|
||||
|
||||
-sub print_dimm_labels |
||||
+sub _print_dimm_labels |
||||
{ |
||||
- my $fh = shift || *STDOUT; |
||||
- my ($lref, $num_layers) = parse_dimm_labels (); |
||||
- my $vendor = lc $conf{mainboard}{vendor}; |
||||
- my $model = lc $conf{mainboard}{model}; |
||||
- my $format = "%-35s %-20s %-20s\n"; |
||||
- |
||||
- if (!exists $$lref{$vendor}{$model}) { |
||||
- log_error ("No dimm labels for $conf{mainboard}{vendor} " . |
||||
- "model $conf{mainboard}{model}\n"); |
||||
- return; |
||||
- } |
||||
- |
||||
- my $sysfs_dir = "/sys/devices/system/edac/mc"; |
||||
- |
||||
- find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir); |
||||
- |
||||
- printf $fh $format, "LOCATION", "CONFIGURED LABEL", "SYSFS CONTENTS"; |
||||
+ my ($lref, $num_layers, $vendor, $model, $fh, $format) = @_; |
||||
|
||||
for my $mc (sort keys %{$$lref{$vendor}{$model}}) { |
||||
for my $top (sort keys %{$$lref{$vendor}{$model}{$mc}}) { |
||||
@@ -631,26 +658,40 @@ sub print_dimm_labels |
||||
} |
||||
} |
||||
print $fh "\n"; |
||||
- |
||||
} |
||||
|
||||
-sub register_dimm_labels |
||||
+sub print_dimm_labels |
||||
{ |
||||
- my ($lref, $num_layers) = parse_dimm_labels (); |
||||
+ my $fh = shift || *STDOUT; |
||||
+ my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels (); |
||||
my $vendor = lc $conf{mainboard}{vendor}; |
||||
my $model = lc $conf{mainboard}{model}; |
||||
- my $sysfs = "/sys/devices/system/edac/mc"; |
||||
+ my $pvendor = lc $conf{mainboard}{product_vendor}; |
||||
+ my $pname = lc $conf{mainboard}{product_name}; |
||||
+ my $format = "%-35s %-20s %-20s\n"; |
||||
|
||||
- if (!exists $$lref{$vendor}{$model}) { |
||||
+ if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) { |
||||
log_error ("No dimm labels for $conf{mainboard}{vendor} " . |
||||
- "model $conf{mainboard}{model}\n"); |
||||
- return 0; |
||||
+ "model $conf{mainboard}{model}\n"); |
||||
+ return; |
||||
} |
||||
+ |
||||
my $sysfs_dir = "/sys/devices/system/edac/mc"; |
||||
|
||||
find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir); |
||||
|
||||
- select (undef, undef, undef, $conf{opt}{delay}); |
||||
+ printf $fh $format, "LOCATION", "CONFIGURED LABEL", "SYSFS CONTENTS"; |
||||
+ |
||||
+ if (exists $$lref{$vendor}{$model}) { |
||||
+ _print_dimm_labels($lref, $num_layers, $vendor, $model, $fh, $format); |
||||
+ } elsif (exists $$lref_prod{$pvendor}{$pname}) { |
||||
+ _print_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname, $fh, $format); |
||||
+ } |
||||
+} |
||||
+ |
||||
+sub write_dimm_labels |
||||
+{ |
||||
+ my ($lref, $num_layers, $vendor, $model) = @_; |
||||
|
||||
for my $mc (sort keys %{$$lref{$vendor}{$model}}) { |
||||
for my $top (sort keys %{$$lref{$vendor}{$model}{$mc}}) { |
||||
@@ -675,6 +716,34 @@ sub register_dimm_labels |
||||
} |
||||
} |
||||
} |
||||
+} |
||||
+ |
||||
+sub register_dimm_labels |
||||
+{ |
||||
+ my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels (); |
||||
+ my $vendor = lc $conf{mainboard}{vendor}; |
||||
+ my $model = lc $conf{mainboard}{model}; |
||||
+ my $pvendor = lc $conf{mainboard}{product_vendor}; |
||||
+ my $pname = lc $conf{mainboard}{product_name}; |
||||
+ my $sysfs = "/sys/devices/system/edac/mc"; |
||||
+ |
||||
+ if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) { |
||||
+ log_error ("No dimm labels for $conf{mainboard}{vendor} " . |
||||
+ "model $conf{mainboard}{model}\n"); |
||||
+ return 0; |
||||
+ } |
||||
+ my $sysfs_dir = "/sys/devices/system/edac/mc"; |
||||
+ |
||||
+ find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir); |
||||
+ |
||||
+ select (undef, undef, undef, $conf{opt}{delay}); |
||||
+ |
||||
+ if (exists $$lref{$vendor}{$model}) { |
||||
+ write_dimm_labels($lref, $num_layers, $vendor, $model); |
||||
+ } else { |
||||
+ write_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname); |
||||
+ } |
||||
+ |
||||
return 1; |
||||
} |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,48 @@
@@ -0,0 +1,48 @@
|
||||
From a50a2ae341f8821d71a19d9a3c6ca345e1499e25 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Wed, 17 Jun 2015 07:56:57 -0300 |
||||
Subject: [PATCH 5/5] rasdaemon: add internal errors of IA32_MC4_STATUS for |
||||
Haswell |
||||
|
||||
Currently rasdaemon appears to purposely omit internal errors of |
||||
IA32_MC4_STATUS for Haswell-family processors, which are described in |
||||
Intel SDM vol3 Table 16-20. I think it's better to show these errors |
||||
because mcelog does show them. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel-haswell.c | 11 +++++------ |
||||
1 file changed, 5 insertions(+), 6 deletions(-) |
||||
|
||||
diff --git a/mce-intel-haswell.c b/mce-intel-haswell.c |
||||
index 0a817bf..b70e399 100644 |
||||
--- a/mce-intel-haswell.c |
||||
+++ b/mce-intel-haswell.c |
||||
@@ -126,18 +126,17 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
case 4: |
||||
switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { |
||||
case 0x402: case 0x403: |
||||
- /* Internal errors */ |
||||
+ mce_snprintf(e->mcastatus_msg, "PCU Internal Errors"); |
||||
break; |
||||
case 0x406: |
||||
- /* Intel TXT errors */ |
||||
+ mce_snprintf(e->mcastatus_msg, "Intel TXT Errors"); |
||||
break; |
||||
case 0x407: |
||||
- /* Other UBOX Internal errors */ |
||||
+ mce_snprintf(e->mcastatus_msg, "Other UBOX Internal Errors"); |
||||
break; |
||||
} |
||||
- if (EXTRACT(status, 16, 19)) |
||||
- /* PCU internal error */ |
||||
- ; |
||||
+ if (EXTRACT(status, 16, 17) && !EXTRACT(status, 18, 19)) |
||||
+ mce_snprintf(e->error_msg, "PCU Internal error"); |
||||
decode_bitfield(e, status, pcu_mc4); |
||||
break; |
||||
case 5: |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,34 @@
@@ -0,0 +1,34 @@
|
||||
From 45b575b791dbd3d5660a0c08065a9fbcb6e21eb9 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Wed, 10 Jun 2015 07:29:03 -0300 |
||||
Subject: [PATCH 2/5] rasdaemon: remove a space from mcgstatus_msg |
||||
|
||||
"ras-mc-ctl --errors" shows an unnecessary space character in the |
||||
mcgstatus string of MCE event, like below: |
||||
|
||||
2 2015-04-04 19:57:22 +0900 error: MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT, mcg mcgstatus= 0, mci Corrected_error, mcgcap=0x07000c16, status=0x8000000067000e0b, walltime=0x555da140, cpu=0x00000001, cpuid=0x000306f3, apicid=0x00000002, bank=0x00000004 |
||||
|
||||
Let's remove it. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel.c | 2 +- |
||||
1 file changed, 1 insertion(+), 1 deletion(-) |
||||
|
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 3503c6a..77b929b 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -176,7 +176,7 @@ static void decode_mcg(struct mce_event *e) |
||||
{ |
||||
uint64_t mcgstatus = e->mcgstatus; |
||||
|
||||
- mce_snprintf(e->mcgstatus_msg, "mcgstatus= %lld", |
||||
+ mce_snprintf(e->mcgstatus_msg, "mcgstatus=%lld", |
||||
(long long)e->mcgstatus); |
||||
|
||||
if (mcgstatus & MCG_STATUS_RIPV) |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,36 @@
@@ -0,0 +1,36 @@
|
||||
From 349da4c3d63ec6dceef66a405561984561d31582 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Wed, 10 Jun 2015 20:49:55 -0300 |
||||
Subject: [PATCH 3/5] rasdaemon: unnecessary comma for empty mc_location string |
||||
|
||||
In /var/log/messages, rasdaemon sometimes prints an unnecessary |
||||
comma ", " between mca= and cpu_type= like below: |
||||
|
||||
Jun 9 02:44:39 localhost rasdaemon: <...>-4585 [1638893312] 1031.109000: mce_record: 2015-06-08 10:07:28 +0900 bank=3, status= 9c0000000000017a, mci=Corrected_error Error_enabled, mca=Generic CACHE Level-2 Eviction Error, , cpu_type= Intel Xeon v3 (Haswell) EP/EX, cpu= 1, socketid= 0, misc= 4004000000000080, addr= 204fffffff, mcgstatus= 0, mcgcap= 7000c16, apicid= 2 |
||||
|
||||
That's the comma for mc_location which is printed even if mc_location is |
||||
empty due to a wrong if condition. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Acked-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
ras-mce-handler.c | 2 +- |
||||
1 file changed, 1 insertion(+), 1 deletion(-) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index fb6db8a..07252a0 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -278,7 +278,7 @@ static void report_mce_event(struct ras_events *ras, |
||||
if (*e->user_action) |
||||
trace_seq_printf(s, " %s", e->user_action); |
||||
|
||||
- if (e->mc_location) |
||||
+ if (*e->mc_location) |
||||
trace_seq_printf(s, ", %s", e->mc_location); |
||||
|
||||
#if 0 |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
From 9136d7422a6b53c50a920f3dd2539bf7fcd4fdf5 Mon Sep 17 00:00:00 2001 |
||||
From: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Date: Fri, 12 Jun 2015 06:35:37 -0300 |
||||
Subject: [PATCH 4/5] rasdaemon: use MCA error msg as error_msg |
||||
|
||||
In the case of machine-checks which do not have a model-specific MCA |
||||
error code but have an architectural code only, mce_event.error_msg |
||||
becomes empty, so you cannot tell what happened. |
||||
|
||||
(snip) |
||||
MCE records summary: |
||||
1 errors |
||||
^ |
||||
empty! |
||||
|
||||
(snip) |
||||
MCE events: |
||||
1 2015-06-12 00:21:46 +0900 error: , mcg mcgstatus= 0, mci Corrected_error |
||||
^ |
||||
empty! |
||||
|
||||
Error_enabled, mcgcap=0x07000c16, status=0x9c0000000000017a, addr=0x204fffffff, misc=0x4004000000000080, walltime=0x557b0db2, cpu=0x00000001, cpuid=0x000306f3, apicid=0x00000002, bank=0x00000003 |
||||
|
||||
In such a case, let's use the content of mcastatus_msg as error_msg |
||||
instead. |
||||
|
||||
(snip) |
||||
MCE records summary: |
||||
1 Generic CACHE Level-2 Eviction Error errors |
||||
(snip) |
||||
MCE events: |
||||
1 2015-06-12 02:39:04 +0900 error: Generic CACHE Level-2 Eviction Error, mcg mcgstatus= 0, mci Corrected_error Error_enabled, mcgcap=0x07000c16, status=0x9c0000000000017a, addr=0x204fffffff, misc=0x4004000000000080, walltime=0x557b1f22, cpu=0x00000001, cpuid=0x000306f3, apicid=0x00000002, bank=0x00000003 |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
Acked-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
ras-mce-handler.c | 3 +++ |
||||
1 file changed, 3 insertions(+) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 07252a0..3976f90 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -411,6 +411,9 @@ int ras_mce_event_handler(struct trace_seq *s, |
||||
if (rc) |
||||
return rc; |
||||
|
||||
+ if (!*e.error_msg && *e.mcastatus_msg) |
||||
+ mce_snprintf(e.error_msg, "%s", e.mcastatus_msg); |
||||
+ |
||||
report_mce_event(ras, record, s, &e); |
||||
|
||||
#ifdef HAVE_SQLITE3 |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,50 @@
@@ -0,0 +1,50 @@
|
||||
From fa6260eb1304c6c829af177ab4aa1937db36fab1 Mon Sep 17 00:00:00 2001 |
||||
From: Ashok Raj <ashok.raj@intel.com> |
||||
Date: Fri, 5 Jun 2015 13:32:47 -0300 |
||||
Subject: [PATCH 1/5] x86, rasdaemon: Add support to log Local Machine Check |
||||
Exception (LMCE) |
||||
|
||||
Local Machine Check Exception allows certain errors to be signaled to |
||||
only the affected logical processor. This change captures them for |
||||
rasdaemon. |
||||
|
||||
log:Changes to rasdaemon to support new architectural changes to MCE |
||||
|
||||
Changes to rasdaemon to support new architectural extensions in Intel |
||||
CPUs. |
||||
|
||||
Signed-off-by: Ashok Raj <ashok.raj@intel.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel.c | 2 ++ |
||||
ras-mce-handler.h | 1 + |
||||
2 files changed, 3 insertions(+) |
||||
|
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 3684602..3503c6a 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -185,6 +185,8 @@ static void decode_mcg(struct mce_event *e) |
||||
mce_snprintf(e->mcgstatus_msg, "EIPV"); |
||||
if (mcgstatus & MCG_STATUS_MCIP) |
||||
mce_snprintf(e->mcgstatus_msg, "MCIP"); |
||||
+ if (mcgstatus & MCG_STATUS_LMCE) |
||||
+ mce_snprintf(e->mcgstatus_msg, "LMCE"); |
||||
} |
||||
|
||||
static void bank_name(struct mce_event *e) |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index 28aad00..13b8f52 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -139,6 +139,7 @@ void tulsa_decode_model(struct mce_event *e); |
||||
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ |
||||
#define MCG_STATUS_EIPV (1ULL<<1) /* eip points to correct instruction */ |
||||
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ |
||||
+#define MCG_STATUS_LMCE (1ULL<<3) /* local machine check signaled */ |
||||
|
||||
/* Those functions are defined on per-cpu vendor C files */ |
||||
int parse_intel_event(struct ras_events *ras, struct mce_event *e); |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,22 @@
@@ -0,0 +1,22 @@
|
||||
Based on mcelog code. |
||||
|
||||
Signed-off-by: Seiichi Ikarashi <s.ikarashi@jp.fujitsu.com> |
||||
|
||||
--- |
||||
ras-mce-handler.c | 3 ++- |
||||
1 files changed, 2 insertions(+), 1 deletions(-) |
||||
|
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 3976f90..23f2488 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -90,7 +90,8 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_HASWELL; |
||||
else if (mce->model == 0x3f) |
||||
return CPU_HASWELL_EPEX; |
||||
- else if (mce->model == 0x3d) |
||||
+ else if (mce->model == 0x3d || mce->model == 0x4f || |
||||
+ mce->model == 0x56) |
||||
return CPU_BROADWELL; |
||||
else if (mce->model == 0x57) |
||||
return CPU_KNIGHTS_LANDING; |
@ -0,0 +1,43 @@
@@ -0,0 +1,43 @@
|
||||
From d9fe70fe7db45618f7b46b81ebee85e7a8801870 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <aris@redhat.com> |
||||
Date: Mon, 10 Aug 2015 14:24:41 -0400 |
||||
Subject: [PATCH 1/5] rasdaemon: fix typos on ras-mc-ctl man page |
||||
|
||||
Fixed two markers and two typos in the documentation. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <aris@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
man/ras-mc-ctl.8.in | 6 +++--- |
||||
1 file changed, 3 insertions(+), 3 deletions(-) |
||||
|
||||
diff --git a/man/ras-mc-ctl.8.in b/man/ras-mc-ctl.8.in |
||||
index 7441b3a..60997dd 100644 |
||||
--- a/man/ras-mc-ctl.8.in |
||||
+++ b/man/ras-mc-ctl.8.in |
||||
@@ -69,14 +69,14 @@ Display the configured labels for the current hardware, as |
||||
well as the current labels registered with EDAC. |
||||
.TP |
||||
.BI "--guess-labels" |
||||
-Print DMI labels, when bank locator is available at the DMI table. |
||||
+Print DMI labels, when bank locator is available in the DMI table. |
||||
It helps to fill the labels database at @sysconfdir@/ras/dimm_labels.d/. |
||||
.TP |
||||
.BI "--labeldb="DB |
||||
Specify an alternate location for the labels database. |
||||
.TP |
||||
.BI "--delay="time |
||||
-Specify a delay of \ftime\fR seconds before registering dimm labels. |
||||
+Specify a delay of \fBtime\fR seconds before registering DIMM labels. |
||||
Only meaninful if used together with --register-labels. |
||||
.TP |
||||
.BI "--layout |
||||
@@ -121,4 +121,4 @@ back to parsing output of the \fBdmidecode\fR(8) utility. Use of this |
||||
utility will most often require that \fBras-mc-ctl\fR be run as root. |
||||
|
||||
.SH SEE ALSO |
||||
-\f\fBrasdaemon\fR(1) |
||||
+\fBrasdaemon\fR(1) |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,213 @@
@@ -0,0 +1,213 @@
|
||||
From 2d656c4ec9d5f68ac39b2a8461b0cd4f77dd7c21 Mon Sep 17 00:00:00 2001 |
||||
From: Marcin Koss <marcin.koss@intel.com> |
||||
Date: Thu, 3 Dec 2015 15:19:47 +0100 |
||||
Subject: [PATCH 3/5] rasdaemon: Add support for Knights Landing processor |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
Makefile.am | 3 +- |
||||
mce-intel-knl.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
||||
mce-intel.c | 5 +++ |
||||
ras-mce-handler.c | 1 + |
||||
ras-mce-handler.h | 1 + |
||||
5 files changed, 137 insertions(+), 1 deletion(-) |
||||
create mode 100644 mce-intel-knl.c |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index a6bf18f..a1cb02a 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -28,7 +28,8 @@ if WITH_MCE |
||||
rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c \ |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
- mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c |
||||
+ mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ |
||||
+ mce-intel-knl.c |
||||
endif |
||||
if WITH_EXTLOG |
||||
rasdaemon_SOURCES += ras-extlog-handler.c |
||||
diff --git a/mce-intel-knl.c b/mce-intel-knl.c |
||||
new file mode 100644 |
||||
index 0000000..96b0a59 |
||||
--- /dev/null |
||||
+++ b/mce-intel-knl.c |
||||
@@ -0,0 +1,128 @@ |
||||
+/* |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#include <string.h> |
||||
+#include <stdio.h> |
||||
+ |
||||
+#include "ras-mce-handler.h" |
||||
+#include "bitfield.h" |
||||
+ |
||||
+static struct field memctrl_mc7[] = { |
||||
+ SBITFIELD(16, "CA Parity error"), |
||||
+ SBITFIELD(17, "Internal Parity error except WDB"), |
||||
+ SBITFIELD(18, "Internal Parity error from WDB"), |
||||
+ SBITFIELD(19, "Correctable Patrol Scrub"), |
||||
+ SBITFIELD(20, "Uncorrectable Patrol Scrub"), |
||||
+ SBITFIELD(21, "Spare Correctable Error"), |
||||
+ SBITFIELD(22, "Spare UC Error"), |
||||
+ SBITFIELD(23, "CORR Chip fail even MC only, 4 bit burst error EDC only"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+void knl_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
+{ |
||||
+ uint64_t status = e->status; |
||||
+ uint32_t mca = status & 0xffff; |
||||
+ unsigned rank0 = -1, rank1 = -1, chan = 0; |
||||
+ |
||||
+ switch (e->bank) { |
||||
+ case 5: |
||||
+ switch (EXTRACT(status, 0, 15)) { |
||||
+ case 0x402: |
||||
+ mce_snprintf(e->mcastatus_msg, "PCU Internal Errors"); |
||||
+ break; |
||||
+ case 0x403: |
||||
+ mce_snprintf(e->mcastatus_msg, "VCU Internal Errors"); |
||||
+ break; |
||||
+ case 0x407: |
||||
+ mce_snprintf(e->mcastatus_msg, "Other UBOX Internal Errors"); |
||||
+ break; |
||||
+ } |
||||
+ break; |
||||
+ case 7: case 8: case 9: case 10: |
||||
+ case 11: case 12: case 13: case 14: |
||||
+ case 15: case 16: |
||||
+ if ((EXTRACT(status, 0, 15)) == 0x5) { |
||||
+ mce_snprintf(e->mcastatus_msg, "Internal Parity error"); |
||||
+ } else { |
||||
+ chan = (EXTRACT(status, 0, 3)) + 3 * (e->bank == 15); |
||||
+ switch (EXTRACT(status, 4, 7)) { |
||||
+ case 0x0: |
||||
+ mce_snprintf(e->mcastatus_msg, "Undefined request on channel %d", chan); |
||||
+ break; |
||||
+ case 0x1: |
||||
+ mce_snprintf(e->mcastatus_msg, "Read on channel %d", chan); |
||||
+ break; |
||||
+ case 0x2: |
||||
+ mce_snprintf(e->mcastatus_msg, "Write on channel %d", chan); |
||||
+ break; |
||||
+ case 0x3: |
||||
+ mce_snprintf(e->mcastatus_msg, "CA error on channel %d", chan); |
||||
+ break; |
||||
+ case 0x4: |
||||
+ mce_snprintf(e->mcastatus_msg, "Scrub error on channel %d", chan); |
||||
+ break; |
||||
+ } |
||||
+ } |
||||
+ decode_bitfield(e, status, memctrl_mc7); |
||||
+ break; |
||||
+ default: |
||||
+ break; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * Memory error specific code. Returns if the error is not a MC one |
||||
+ */ |
||||
+ |
||||
+ /* Check if the error is at the memory controller */ |
||||
+ if ((mca >> 7) != 1) |
||||
+ return; |
||||
+ |
||||
+ /* Ignore unless this is an corrected extended error from an iMC bank */ |
||||
+ if (e->bank < 7 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
+ return; |
||||
+ |
||||
+ /* |
||||
+ * Parse the reported channel and ranks |
||||
+ */ |
||||
+ |
||||
+ chan = EXTRACT(status, 0, 3); |
||||
+ if (chan == 0xf) |
||||
+ { |
||||
+ mce_snprintf(e->mc_location, "memory_channel=unspecified"); |
||||
+ } |
||||
+ else |
||||
+ { |
||||
+ chan = chan + 3 * (e->bank == 15); |
||||
+ mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 62, 62)) |
||||
+ rank0 = EXTRACT(e->misc, 46, 50); |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ |
||||
+ /* |
||||
+ * FIXME: The conversion from rank to dimm requires to parse the |
||||
+ * DMI tables and call failrank2dimm(). |
||||
+ */ |
||||
+ if (rank0 != -1 && rank1 != -1) |
||||
+ mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
+ rank0, rank1); |
||||
+ else if (rank0 != -1) |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
+ } |
||||
+} |
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 77b929b..032f4e0 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -397,6 +397,10 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) |
||||
break; |
||||
case CPU_HASWELL_EPEX: |
||||
hsw_decode_model(ras, e); |
||||
+ break; |
||||
+ case CPU_KNIGHTS_LANDING: |
||||
+ knl_decode_model(ras, e); |
||||
+ break; |
||||
default: |
||||
break; |
||||
} |
||||
@@ -460,6 +464,7 @@ int set_intel_imc_log(enum cputype cputype, unsigned ncpus) |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
+ case CPU_KNIGHTS_LANDING: |
||||
msr = 0x17f; /* MSR_ERROR_CONTROL */ |
||||
bit = 0x2; /* MemError Log Enable */ |
||||
break; |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 23f2488..3b0b05b 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -223,6 +223,7 @@ int register_mce_handler(struct ras_events *ras, unsigned ncpus) |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
+ case CPU_KNIGHTS_LANDING: |
||||
set_intel_imc_log(mce->cputype, ncpus); |
||||
default: |
||||
break; |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index 13b8f52..5466743 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -119,6 +119,7 @@ void dunnington_decode_model(struct mce_event *e); |
||||
void snb_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void ivb_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void hsw_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
+void knl_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void tulsa_decode_model(struct mce_event *e); |
||||
|
||||
/* Software defined banks */ |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,106 @@
@@ -0,0 +1,106 @@
|
||||
From 17f4e17d9870fbd35572ae6bf6c227c787b07fe9 Mon Sep 17 00:00:00 2001 |
||||
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
Date: Fri, 5 Feb 2016 15:15:18 -0200 |
||||
Subject: [PATCH 4/5] mce-intel-knl: Fix CodingStyle |
||||
|
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
mce-intel-knl.c | 43 +++++++++++++++++++++++++++---------------- |
||||
1 file changed, 27 insertions(+), 16 deletions(-) |
||||
|
||||
diff --git a/mce-intel-knl.c b/mce-intel-knl.c |
||||
index 96b0a59..7062fbb 100644 |
||||
--- a/mce-intel-knl.c |
||||
+++ b/mce-intel-knl.c |
||||
@@ -48,32 +48,46 @@ void knl_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
mce_snprintf(e->mcastatus_msg, "VCU Internal Errors"); |
||||
break; |
||||
case 0x407: |
||||
- mce_snprintf(e->mcastatus_msg, "Other UBOX Internal Errors"); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "Other UBOX Internal Errors"); |
||||
break; |
||||
} |
||||
break; |
||||
- case 7: case 8: case 9: case 10: |
||||
- case 11: case 12: case 13: case 14: |
||||
- case 15: case 16: |
||||
+ case 7: |
||||
+ case 8: |
||||
+ case 9: |
||||
+ case 10: |
||||
+ case 11: |
||||
+ case 12: |
||||
+ case 13: |
||||
+ case 14: |
||||
+ case 15: |
||||
+ case 16: |
||||
if ((EXTRACT(status, 0, 15)) == 0x5) { |
||||
mce_snprintf(e->mcastatus_msg, "Internal Parity error"); |
||||
} else { |
||||
chan = (EXTRACT(status, 0, 3)) + 3 * (e->bank == 15); |
||||
switch (EXTRACT(status, 4, 7)) { |
||||
case 0x0: |
||||
- mce_snprintf(e->mcastatus_msg, "Undefined request on channel %d", chan); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "Undefined request on channel %d", |
||||
+ chan); |
||||
break; |
||||
case 0x1: |
||||
- mce_snprintf(e->mcastatus_msg, "Read on channel %d", chan); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "Read on channel %d", chan); |
||||
break; |
||||
case 0x2: |
||||
- mce_snprintf(e->mcastatus_msg, "Write on channel %d", chan); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "Write on channel %d", chan); |
||||
break; |
||||
case 0x3: |
||||
- mce_snprintf(e->mcastatus_msg, "CA error on channel %d", chan); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "CA error on channel %d", chan); |
||||
break; |
||||
case 0x4: |
||||
- mce_snprintf(e->mcastatus_msg, "Scrub error on channel %d", chan); |
||||
+ mce_snprintf(e->mcastatus_msg, |
||||
+ "Scrub error on channel %d", chan); |
||||
break; |
||||
} |
||||
} |
||||
@@ -93,7 +107,7 @@ void knl_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
|
||||
/* Ignore unless this is an corrected extended error from an iMC bank */ |
||||
if (e->bank < 7 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
- !test_prefix(7, status & 0xefff)) |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
return; |
||||
|
||||
/* |
||||
@@ -101,12 +115,9 @@ void knl_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
*/ |
||||
|
||||
chan = EXTRACT(status, 0, 3); |
||||
- if (chan == 0xf) |
||||
- { |
||||
+ if (chan == 0xf) { |
||||
mce_snprintf(e->mc_location, "memory_channel=unspecified"); |
||||
- } |
||||
- else |
||||
- { |
||||
+ } else { |
||||
chan = chan + 3 * (e->bank == 15); |
||||
mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
|
||||
@@ -121,7 +132,7 @@ void knl_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
*/ |
||||
if (rank0 != -1 && rank1 != -1) |
||||
mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
- rank0, rank1); |
||||
+ rank0, rank1); |
||||
else if (rank0 != -1) |
||||
mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
} |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,244 @@
@@ -0,0 +1,244 @@
|
||||
From e7b88730f8a753a50fa0b8d1f7027f79baa05ca4 Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Fri, 8 Apr 2016 15:07:18 -0400 |
||||
Subject: [PATCH 1/2] Add Broadwell DE MSCOD values |
||||
|
||||
Based on mcelog commit id 32252e9c37e97ea5083d90d2cf194bb85a4a0cda. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
Makefile.am | 2 +- |
||||
mce-intel-broadwell-de.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++ |
||||
mce-intel.c | 3 + |
||||
ras-mce-handler.c | 6 +- |
||||
ras-mce-handler.h | 2 + |
||||
5 files changed, 156 insertions(+), 3 deletions(-) |
||||
create mode 100644 mce-intel-broadwell-de.c |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index a1cb02a..a8477d3 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -29,7 +29,7 @@ if WITH_MCE |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ |
||||
- mce-intel-knl.c |
||||
+ mce-intel-knl.c mce-intel-broadwell-de.c |
||||
endif |
||||
if WITH_EXTLOG |
||||
rasdaemon_SOURCES += ras-extlog-handler.c |
||||
diff --git a/mce-intel-broadwell-de.c b/mce-intel-broadwell-de.c |
||||
new file mode 100644 |
||||
index 0000000..d52c82e |
||||
--- /dev/null |
||||
+++ b/mce-intel-broadwell-de.c |
||||
@@ -0,0 +1,146 @@ |
||||
+/* |
||||
+ * The code below came from Tony Luck's mcelog code, |
||||
+ * released under GNU Public General License, v.2 |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#include <string.h> |
||||
+#include <stdio.h> |
||||
+ |
||||
+#include "ras-mce-handler.h" |
||||
+#include "bitfield.h" |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-24 */ |
||||
+ |
||||
+static char *pcu_1[] = { |
||||
+ [0x00] = "No Error", |
||||
+ [0x09] = "MC_MESSAGE_CHANNEL_TIMEOUT", |
||||
+ [0x13] = "MC_DMI_TRAINING_TIMEOUT", |
||||
+ [0x15] = "MC_DMI_CPU_RESET_ACK_TIMEOUT", |
||||
+ [0x1E] = "MC_VR_ICC_MAX_LT_FUSED_ICC_MAX", |
||||
+ [0x25] = "MC_SVID_COMMAN_TIMEOUT", |
||||
+ [0x26] = "MCA_PKGC_DIRECT_WAKE_RING_TIMEOUT", |
||||
+ [0x29] = "MC_VR_VOUT_MAC_LT_FUSED_SVID", |
||||
+ [0x2B] = "MC_PKGC_WATCHDOG_HANG_CBZ_DOWN", |
||||
+ [0x2C] = "MC_PKGC_WATCHDOG_HANG_CBZ_UP", |
||||
+ [0x44] = "MC_CRITICAL_VR_FAILED", |
||||
+ [0x46] = "MC_VID_RAMP_DOWN_FAILED", |
||||
+ [0x49] = "MC_SVID_WRITE_REG_VOUT_MAX_FAILED", |
||||
+ [0x4B] = "MC_BOOT_VID_TIMEOUT_DRAM_0", |
||||
+ [0x4F] = "MC_SVID_COMMAND_ERROR", |
||||
+ [0x52] = "MC_FIVR_CATAS_OVERVOL_FAULT", |
||||
+ [0x53] = "MC_FIVR_CATAS_OVERCUR_FAULT", |
||||
+ [0x57] = "MC_SVID_PKGC_REQUEST_FAILED", |
||||
+ [0x58] = "MC_SVID_IMON_REQUEST_FAILED", |
||||
+ [0x59] = "MC_SVID_ALERT_REQUEST_FAILED", |
||||
+ [0x62] = "MC_INVALID_PKGS_RSP_QPI", |
||||
+ [0x64] = "MC_INVALID_PKG_STATE_CONFIG", |
||||
+ [0x67] = "MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT", |
||||
+ [0x6A] = "MC_MSGCH_PMREQ_CMP_TIMEOUT", |
||||
+ [0x72] = "MC_WATCHDOG_TIMEOUT_PKGS_MASTER", |
||||
+ [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT" |
||||
+}; |
||||
+ |
||||
+static struct field pcu_mc4[] = { |
||||
+ FIELD(24, pcu_1), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-18 */ |
||||
+ |
||||
+static struct field memctrl_mc9[] = { |
||||
+ SBITFIELD(16, "Address parity error"), |
||||
+ SBITFIELD(17, "HA Wrt buffer Data parity error"), |
||||
+ SBITFIELD(18, "HA Wrt byte enable parity error"), |
||||
+ SBITFIELD(19, "Corrected patrol scrub error"), |
||||
+ SBITFIELD(20, "Uncorrected patrol scrub error"), |
||||
+ SBITFIELD(21, "Corrected spare error"), |
||||
+ SBITFIELD(22, "Uncorrected spare error"), |
||||
+ SBITFIELD(23, "Corrected memory read error"), |
||||
+ SBITFIELD(24, "iMC, WDB, parity errors"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
+{ |
||||
+ uint64_t status = e->status; |
||||
+ uint32_t mca = status & 0xffff; |
||||
+ unsigned rank0 = -1, rank1 = -1, chan; |
||||
+ |
||||
+ switch (e->bank) { |
||||
+ case 4: |
||||
+ switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { |
||||
+ case 0x402: case 0x403: |
||||
+ mce_snprintf(e->mcastatus_msg, "Internal errors "); |
||||
+ break; |
||||
+ case 0x406: |
||||
+ mce_snprintf(e->mcastatus_msg, "Intel TXT errors "); |
||||
+ break; |
||||
+ case 0x407: |
||||
+ mce_snprintf(e->mcastatus_msg, "Other UBOX Internal errors "); |
||||
+ break; |
||||
+ } |
||||
+ if (EXTRACT(status, 16, 19) & 3) |
||||
+ mce_snprintf(e->mcastatus_msg, "PCU internal error "); |
||||
+ if (EXTRACT(status, 20, 23) & 4) |
||||
+ mce_snprintf(e->mcastatus_msg, "Ubox error "); |
||||
+ decode_bitfield(e, status, pcu_mc4); |
||||
+ break; |
||||
+ case 9: case 10: |
||||
+ mce_snprintf(e->mcastatus_msg, "MemCtrl: "); |
||||
+ decode_bitfield(e, status, memctrl_mc9); |
||||
+ break; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * Memory error specific code. Returns if the error is not a MC one |
||||
+ */ |
||||
+ |
||||
+ /* Check if the error is at the memory controller */ |
||||
+ if ((mca >> 7) != 1) |
||||
+ return; |
||||
+ |
||||
+ /* Ignore unless this is an corrected extended error from an iMC bank */ |
||||
+ if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
+ return; |
||||
+ |
||||
+ /* |
||||
+ * Parse the reported channel and ranks |
||||
+ */ |
||||
+ |
||||
+ chan = EXTRACT(status, 0, 3); |
||||
+ if (chan == 0xf) |
||||
+ return; |
||||
+ |
||||
+ mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 62, 62)) { |
||||
+ rank0 = EXTRACT(e->misc, 46, 50); |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * FIXME: The conversion from rank to dimm requires to parse the |
||||
+ * DMI tables and call failrank2dimm(). |
||||
+ */ |
||||
+ if (rank0 != -1 && rank1 != -1) |
||||
+ mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
+ rank0, rank1); |
||||
+ else if (rank0 != -1) |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
+} |
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index 032f4e0..b132903 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -401,6 +401,9 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) |
||||
case CPU_KNIGHTS_LANDING: |
||||
knl_decode_model(ras, e); |
||||
break; |
||||
+ case CPU_BROADWELL_DE: |
||||
+ broadwell_de_decode_model(ras, e); |
||||
+ break; |
||||
default: |
||||
break; |
||||
} |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index 3b0b05b..b58d6e0 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -50,6 +50,7 @@ static char *cputype_name[] = { |
||||
[CPU_HASWELL] = "Haswell", |
||||
[CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", |
||||
[CPU_BROADWELL] = "Broadwell", |
||||
+ [CPU_BROADWELL_DE] = "Broadwell DE", |
||||
[CPU_KNIGHTS_LANDING] = "Knights Landing", |
||||
}; |
||||
|
||||
@@ -90,8 +91,9 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_HASWELL; |
||||
else if (mce->model == 0x3f) |
||||
return CPU_HASWELL_EPEX; |
||||
- else if (mce->model == 0x3d || mce->model == 0x4f || |
||||
- mce->model == 0x56) |
||||
+ else if (mce->model == 0x56) |
||||
+ return CPU_BROADWELL_DE; |
||||
+ else if (mce->model == 0x3d || mce->model == 0x4f) |
||||
return CPU_BROADWELL; |
||||
else if (mce->model == 0x57) |
||||
return CPU_KNIGHTS_LANDING; |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index 5466743..2648048 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -45,6 +45,7 @@ enum cputype { |
||||
CPU_HASWELL, |
||||
CPU_HASWELL_EPEX, |
||||
CPU_BROADWELL, |
||||
+ CPU_BROADWELL_DE, |
||||
CPU_KNIGHTS_LANDING, |
||||
}; |
||||
|
||||
@@ -121,6 +122,7 @@ void ivb_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void hsw_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void knl_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void tulsa_decode_model(struct mce_event *e); |
||||
+void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
|
||||
/* Software defined banks */ |
||||
#define MCE_EXTENDED_BANK 128 |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,289 @@
@@ -0,0 +1,289 @@
|
||||
From 0dd44fca9d756990acf01cd2cdaa585f369168bc Mon Sep 17 00:00:00 2001 |
||||
From: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Fri, 8 Apr 2016 15:07:19 -0400 |
||||
Subject: [PATCH 2/2] Add Broadwell EP/EX MSCOD values |
||||
|
||||
Based on mcelog commit id 32252e9c37e97ea5083d90d2cf194bb85a4a0cda. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> |
||||
--- |
||||
Makefile.am | 3 +- |
||||
mce-intel-broadwell-epex.c | 191 +++++++++++++++++++++++++++++++++++++++++++++ |
||||
mce-intel.c | 3 + |
||||
ras-mce-handler.c | 5 +- |
||||
ras-mce-handler.h | 2 + |
||||
5 files changed, 202 insertions(+), 2 deletions(-) |
||||
create mode 100644 mce-intel-broadwell-epex.c |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index a8477d3..c9e4481 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -29,7 +29,8 @@ if WITH_MCE |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ |
||||
- mce-intel-knl.c mce-intel-broadwell-de.c |
||||
+ mce-intel-knl.c mce-intel-broadwell-de.c \ |
||||
+ mce-intel-broadwell-epex.c |
||||
endif |
||||
if WITH_EXTLOG |
||||
rasdaemon_SOURCES += ras-extlog-handler.c |
||||
diff --git a/mce-intel-broadwell-epex.c b/mce-intel-broadwell-epex.c |
||||
new file mode 100644 |
||||
index 0000000..f7cd3b6 |
||||
--- /dev/null |
||||
+++ b/mce-intel-broadwell-epex.c |
||||
@@ -0,0 +1,191 @@ |
||||
+/* |
||||
+ * The code below came from Tony Luck's mcelog code, |
||||
+ * released under GNU Public General License, v.2 |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#include <string.h> |
||||
+#include <stdio.h> |
||||
+ |
||||
+#include "ras-mce-handler.h" |
||||
+#include "bitfield.h" |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-20 */ |
||||
+ |
||||
+static char *pcu_1[] = { |
||||
+ [0x00] = "No Error", |
||||
+ [0x09] = "MC_MESSAGE_CHANNEL_TIMEOUT", |
||||
+ [0x0D] = "MC_IMC_FORCE_SR_S3_TIMEOUT", |
||||
+ [0x0E] = "MC_CPD_UNCPD_SD_TIMEOUT", |
||||
+ [0x13] = "MC_DMI_TRAINING_TIMEOUT", |
||||
+ [0x15] = "MC_DMI_CPU_RESET_ACK_TIMEOUT", |
||||
+ [0x1E] = "MC_VR_ICC_MAX_LT_FUSED_ICC_MAX", |
||||
+ [0x25] = "MC_SVID_COMMAN_TIMEOUT", |
||||
+ [0x29] = "MC_VR_VOUT_MAC_LT_FUSED_SVID", |
||||
+ [0x2B] = "MC_PKGC_WATCHDOG_HANG_CBZ_DOWN", |
||||
+ [0x2C] = "MC_PKGC_WATCHDOG_HANG_CBZ_UP", |
||||
+ [0x39] = "MC_PKGC_WATCHDOG_HANG_C3_UP_SF", |
||||
+ [0x44] = "MC_CRITICAL_VR_FAILED", |
||||
+ [0x45] = "MC_ICC_MAX_NOTSUPPORTED", |
||||
+ [0x46] = "MC_VID_RAMP_DOWN_FAILED", |
||||
+ [0x47] = "MC_EXCL_MODE_NO_PMREQ_CMP", |
||||
+ [0x48] = "MC_SVID_READ_REG_ICC_MAX_FAILED", |
||||
+ [0x49] = "MC_SVID_WRITE_REG_VOUT_MAX_FAILED", |
||||
+ [0x4B] = "MC_BOOT_VID_TIMEOUT_DRAM_0", |
||||
+ [0x4C] = "MC_BOOT_VID_TIMEOUT_DRAM_1", |
||||
+ [0x4D] = "MC_BOOT_VID_TIMEOUT_DRAM_2", |
||||
+ [0x4E] = "MC_BOOT_VID_TIMEOUT_DRAM_3", |
||||
+ [0x4F] = "MC_SVID_COMMAND_ERROR", |
||||
+ [0x52] = "MC_FIVR_CATAS_OVERVOL_FAULT", |
||||
+ [0x53] = "MC_FIVR_CATAS_OVERCUR_FAULT", |
||||
+ [0x57] = "MC_SVID_PKGC_REQUEST_FAILED", |
||||
+ [0x58] = "MC_SVID_IMON_REQUEST_FAILED", |
||||
+ [0x59] = "MC_SVID_ALERT_REQUEST_FAILED", |
||||
+ [0x60] = "MC_INVALID_PKGS_REQ_PCH", |
||||
+ [0x61] = "MC_INVALID_PKGS_REQ_QPI", |
||||
+ [0x62] = "MC_INVALID_PKGS_RSP_QPI", |
||||
+ [0x63] = "MC_INVALID_PKGS_RSP_PCH", |
||||
+ [0x64] = "MC_INVALID_PKG_STATE_CONFIG", |
||||
+ [0x67] = "MC_HA_IMC_RW_BLOCK_ACK_TIMEOUT", |
||||
+ [0x68] = "MC_IMC_RW_SMBUS_TIMEOUT", |
||||
+ [0x69] = "MC_HA_FAILSTS_CHANGE_DETECTED", |
||||
+ [0x6A] = "MC_MSGCH_PMREQ_CMP_TIMEOUT", |
||||
+ [0x70] = "MC_WATCHDOG_TIMEOUT_PKGC_SLAVE", |
||||
+ [0x71] = "MC_WATCHDOG_TIMEOUT_PKGC_MASTER", |
||||
+ [0x72] = "MC_WATCHDOG_TIMEOUT_PKGS_MASTER", |
||||
+ [0x7C] = "MC_BIOS_RST_CPL_INVALID_SEQ", |
||||
+ [0x7D] = "MC_MORE_THAN_ONE_TXT_AGENT", |
||||
+ [0x81] = "MC_RECOVERABLE_DIE_THERMAL_TOO_HOT" |
||||
+}; |
||||
+ |
||||
+static struct field pcu_mc4[] = { |
||||
+ FIELD(24, pcu_1), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-21 */ |
||||
+ |
||||
+static char *qpi[] = { |
||||
+ [0x02] = "Intel QPI physical layer detected drift buffer alarm", |
||||
+ [0x03] = "Intel QPI physical layer detected latency buffer rollover", |
||||
+ [0x10] = "Intel QPI link layer detected control error from R3QPI", |
||||
+ [0x11] = "Rx entered LLR abort state on CRC error", |
||||
+ [0x12] = "Unsupported or undefined packet", |
||||
+ [0x13] = "Intel QPI link layer control error", |
||||
+ [0x15] = "RBT used un-initialized value", |
||||
+ [0x20] = "Intel QPI physical layer detected a QPI in-band reset but aborted initialization", |
||||
+ [0x21] = "Link failover data self healing", |
||||
+ [0x22] = "Phy detected in-band reset (no width change)", |
||||
+ [0x23] = "Link failover clock failover", |
||||
+ [0x30] = "Rx detected CRC error - successful LLR after Phy re-init", |
||||
+ [0x31] = "Rx detected CRC error - successful LLR wihout Phy re-init", |
||||
+}; |
||||
+ |
||||
+static struct field qpi_mc[] = { |
||||
+ FIELD(16, qpi), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-26 */ |
||||
+ |
||||
+static struct field memctrl_mc9[] = { |
||||
+ SBITFIELD(16, "DDR3 address parity error"), |
||||
+ SBITFIELD(17, "Uncorrected HA write data error"), |
||||
+ SBITFIELD(18, "Uncorrected HA data byte enable error"), |
||||
+ SBITFIELD(19, "Corrected patrol scrub error"), |
||||
+ SBITFIELD(20, "Uncorrected patrol scrub error"), |
||||
+ SBITFIELD(21, "Corrected spare error"), |
||||
+ SBITFIELD(22, "Uncorrected spare error"), |
||||
+ SBITFIELD(24, "iMC write data buffer parity error"), |
||||
+ SBITFIELD(25, "DDR4 command address parity error"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
+{ |
||||
+ uint64_t status = e->status; |
||||
+ uint32_t mca = status & 0xffff; |
||||
+ unsigned rank0 = -1, rank1 = -1, chan; |
||||
+ |
||||
+ switch (e->bank) { |
||||
+ case 4: |
||||
+ switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { |
||||
+ case 0x402: case 0x403: |
||||
+ mce_snprintf(e->mcastatus_msg, "Internal errors "); |
||||
+ break; |
||||
+ case 0x406: |
||||
+ mce_snprintf(e->mcastatus_msg, "Intel TXT errors "); |
||||
+ break; |
||||
+ case 0x407: |
||||
+ mce_snprintf(e->mcastatus_msg, "Other UBOX Internal errors "); |
||||
+ break; |
||||
+ } |
||||
+ if (EXTRACT(status, 16, 19)) |
||||
+ mce_snprintf(e->mcastatus_msg, "PCU internal error "); |
||||
+ decode_bitfield(e, status, pcu_mc4); |
||||
+ break; |
||||
+ case 5: |
||||
+ case 20: |
||||
+ case 21: |
||||
+ mce_snprintf(e->mcastatus_msg, "QPI: "); |
||||
+ decode_bitfield(e, status, qpi_mc); |
||||
+ break; |
||||
+ case 9: case 10: case 11: case 12: |
||||
+ case 13: case 14: case 15: case 16: |
||||
+ mce_snprintf(e->mcastatus_msg, "MemCtrl: "); |
||||
+ decode_bitfield(e, status, memctrl_mc9); |
||||
+ break; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * Memory error specific code. Returns if the error is not a MC one |
||||
+ */ |
||||
+ |
||||
+ /* Check if the error is at the memory controller */ |
||||
+ if ((mca >> 7) != 1) |
||||
+ return; |
||||
+ |
||||
+ /* Ignore unless this is an corrected extended error from an iMC bank */ |
||||
+ if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
+ return; |
||||
+ |
||||
+ /* |
||||
+ * Parse the reported channel and ranks |
||||
+ */ |
||||
+ |
||||
+ chan = EXTRACT(status, 0, 3); |
||||
+ if (chan == 0xf) |
||||
+ return; |
||||
+ |
||||
+ mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 62, 62)) { |
||||
+ rank0 = EXTRACT(e->misc, 46, 50); |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * FIXME: The conversion from rank to dimm requires to parse the |
||||
+ * DMI tables and call failrank2dimm(). |
||||
+ */ |
||||
+ if (rank0 != -1 && rank1 != -1) |
||||
+ mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
+ rank0, rank1); |
||||
+ else if (rank0 != -1) |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
+} |
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index b132903..bf68d9b 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -404,6 +404,9 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) |
||||
case CPU_BROADWELL_DE: |
||||
broadwell_de_decode_model(ras, e); |
||||
break; |
||||
+ case CPU_BROADWELL_EPEX: |
||||
+ broadwell_epex_decode_model(ras, e); |
||||
+ break; |
||||
default: |
||||
break; |
||||
} |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index b58d6e0..b875512 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -51,6 +51,7 @@ static char *cputype_name[] = { |
||||
[CPU_HASWELL_EPEX] = "Intel Xeon v3 (Haswell) EP/EX", |
||||
[CPU_BROADWELL] = "Broadwell", |
||||
[CPU_BROADWELL_DE] = "Broadwell DE", |
||||
+ [CPU_BROADWELL_EPEX] = "Broadwell EP/EX", |
||||
[CPU_KNIGHTS_LANDING] = "Knights Landing", |
||||
}; |
||||
|
||||
@@ -93,7 +94,9 @@ static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
return CPU_HASWELL_EPEX; |
||||
else if (mce->model == 0x56) |
||||
return CPU_BROADWELL_DE; |
||||
- else if (mce->model == 0x3d || mce->model == 0x4f) |
||||
+ else if (mce->model == 0x4f) |
||||
+ return CPU_BROADWELL_EPEX; |
||||
+ else if (mce->model == 0x3d) |
||||
return CPU_BROADWELL; |
||||
else if (mce->model == 0x57) |
||||
return CPU_KNIGHTS_LANDING; |
||||
diff --git a/ras-mce-handler.h b/ras-mce-handler.h |
||||
index 2648048..c5a3717 100644 |
||||
--- a/ras-mce-handler.h |
||||
+++ b/ras-mce-handler.h |
||||
@@ -46,6 +46,7 @@ enum cputype { |
||||
CPU_HASWELL_EPEX, |
||||
CPU_BROADWELL, |
||||
CPU_BROADWELL_DE, |
||||
+ CPU_BROADWELL_EPEX, |
||||
CPU_KNIGHTS_LANDING, |
||||
}; |
||||
|
||||
@@ -123,6 +124,7 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void knl_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void tulsa_decode_model(struct mce_event *e); |
||||
void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
+void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
|
||||
/* Software defined banks */ |
||||
#define MCE_EXTENDED_BANK 128 |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,63 @@
@@ -0,0 +1,63 @@
|
||||
--- |
||||
mce-intel.c | 3 +++ |
||||
ras-mce-handler.c | 5 +++++ |
||||
ras-mce-handler.h | 1 + |
||||
3 files changed, 9 insertions(+) |
||||
|
||||
--- rasdaemon-0.4.1.orig/mce-intel.c 2017-05-30 12:04:54.440167730 -0400 |
||||
+++ rasdaemon-0.4.1/mce-intel.c 2017-05-30 12:06:51.705755469 -0400 |
||||
@@ -399,6 +399,7 @@ if (test_prefix(11, (e->status & 0xffffL |
||||
hsw_decode_model(ras, e); |
||||
break; |
||||
case CPU_KNIGHTS_LANDING: |
||||
+ case CPU_KNIGHTS_MILL: |
||||
knl_decode_model(ras, e); |
||||
break; |
||||
case CPU_BROADWELL_DE: |
||||
@@ -470,6 +471,8 @@ int set_intel_imc_log(enum cputype cputy |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
+ case CPU_KNIGHTS_LANDING: |
||||
+ case CPU_KNIGHTS_MILL: |
||||
msr = 0x17f; /* MSR_ERROR_CONTROL */ |
||||
bit = 0x2; /* MemError Log Enable */ |
||||
break; |
||||
--- rasdaemon-0.4.1.orig/ras-mce-handler.c 2017-05-30 12:04:54.440167730 -0400 |
||||
+++ rasdaemon-0.4.1/ras-mce-handler.c 2017-05-30 12:07:59.850934779 -0400 |
||||
@@ -53,6 +53,7 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series |
||||
[CPU_BROADWELL_DE] = "Broadwell DE", |
||||
[CPU_BROADWELL_EPEX] = "Broadwell EP/EX", |
||||
[CPU_KNIGHTS_LANDING] = "Knights Landing", |
||||
+ [CPU_KNIGHTS_MILL] = "Knights Mill", |
||||
}; |
||||
|
||||
static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
@@ -100,6 +101,8 @@ else if (mce->model == 0x3d) |
||||
return CPU_BROADWELL; |
||||
else if (mce->model == 0x57) |
||||
return CPU_KNIGHTS_LANDING; |
||||
+ else if (mce->model == 0x85) |
||||
+ return CPU_KNIGHTS_MILL; |
||||
|
||||
if (mce->model > 0x1a) { |
||||
log(ALL, LOG_INFO, |
||||
@@ -228,6 +231,8 @@ int register_mce_handler(struct ras_even |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
+ case CPU_KNIGHTS_LANDING: |
||||
+ case CPU_KNIGHTS_MILL: |
||||
set_intel_imc_log(mce->cputype, ncpus); |
||||
default: |
||||
break; |
||||
--- rasdaemon-0.4.1.orig/ras-mce-handler.h 2017-05-30 12:04:54.440167730 -0400 |
||||
+++ rasdaemon-0.4.1/ras-mce-handler.h 2017-05-30 12:04:58.976113103 -0400 |
||||
@@ -48,6 +48,7 @@ enum cputype { |
||||
CPU_BROADWELL_DE, |
||||
CPU_BROADWELL_EPEX, |
||||
CPU_KNIGHTS_LANDING, |
||||
+ CPU_KNIGHTS_MILL, |
||||
}; |
||||
|
||||
struct mce_event { |
@ -0,0 +1,344 @@
@@ -0,0 +1,344 @@
|
||||
commit f9a5724021d8bc9f38cee3a0a71eb4032da1ec66 |
||||
Author: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Mon Sep 19 15:28:33 2016 -0400 |
||||
|
||||
rasdaemon: add support for Skylake client and server |
||||
|
||||
    Based on upstream mcelog commits |
||||
6c07f906dadfe2c4bb7a21e5fc60dc2f34056bf0 |
||||
e4aca6312aee03066ab45632a7bee23dc892a425 |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
|
||||
--- |
||||
Makefile.am | 2 |
||||
mce-intel-skx.c | 257 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
||||
mce-intel.c | 3 |
||||
ras-mce-handler.c | 6 + |
||||
ras-mce-handler.h | 3 |
||||
5 files changed, 270 insertions(+), 1 deletion(-) |
||||
|
||||
--- rasdaemon-0.4.1.orig/Makefile.am 2017-05-30 12:43:11.975591485 -0400 |
||||
+++ rasdaemon-0.4.1/Makefile.am 2017-05-30 12:43:16.948531592 -0400 |
||||
@@ -30,7 +30,7 @@ if WITH_MCE |
||||
mce-intel-dunnington.c mce-intel-tulsa.c \ |
||||
mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ |
||||
mce-intel-knl.c mce-intel-broadwell-de.c \ |
||||
- mce-intel-broadwell-epex.c |
||||
+ mce-intel-broadwell-epex.c mce-intel-skx.c |
||||
endif |
||||
if WITH_EXTLOG |
||||
rasdaemon_SOURCES += ras-extlog-handler.c |
||||
--- /dev/null 1970-01-01 00:00:00.000000000 +0000 |
||||
+++ rasdaemon-0.4.1/mce-intel-skx.c 2017-05-30 12:43:16.948531592 -0400 |
||||
@@ -0,0 +1,257 @@ |
||||
+/* |
||||
+ * The code below came from Tony Luck mcelog code, |
||||
+ * released under GNU Public General License, v.2 |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License as published by |
||||
+ * the Free Software Foundation; either version 2 of the License, or |
||||
+ * (at your option) any later version. |
||||
+ * |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ * |
||||
+ * You should have received a copy of the GNU General Public License |
||||
+ * along with this program; if not, write to the Free Software |
||||
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||
+*/ |
||||
+ |
||||
+#include <string.h> |
||||
+#include <stdio.h> |
||||
+ |
||||
+#include "ras-mce-handler.h" |
||||
+#include "bitfield.h" |
||||
+ |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-27 */ |
||||
+ |
||||
+static char *pcu_1[] = { |
||||
+ [0x00] = "No Error", |
||||
+ [0x0d] = "MCA_DMI_TRAINING_TIMEOUT", |
||||
+ [0x0f] = "MCA_DMI_CPU_RESET_ACK_TIMEOUT", |
||||
+ [0x10] = "MCA_MORE_THAN_ONE_LT_AGENT", |
||||
+ [0x1e] = "MCA_BIOS_RST_CPL_INVALID_SEQ", |
||||
+ [0x1f] = "MCA_BIOS_INVALID_PKG_STATE_CONFIG", |
||||
+ [0x25] = "MCA_MESSAGE_CHANNEL_TIMEOUT", |
||||
+ [0x27] = "MCA_MSGCH_PMREQ_CMP_TIMEOUT", |
||||
+ [0x30] = "MCA_PKGC_DIRECT_WAKE_RING_TIMEOUT", |
||||
+ [0x31] = "MCA_PKGC_INVALID_RSP_PCH", |
||||
+ [0x33] = "MCA_PKGC_WATCHDOG_HANG_CBZ_DOWN", |
||||
+ [0x34] = "MCA_PKGC_WATCHDOG_HANG_CBZ_UP", |
||||
+ [0x38] = "MCA_PKGC_WATCHDOG_HANG_C3_UP_SF", |
||||
+ [0x40] = "MCA_SVID_VCCIN_VR_ICC_MAX_FAILURE", |
||||
+ [0x41] = "MCA_SVID_COMMAND_TIMEOUT", |
||||
+ [0x42] = "MCA_SVID_VCCIN_VR_VOUT_FAILURE", |
||||
+ [0x43] = "MCA_SVID_CPU_VR_CAPABILITY_ERROR", |
||||
+ [0x44] = "MCA_SVID_CRITICAL_VR_FAILED", |
||||
+ [0x45] = "MCA_SVID_SA_ITD_ERROR", |
||||
+ [0x46] = "MCA_SVID_READ_REG_FAILED", |
||||
+ [0x47] = "MCA_SVID_WRITE_REG_FAILED", |
||||
+ [0x48] = "MCA_SVID_PKGC_INIT_FAILED", |
||||
+ [0x49] = "MCA_SVID_PKGC_CONFIG_FAILED", |
||||
+ [0x4a] = "MCA_SVID_PKGC_REQUEST_FAILED", |
||||
+ [0x4b] = "MCA_SVID_IMON_REQUEST_FAILED", |
||||
+ [0x4c] = "MCA_SVID_ALERT_REQUEST_FAILED", |
||||
+ [0x4d] = "MCA_SVID_MCP_VR_ABSENT_OR_RAMP_ERROR", |
||||
+ [0x4e] = "MCA_SVID_UNEXPECTED_MCP_VR_DETECTED", |
||||
+ [0x51] = "MCA_FIVR_CATAS_OVERVOL_FAULT", |
||||
+ [0x52] = "MCA_FIVR_CATAS_OVERCUR_FAULT", |
||||
+ [0x58] = "MCA_WATCHDOG_TIMEOUT_PKGC_SLAVE", |
||||
+ [0x59] = "MCA_WATCHDOG_TIMEOUT_PKGC_MASTER", |
||||
+ [0x5a] = "MCA_WATCHDOG_TIMEOUT_PKGS_MASTER", |
||||
+ [0x61] = "MCA_PKGS_CPD_UNCPD_TIMEOUT", |
||||
+ [0x63] = "MCA_PKGS_INVALID_REQ_PCH", |
||||
+ [0x64] = "MCA_PKGS_INVALID_REQ_INTERNAL", |
||||
+ [0x65] = "MCA_PKGS_INVALID_RSP_INTERNAL", |
||||
+ [0x6b] = "MCA_PKGS_SMBUS_VPP_PAUSE_TIMEOUT", |
||||
+ [0x81] = "MCA_RECOVERABLE_DIE_THERMAL_TOO_HOT", |
||||
+}; |
||||
+ |
||||
+static struct field pcu_mc4[] = { |
||||
+ FIELD(24, pcu_1), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-28 */ |
||||
+ |
||||
+static char *qpi[] = { |
||||
+ [0x00] = "UC Phy Initialization Failure", |
||||
+ [0x01] = "UC Phy detected drift buffer alarm", |
||||
+ [0x02] = "UC Phy detected latency buffer rollover", |
||||
+ [0x10] = "UC LL Rx detected CRC error: unsuccessful LLR: entered abort state", |
||||
+ [0x11] = "UC LL Rx unsupported or undefined packet", |
||||
+ [0x12] = "UC LL or Phy control error", |
||||
+ [0x13] = "UC LL Rx parameter exchange exception", |
||||
+ [0x1F] = "UC LL detected control error from the link-mesh interface", |
||||
+ [0x20] = "COR Phy initialization abort", |
||||
+ [0x21] = "COR Phy reset", |
||||
+ [0x22] = "COR Phy lane failure, recovery in x8 width", |
||||
+ [0x23] = "COR Phy L0c error corrected without Phy reset", |
||||
+ [0x24] = "COR Phy L0c error triggering Phy Reset", |
||||
+ [0x25] = "COR Phy L0p exit error corrected with Phy reset", |
||||
+ [0x30] = "COR LL Rx detected CRC error - successful LLR without Phy Reinit", |
||||
+ [0x31] = "COR LL Rx detected CRC error - successful LLR with Phy Reinit", |
||||
+}; |
||||
+ |
||||
+static struct field qpi_mc[] = { |
||||
+ FIELD(16, qpi), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* These apply to MSCOD 0x12 "UC LL or Phy control error" */ |
||||
+static struct field qpi_0x12[] = { |
||||
+ SBITFIELD(22, "Phy Control Error"), |
||||
+ SBITFIELD(23, "Unexpected Retry.Ack flit"), |
||||
+ SBITFIELD(24, "Unexpected Retry.Req flit"), |
||||
+ SBITFIELD(25, "RF parity error"), |
||||
+ SBITFIELD(26, "Routeback Table error"), |
||||
+ SBITFIELD(27, "unexpected Tx Protocol flit (EOP, Header or Data)"), |
||||
+ SBITFIELD(28, "Rx Header-or-Credit BGF credit overflow/underflow"), |
||||
+ SBITFIELD(29, "Link Layer Reset still in progress when Phy enters L0"), |
||||
+ SBITFIELD(30, "Link Layer reset initiated while protocol traffic not idle"), |
||||
+ SBITFIELD(31, "Link Layer Tx Parity Error"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-29 */ |
||||
+ |
||||
+static struct field mc_bits[] = { |
||||
+ SBITFIELD(16, "Address parity error"), |
||||
+ SBITFIELD(17, "HA write data parity error"), |
||||
+ SBITFIELD(18, "HA write byte enable parity error"), |
||||
+ SBITFIELD(19, "Corrected patrol scrub error"), |
||||
+ SBITFIELD(20, "Uncorrected patrol scrub error"), |
||||
+ SBITFIELD(21, "Corrected spare error"), |
||||
+ SBITFIELD(22, "Uncorrected spare error"), |
||||
+ SBITFIELD(23, "Any HA read error"), |
||||
+ SBITFIELD(24, "WDB read parity error"), |
||||
+ SBITFIELD(25, "DDR4 command address parity error"), |
||||
+ SBITFIELD(26, "Uncorrected address parity error"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+static char *mc_0x8xx[] = { |
||||
+ [0x0] = "Unrecognized request type", |
||||
+ [0x1] = "Read response to an invalid scoreboard entry", |
||||
+ [0x2] = "Unexpected read response", |
||||
+ [0x3] = "DDR4 completion to an invalid scoreboard entry", |
||||
+ [0x4] = "Completion to an invalid scoreboard entry", |
||||
+ [0x5] = "Completion FIFO overflow", |
||||
+ [0x6] = "Correctable parity error", |
||||
+ [0x7] = "Uncorrectable error", |
||||
+ [0x8] = "Interrupt received while outstanding interrupt was not ACKed", |
||||
+ [0x9] = "ERID FIFO overflow", |
||||
+ [0xa] = "Error on Write credits", |
||||
+ [0xb] = "Error on Read credits", |
||||
+ [0xc] = "Scheduler error", |
||||
+ [0xd] = "Error event", |
||||
+}; |
||||
+ |
||||
+static struct field memctrl_mc13[] = { |
||||
+ FIELD(16, mc_0x8xx), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+/* See IA32 SDM Vol3B Table 16-30 */ |
||||
+ |
||||
+static struct field m2m[] = { |
||||
+ SBITFIELD(16, "MscodDataRdErr"), |
||||
+ SBITFIELD(17, "Reserved"), |
||||
+ SBITFIELD(18, "MscodPtlWrErr"), |
||||
+ SBITFIELD(19, "MscodFullWrErr"), |
||||
+ SBITFIELD(20, "MscodBgfErr"), |
||||
+ SBITFIELD(21, "MscodTimeout"), |
||||
+ SBITFIELD(22, "MscodParErr"), |
||||
+ SBITFIELD(23, "MscodBucket1Err"), |
||||
+ {} |
||||
+}; |
||||
+ |
||||
+void skylake_xeon_decode_model(struct ras_events *ras, struct mce_event *e) |
||||
+{ |
||||
+ uint64_t status = e->status; |
||||
+ uint32_t mca = status & 0xffff; |
||||
+ unsigned rank0 = -1, rank1 = -1, chan; |
||||
+ |
||||
+ switch (e->bank) { |
||||
+ case 4: |
||||
+ switch (EXTRACT(status, 0, 15) & ~(1ull << 12)) { |
||||
+ case 0x402: case 0x403: |
||||
+ mce_snprintf(e->mcastatus_msg, "Internal errors "); |
||||
+ break; |
||||
+ case 0x406: |
||||
+ mce_snprintf(e->mcastatus_msg, "Intel TXT errors "); |
||||
+ break; |
||||
+ case 0x407: |
||||
+ mce_snprintf(e->mcastatus_msg, "Other UBOX Internal errors "); |
||||
+ break; |
||||
+ } |
||||
+ if (EXTRACT(status, 16, 19)) |
||||
+ mce_snprintf(e->mcastatus_msg, "PCU internal error "); |
||||
+ decode_bitfield(e, status, pcu_mc4); |
||||
+ break; |
||||
+ case 5: |
||||
+ case 12: |
||||
+ case 19: |
||||
+ mce_snprintf(e->mcastatus_msg, "QPI: "); |
||||
+ decode_bitfield(e, status, qpi_mc); |
||||
+ if ((EXTRACT(status, 16, 21) == 0x12)) |
||||
+ decode_bitfield(e, status, qpi_0x12); |
||||
+ break; |
||||
+ case 7: |
||||
+ case 8: |
||||
+ mce_snprintf(e->mcastatus_msg, "M2M: "); |
||||
+ decode_bitfield(e, status, m2m); |
||||
+ break; |
||||
+ case 13: |
||||
+ case 14: |
||||
+ case 15: |
||||
+ case 16: |
||||
+ mce_snprintf(e->mcastatus_msg, "MemCtrl: "); |
||||
+ if (EXTRACT(status, 27, 27)) |
||||
+ decode_bitfield(e, status, memctrl_mc13); |
||||
+ else |
||||
+ decode_bitfield(e, status, mc_bits); |
||||
+ break; |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * Memory error specific code. Returns if the error is not a MC one |
||||
+ */ |
||||
+ |
||||
+ /* Check if the error is at the memory controller */ |
||||
+ if ((mca >> 7) != 1) |
||||
+ return; |
||||
+ |
||||
+ /* Ignore unless this is a corrected extended error from an iMC bank */ |
||||
+ if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || |
||||
+ !test_prefix(7, status & 0xefff)) |
||||
+ return; |
||||
+ |
||||
+ /* |
||||
+ * Parse the reported channel and ranks |
||||
+ */ |
||||
+ |
||||
+ chan = EXTRACT(status, 0, 3); |
||||
+ if (chan == 0xf) |
||||
+ return; |
||||
+ |
||||
+ mce_snprintf(e->mc_location, "memory_channel=%d", chan); |
||||
+ |
||||
+ if (EXTRACT(e->misc, 62, 62)) { |
||||
+ rank0 = EXTRACT(e->misc, 46, 50); |
||||
+ if (EXTRACT(e->misc, 63, 63)) |
||||
+ rank1 = EXTRACT(e->misc, 51, 55); |
||||
+ } |
||||
+ |
||||
+ /* |
||||
+ * FIXME: The conversion from rank to dimm requires to parse the |
||||
+ * DMI tables and call failrank2dimm(). |
||||
+ */ |
||||
+ if (rank0 != -1 && rank1 != -1) |
||||
+ mce_snprintf(e->mc_location, "ranks=%d and %d", |
||||
+ rank0, rank1); |
||||
+ else if (rank0 != -1) |
||||
+ mce_snprintf(e->mc_location, "rank=%d", rank0); |
||||
+} |
||||
+ |
||||
--- rasdaemon-0.4.1.orig/mce-intel.c 2017-05-30 12:43:11.975591485 -0400 |
||||
+++ rasdaemon-0.4.1/mce-intel.c 2017-05-30 12:43:16.948531592 -0400 |
||||
@@ -408,6 +408,9 @@ if (test_prefix(11, (e->status & 0xffffL |
||||
case CPU_BROADWELL_EPEX: |
||||
broadwell_epex_decode_model(ras, e); |
||||
break; |
||||
+ case CPU_SKYLAKE_XEON: |
||||
+ skylake_xeon_decode_model(ras, e); |
||||
+ break; |
||||
default: |
||||
break; |
||||
} |
||||
--- rasdaemon-0.4.1.orig/ras-mce-handler.c 2017-05-30 12:43:16.948531592 -0400 |
||||
+++ rasdaemon-0.4.1/ras-mce-handler.c 2017-05-30 12:44:00.295009527 -0400 |
||||
@@ -54,6 +54,8 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series |
||||
[CPU_BROADWELL_EPEX] = "Broadwell EP/EX", |
||||
[CPU_KNIGHTS_LANDING] = "Knights Landing", |
||||
[CPU_KNIGHTS_MILL] = "Knights Mill", |
||||
+ [CPU_SKYLAKE] = "Skylake", |
||||
+ [CPU_SKYLAKE_XEON] = "Skylake Xeon", |
||||
}; |
||||
|
||||
static enum cputype select_intel_cputype(struct ras_events *ras) |
||||
@@ -103,6 +105,10 @@ else if (mce->model == 0x57) |
||||
return CPU_KNIGHTS_LANDING; |
||||
else if (mce->model == 0x85) |
||||
return CPU_KNIGHTS_MILL; |
||||
+ else if (mce->model == 0x4e || mce->model == 0x5e) |
||||
+ return CPU_SKYLAKE; |
||||
+ else if (mce->model == 0x55) |
||||
+ return CPU_SKYLAKE_XEON; |
||||
|
||||
if (mce->model > 0x1a) { |
||||
log(ALL, LOG_INFO, |
||||
--- rasdaemon-0.4.1.orig/ras-mce-handler.h 2017-05-30 12:43:11.976591473 -0400 |
||||
+++ rasdaemon-0.4.1/ras-mce-handler.h 2017-05-30 12:44:25.745703000 -0400 |
||||
@@ -49,6 +49,8 @@ enum cputype { |
||||
CPU_BROADWELL_EPEX, |
||||
CPU_KNIGHTS_LANDING, |
||||
CPU_KNIGHTS_MILL, |
||||
+ CPU_SKYLAKE, |
||||
+ CPU_SKYLAKE_XEON, |
||||
}; |
||||
|
||||
struct mce_event { |
||||
@@ -126,6 +128,7 @@ void knl_decode_model(struct ras_events |
||||
void tulsa_decode_model(struct mce_event *e); |
||||
void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
+void skylake_xeon_decode_model(struct ras_events *ras, struct mce_event *e); |
||||
|
||||
/* Software defined banks */ |
||||
#define MCE_EXTENDED_BANK 128 |
@ -0,0 +1,142 @@
@@ -0,0 +1,142 @@
|
||||
--- |
||||
labels/dell | 96 +++++++++++++++++++++++++++++++++++------------------------- |
||||
1 file changed, 56 insertions(+), 40 deletions(-) |
||||
|
||||
--- rasdaemon-0.4.1.orig/labels/dell 2017-08-23 16:14:36.086652150 -0400 |
||||
+++ rasdaemon-0.4.1/labels/dell 2017-08-23 16:16:59.091057241 -0400 |
||||
@@ -4,23 +4,35 @@ |
||||
# labels are found from the silk screen on the motherboard. |
||||
# |
||||
#Vendor: <vendor-name> |
||||
+# Product: <product-name> |
||||
# Model: <model-name> |
||||
# <label>: <mc>.<top>.<mid>.<low> |
||||
# |
||||
|
||||
Vendor: Dell Inc. |
||||
-#### 11G #### |
||||
+# 1-socket |
||||
+ Product: PowerEdge R220, PowerEdge R330, PowerEdge T330, PowerEdge R230, PowerEdge T130, PowerEdge T30 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; |
||||
+ DIMM_A3: 0.1.0; DIMM_A4: 0.1.1; |
||||
+ |
||||
+ Product: PowerEdge T110 II, PowerEdge T20 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; |
||||
+ |
||||
+ DIMM_B1: 0.0.1; DIMM_B2: 0.1.1; |
||||
+ |
||||
+ Product: PowerEdge R320, PowerEdge T320 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1; |
||||
+ |
||||
# 2-socket |
||||
-# PowerEdge R610 |
||||
- Model: 0K399H, 0F0XJ6 |
||||
+ Product: PowerEdge R610 |
||||
DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; DIMM_A3: 0.0.2; |
||||
DIMM_A4: 0.1.0; DIMM_A5: 0.1.1; DIMM_A6: 0.1.2; |
||||
|
||||
DIMM_B1: 1.0.0; DIMM_B2: 1.0.1; DIMM_B3: 1.0.2; |
||||
DIMM_B4: 1.1.0; DIMM_B5: 1.1.1; DIMM_B6: 1.1.2; |
||||
|
||||
-# PowerEdge T710 R710 |
||||
- Model: 01CTXG, 0N0H4P, 0MD99X, 0N047H, 0PV9DG |
||||
+ Product: PowerEdge T710, PowerEdge R710 |
||||
DIMM_A3: 0.0.0; DIMM_A2: 0.1.0; DIMM_A1: 0.2.0; |
||||
DIMM_A6: 0.0.1; DIMM_A5: 0.1.1; DIMM_A4: 0.2.1; |
||||
DIMM_A9: 0.0.2; DIMM_A8: 0.1.2; DIMM_A7: 0.2.2; |
||||
@@ -29,27 +41,7 @@ DIMM_B3: 1.0.0; DIMM_B2: 1.1.0; DIMM_B1 |
||||
DIMM_B6: 1.0.1; DIMM_B5: 1.1.1; DIMM_B4: 1.2.1; |
||||
DIMM_B9: 1.0.2; DIMM_B8: 1.1.2; DIMM_B7: 1.2.2; |
||||
|
||||
-#### 12/13G #### |
||||
-# 1-socket |
||||
-# PowerEdge R220 |
||||
- Model: 081N4V |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.0.1; |
||||
- DIMM_A3: 0.1.0; DIMM_A4: 0.1.1; |
||||
- |
||||
-#PowerEdge T110 II, T20 |
||||
- Model: 0PC2WT, 0PM2CW, 015TH9, 0MDHN4, 0VD5HY |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; |
||||
- |
||||
- DIMM_B1: 0.0.1; DIMM_B2: 0.1.1; |
||||
- |
||||
-#PowerEdge R320 T320 |
||||
- Model: 0YCV59, 0Y97HY, 07DKYR, 0VJ84C, 07MYHN, 04DMNN, 0W7H8C, 0K20G5, 0V719V, 0FDT3J |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
- DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1; |
||||
- |
||||
-# 2-socket |
||||
-# PowerEdge R620/T620 R720/xd R730/xd T630 R730 R630 T620 M620, FC620 |
||||
- Model: 0VWT90, 07NDJ2, 0F5XM3, 0PXXHP, 0X3D66, 061P35, 0H5J4J, 00W9X3, 0599V5, 0W9WXC, 0599V5, 0H21J3, 0CNCJW, 02CD1V, 0T5TFW, 0F5XM3, 0G1CNH, 05YV77, 0PDCCX, 093MW8, 0NJVT7 |
||||
+ Product: PowerEdge R620, PowerEdge T620, PowerEdge R720xd, PowerEdge R730xd, PowerEdge T630, PowerEdge R730, PowerEdge R630, PowerEdge T620, PowerEdge M620, PowerEdge FC620, PowerEdge M630, PowerEdge FC630 |
||||
DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2; |
||||
@@ -58,23 +50,38 @@ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_ |
||||
DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2; |
||||
|
||||
-# PowerEdge M520 R420 T420 |
||||
- Model: 0NRG83, 0DW6GX, 03WPHJ, 06HTRX, 0H1Y24, 02T9N6, 0TT5P2, 0CPKXG, 03015M, 061VPC, 0PC9H0, 0K3G34, 0PC0V5, 08NVYK |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
- DIMM_A4: 0.0.1; DIMM_A5: 0.1.1; DIMM_A6: 0.2.1; |
||||
- |
||||
- DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; |
||||
- DIMM_B4: 1.0.1; DIMM_B5: 1.1.1; DIMM_B6: 1.2.1; |
||||
- |
||||
-#PowerEdge FC420, M420 |
||||
- Model: 0DPJGD, 068CTP, 0MN3VC, 0417VP |
||||
- DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ Product: PowerEdge M520, PowerEdge R420, PowerEdge T420 |
||||
+ DIMM_A1: 0.1.0; DIMM_A2: 0.2.0; DIMM_A3: 0.3.0; |
||||
+ DIMM_A4: 0.1.1; DIMM_A5: 0.2.1; DIMM_A6: 0.3.1; |
||||
+ |
||||
+ DIMM_B1: 1.1.0; DIMM_B2: 1.2.0; DIMM_B3: 1.3.0; |
||||
+ DIMM_B4: 1.1.1; DIMM_B5: 1.2.1; DIMM_B6: 1.3.1; |
||||
|
||||
- DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; |
||||
+ Product: PowerEdge FC420, PowerEdge M420 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; |
||||
+ |
||||
+ Product: PowerEdge C6320, PowerEdge C4130 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
+ DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
+ |
||||
+ Product: PowerEdge R430, PowerEdge T430, PowerEdge R530 |
||||
+ DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
+ DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
+ |
||||
+ DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
+ |
||||
+ Product: PowerEdge FC430 |
||||
+ DIMM_A1: 0.1.0; DIMM_A2: 0.0.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
+ |
||||
+ DIMM_B1: 1.1.0; DIMM_B2: 1.0.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
|
||||
# 4-socket |
||||
-# # PowerEdge M820 |
||||
- Model: 0RN9TC, 0YWR73, 066N7P, 0PFG1N, 0JC2W3 |
||||
+ Product: PowerEdge M820, PowerEdge R830, PowerEdge M830, PowerEdge R930, PowerEdge FC830 |
||||
DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
DIMM_A9: 0.0.2; DIMM_A10: 0.1.2; DIMM_A11: 0.2.2; DIMM_A12: 0.3.2; |
||||
@@ -90,3 +97,12 @@ DIMM_C9: 2.0.2; DIMM_C10: 2.1.2; |
||||
DIMM_D1: 3.0.0; DIMM_D2: 3.1.0; DIMM_D3: 3.2.0; DIMM_D4: 3.3.0; |
||||
DIMM_D5: 3.0.1; DIMM_D6: 3.1.1; DIMM_D7: 3.2.1; DIMM_D8: 3.3.1; |
||||
DIMM_D9: 3.0.2; DIMM_D10: 3.1.2; DIMM_D11: 3.2.2; DIMM_D12: 3.3.2; |
||||
+ |
||||
+ Product: PowerEdge FM120x4 |
||||
+ DIMM_A_A1: 0.1.0; DIMM_A_A2: 0.2.0; |
||||
+ |
||||
+ DIMM_B_A1: 1.1.0; DIMM_B_A2: 1.2.0; |
||||
+ |
||||
+ DIMM_C_A1: 2.1.0; DIMM_C_A2: 2.2.0; |
||||
+ |
||||
+ DIMM_D_A1: 3.1.0; DIMM_D_A2: 3.2.0; |
@ -0,0 +1,69 @@
@@ -0,0 +1,69 @@
|
||||
From 993b8c40bd0c09a177d52c4f41b09ef2c969fa8d Mon Sep 17 00:00:00 2001 |
||||
From: "Charles.Rose@dell.com" <Charles.Rose@dell.com> |
||||
Date: Fri, 11 Aug 2017 20:09:10 +0000 |
||||
Subject: [PATCH] rasdaemon: Update DIMM labels for Intel Skylake servers |
||||
|
||||
Update labels for Intel Skylake based Dell PowerEdge servers. |
||||
|
||||
Signed-off-by: Charles Rose <charles_rose@dell.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com> |
||||
--- |
||||
labels/dell | 31 +++++++++++++++++++++++++++++++ |
||||
1 file changed, 31 insertions(+) |
||||
|
||||
diff --git a/labels/dell b/labels/dell |
||||
index 5abcd90..58455df 100644 |
||||
--- a/labels/dell |
||||
+++ b/labels/dell |
||||
@@ -50,6 +50,13 @@ Vendor: Dell Inc. |
||||
DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
DIMM_B9: 1.0.2; DIMM_B10: 1.1.2; DIMM_B11: 1.2.2; DIMM_B12: 1.3.2; |
||||
|
||||
+ Product: PowerEdge R640, PowerEdge R740, PowerEdge R740xd |
||||
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0; |
||||
+ A7: 0.0.1; A8: 0.1.1; A9: 0.2.1; A10: 1.0.1; A11: 1.1.1; A12: 1.2.1; |
||||
+ |
||||
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0; |
||||
+ B7: 2.0.1; B8: 2.1.1; B9: 2.2.1; B10: 3.0.1; B11: 3.1.1; B12: 3.2.1; |
||||
+ |
||||
Product: PowerEdge M520, PowerEdge R420, PowerEdge T420 |
||||
DIMM_A1: 0.1.0; DIMM_A2: 0.2.0; DIMM_A3: 0.3.0; |
||||
DIMM_A4: 0.1.1; DIMM_A5: 0.2.1; DIMM_A6: 0.3.1; |
||||
@@ -69,6 +76,17 @@ Vendor: Dell Inc. |
||||
DIMM_B1: 1.0.0; DIMM_B2: 1.1.0; DIMM_B3: 1.2.0; DIMM_B4: 1.3.0; |
||||
DIMM_B5: 1.0.1; DIMM_B6: 1.1.1; DIMM_B7: 1.2.1; DIMM_B8: 1.3.1; |
||||
|
||||
+ Product: PowerEdge C6320p |
||||
+ A1: 0.0.0; B1: 0.1.0; C1: 0.2.0; |
||||
+ D1: 1.0.0; E1: 1.1.0; F1: 1.2.0; |
||||
+ |
||||
+ Product: PowerEdge C6420 |
||||
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0; |
||||
+ A7: 0.0.1; A8: 1.0.1; |
||||
+ |
||||
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0; |
||||
+ B7: 2.0.1; B8: 3.0.1; |
||||
+ |
||||
Product: PowerEdge R430, PowerEdge T430, PowerEdge R530 |
||||
DIMM_A1: 0.0.0; DIMM_A2: 0.1.0; DIMM_A3: 0.2.0; DIMM_A4: 0.3.0; |
||||
DIMM_A5: 0.0.1; DIMM_A6: 0.1.1; DIMM_A7: 0.2.1; DIMM_A8: 0.3.1; |
||||
@@ -106,3 +124,16 @@ Vendor: Dell Inc. |
||||
DIMM_C_A1: 2.1.0; DIMM_C_A2: 2.2.0; |
||||
|
||||
DIMM_D_A1: 3.1.0; DIMM_D_A2: 3.2.0; |
||||
+ |
||||
+ Product: PowerEdge R940 |
||||
+ A1: 0.0.0; A2: 0.1.0; A3: 0.2.0; A4: 1.0.0; A5: 1.1.0; A6: 1.2.0; |
||||
+ A7: 0.0.1; A8: 0.1.1; A9: 0.2.1; A10: 1.0.1; A11: 1.1.1; A12: 1.2.1; |
||||
+ |
||||
+ B1: 2.0.0; B2: 2.1.0; B3: 2.2.0; B4: 3.0.0; B5: 3.1.0; B6: 3.2.0; |
||||
+ B7: 2.0.1; B8: 2.1.1; B9: 2.2.1; B10: 3.0.1; B11: 3.1.1; B12: 3.2.1; |
||||
+ |
||||
+ C1: 4.0.0; C2: 4.1.0; C3: 4.2.0; C4: 5.0.0; C5: 5.1.0; C6: 5.2.0; |
||||
+ C7: 4.0.1; C8: 4.1.1; C9: 4.2.1; C10: 5.0.1; C11: 5.1.1; C12: 5.2.1; |
||||
+ |
||||
+ D1: 6.0.0; D2: 6.1.0; D3: 6.2.0; D4: 7.0.0; D5: 7.1.0; D6: 7.2.0; |
||||
+ D7: 6.0.1; D8: 6.1.1; D9: 6.2.1; D10: 7.0.1; D11: 7.1.1; D12: 7.2.1; |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,601 @@
@@ -0,0 +1,601 @@
|
||||
From 624d8a1d99a2f3bd06cbc537aff3cc30201ba7c2 Mon Sep 17 00:00:00 2001 |
||||
From: Tyler Baicar <tbaicar@codeaurora.org> |
||||
Date: Mon, 12 Jun 2017 16:16:04 -0600 |
||||
Subject: [PATCH 1/2] rasdaemon: add support for non standard CPER section |
||||
events |
||||
|
||||
Add support to handle the non standard CPER section kernel trace |
||||
events which cover RAS errors who's section type is unknown. |
||||
|
||||
Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com> |
||||
--- |
||||
Makefile.am | 3 + |
||||
configure.ac | 9 +++ |
||||
ras-events.c | 15 +++++ |
||||
ras-events.h | 8 +++ |
||||
ras-non-standard-handler.c | 147 +++++++++++++++++++++++++++++++++++++++++++++ |
||||
ras-non-standard-handler.h | 26 ++++++++ |
||||
ras-record.c | 59 ++++++++++++++++++ |
||||
ras-record.h | 15 +++++ |
||||
ras-report.c | 80 ++++++++++++++++++++++++ |
||||
ras-report.h | 18 +++++- |
||||
10 files changed, 379 insertions(+), 1 deletion(-) |
||||
create mode 100644 ras-non-standard-handler.c |
||||
create mode 100644 ras-non-standard-handler.h |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index a10e4b3..c5811e8 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -24,6 +24,9 @@ endif |
||||
if WITH_AER |
||||
rasdaemon_SOURCES += ras-aer-handler.c |
||||
endif |
||||
+if WITH_NON_STANDARD |
||||
+ rasdaemon_SOURCES += ras-non-standard-handler.c |
||||
+endif |
||||
if WITH_MCE |
||||
rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c \ |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
diff --git a/configure.ac b/configure.ac |
||||
index 5af5227..31bf6bd 100644 |
||||
--- a/configure.ac |
||||
+++ b/configure.ac |
||||
@@ -44,6 +44,15 @@ AS_IF([test "x$enable_aer" = "xyes"], [ |
||||
]) |
||||
AM_CONDITIONAL([WITH_AER], [test x$enable_aer = xyes]) |
||||
|
||||
+AC_ARG_ENABLE([non_standard], |
||||
+ AS_HELP_STRING([--enable-non-standard], [enable NON_STANDARD events (currently experimental)])) |
||||
+ |
||||
+AS_IF([test "x$enable_non_standard" = "xyes"], [ |
||||
+ AC_DEFINE(HAVE_NON_STANDARD,1,"have UNKNOWN_SEC events collect") |
||||
+ AC_SUBST([WITH_NON_STANDARD]) |
||||
+]) |
||||
+AM_CONDITIONAL([WITH_NON_STANDARD], [test x$enable_non_standard = xyes]) |
||||
+ |
||||
AC_ARG_ENABLE([mce], |
||||
AS_HELP_STRING([--enable-mce], [enable MCE events (currently experimental)])) |
||||
|
||||
diff --git a/ras-events.c b/ras-events.c |
||||
index 0be7c3f..96aa6f1 100644 |
||||
--- a/ras-events.c |
||||
+++ b/ras-events.c |
||||
@@ -29,6 +29,7 @@ |
||||
#include "libtrace/event-parse.h" |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-aer-handler.h" |
||||
+#include "ras-non-standard-handler.h" |
||||
#include "ras-mce-handler.h" |
||||
#include "ras-extlog-handler.h" |
||||
#include "ras-record.h" |
||||
@@ -208,6 +209,10 @@ int toggle_ras_mc_event(int enable) |
||||
rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_NON_STANDARD |
||||
+ rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable); |
||||
+#endif |
||||
+ |
||||
free_ras: |
||||
free(ras); |
||||
return rc; |
||||
@@ -676,6 +681,16 @@ int handle_ras_events(int record_events) |
||||
"ras", "aer_event"); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_NON_STANDARD |
||||
+ rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", |
||||
+ ras_non_standard_event_handler); |
||||
+ if (!rc) |
||||
+ num_events++; |
||||
+ else |
||||
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", |
||||
+ "ras", "non_standard_event"); |
||||
+#endif |
||||
+ |
||||
cpus = get_num_cpus(ras); |
||||
|
||||
#ifdef HAVE_MCE |
||||
diff --git a/ras-events.h b/ras-events.h |
||||
index 64e045a..3e1008f 100644 |
||||
--- a/ras-events.h |
||||
+++ b/ras-events.h |
||||
@@ -68,6 +68,14 @@ enum hw_event_mc_err_type { |
||||
HW_EVENT_ERR_INFO, |
||||
}; |
||||
|
||||
+/* Should match the code at Kernel's include/acpi/ghes.h */ |
||||
+enum ghes_severity { |
||||
+ GHES_SEV_NO, |
||||
+ GHES_SEV_CORRECTED, |
||||
+ GHES_SEV_RECOVERABLE, |
||||
+ GHES_SEV_PANIC, |
||||
+}; |
||||
+ |
||||
/* Function prototypes */ |
||||
int toggle_ras_mc_event(int enable); |
||||
int handle_ras_events(int record_events); |
||||
diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c |
||||
new file mode 100644 |
||||
index 0000000..4c154e5 |
||||
--- /dev/null |
||||
+++ b/ras-non-standard-handler.c |
||||
@@ -0,0 +1,147 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
+#include <stdio.h> |
||||
+#include <stdlib.h> |
||||
+#include <string.h> |
||||
+#include <unistd.h> |
||||
+#include "libtrace/kbuffer.h" |
||||
+#include "ras-non-standard-handler.h" |
||||
+#include "ras-record.h" |
||||
+#include "ras-logger.h" |
||||
+#include "ras-report.h" |
||||
+ |
||||
+void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) { /* append buf[index..index+3] to s as 8 hex digits, highest-addressed byte (buf[index+3]) first */ |
||||
+ trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]); |
||||
+} |
||||
+ |
||||
+/* |
||||
+ * Format the 16 raw bytes of a little-endian UUID/GUID as lowercase text. |
||||
+ * The result lives in a static buffer that the next call overwrites. |
||||
+ * A NULL uu yields an empty string instead of being dereferenced. |
||||
+ */ |
||||
+static char *uuid_le(const char *uu) |
||||
+{ |
||||
+ static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; |
||||
+ char *p = uuid; |
||||
+ int i; |
||||
+ static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; |
||||
+ |
||||
+ for (i = 0; uu && i < 16; i++) { |
||||
+ /* Cast first: a negative plain char would print as "ffffffxx" and overrun uuid[] */ |
||||
+ p += sprintf(p, "%.2x", (unsigned char)uu[le[i]]); |
||||
+ if (i == 3 || i == 5 || i == 7 || i == 9) |
||||
+ *p++ = '-'; |
||||
+ } |
||||
+ |
||||
+ *p = 0; |
||||
+ |
||||
+ return uuid; |
||||
+} |
||||
+ |
||||
+/* Trace handler for ras:non_standard_event: decodes the record into s, then stores/reports it; returns -1 when a required field cannot be read, 0 otherwise. */ |
||||
+int ras_non_standard_event_handler(struct trace_seq *s, |
||||
+ struct pevent_record *record, |
||||
+ struct event_format *event, void *context) |
||||
+{ |
||||
+ int len, i = 0, line_count = 0; |
||||
+ unsigned long long val; |
||||
+ struct ras_events *ras = context; |
||||
+ time_t now; |
||||
+ struct tm *tm; |
||||
+ struct ras_non_standard_event ev; |
||||
+ |
||||
+ /* |
||||
+ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. |
||||
+ * On previous kernels, the way to properly generate an event would |
||||
+ * be to inject a fake one, measure its timestamp and diff it against |
||||
+ * gettimeofday. We won't do it here. Instead, let's use uptime, |
||||
+ * falling-back to the event report's time, if "uptime" clock is |
||||
+ * not available (legacy kernels). |
||||
+ */ |
||||
+ if (ras->use_uptime) |
||||
+ now = record->ts/user_hz + ras->uptime_diff; |
||||
+ else |
||||
+ now = time(NULL); |
||||
+ |
||||
+ tm = localtime(&now); |
||||
+ if (tm) |
||||
+ strftime(ev.timestamp, sizeof(ev.timestamp), |
||||
+ "%Y-%m-%d %H:%M:%S %z", tm); |
||||
+ else /* localtime() failed: never print or store an uninitialised timestamp */ |
||||
+ snprintf(ev.timestamp, sizeof(ev.timestamp), "1970-01-01 00:00:00 +0000"); |
||||
+ trace_seq_printf(s, "%s ", ev.timestamp); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "sev", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ switch (val) { |
||||
+ case GHES_SEV_NO: |
||||
+ ev.severity = "Informational"; |
||||
+ break; |
||||
+ case GHES_SEV_CORRECTED: |
||||
+ ev.severity = "Corrected"; |
||||
+ break; |
||||
+ case GHES_SEV_RECOVERABLE: |
||||
+ ev.severity = "Recoverable"; |
||||
+ break; |
||||
+ default: /* severities outside the enum are reported as Fatal too */ |
||||
+ case GHES_SEV_PANIC: |
||||
+ ev.severity = "Fatal"; |
||||
+ } |
||||
+ trace_seq_printf(s, "\n %s", ev.severity); |
||||
+ |
||||
+ ev.sec_type = pevent_get_field_raw(s, event, "sec_type", record, &len, 1); |
||||
+ if(!ev.sec_type) |
||||
+ return -1; |
||||
+ trace_seq_printf(s, "\n section type: %s", uuid_le(ev.sec_type)); |
||||
+ ev.fru_text = pevent_get_field_raw(s, event, "fru_text", |
||||
+ record, &len, 1); |
||||
+ ev.fru_id = pevent_get_field_raw(s, event, "fru_id", |
||||
+ record, &len, 1); |
||||
+ trace_seq_printf(s, " fru text: %s fru id: %s ", /* NOTE(review): fru_text and fru_id are not NULL-checked, unlike sec_type and buf */ |
||||
+ ev.fru_text, |
||||
+ uuid_le(ev.fru_id)); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "len", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.length = val; |
||||
+ trace_seq_printf(s, "\n length: %d\n", ev.length); |
||||
+ |
||||
+ ev.error = pevent_get_field_raw(s, event, "buf", record, &len, 1); |
||||
+ if(!ev.error) |
||||
+ return -1; |
||||
+ len = ev.length; /* dump size comes from the "len" field, not from the raw field size */ |
||||
+ trace_seq_printf(s, " error:\n %08x: ", i); |
||||
+ while(len >= 4) { /* whole 32-bit words only; a 1-3 byte tail is not printed */ |
||||
+ print_le_hex(s, ev.error, i); |
||||
+ i+=4; |
||||
+ len-=4; |
||||
+ if(++line_count == 4) { /* four words per output line, then a new offset prefix */ |
||||
+ trace_seq_printf(s, "\n %08x: ", i); |
||||
+ line_count = 0; |
||||
+ } else |
||||
+ trace_seq_printf(s, " "); |
||||
+ } |
||||
+ |
||||
+ /* Store the event in the database (only when built with SQLite support) */ |
||||
+#ifdef HAVE_SQLITE3 |
||||
+ ras_store_non_standard_record(ras, &ev); |
||||
+#endif |
||||
+ |
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ /* Report event to ABRT */ |
||||
+ ras_report_non_standard_event(ras, &ev); |
||||
+#endif |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h |
||||
new file mode 100644 |
||||
index 0000000..2b5ac35 |
||||
--- /dev/null |
||||
+++ b/ras-non-standard-handler.h |
||||
@@ -0,0 +1,26 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
+#ifndef __RAS_NON_STANDARD_HANDLER_H |
||||
+#define __RAS_NON_STANDARD_HANDLER_H |
||||
+ |
||||
+#include "ras-events.h" |
||||
+#include "libtrace/event-parse.h" |
||||
+ |
||||
+int ras_non_standard_event_handler(struct trace_seq *s, |
||||
+ struct pevent_record *record, |
||||
+ struct event_format *event, void *context); |
||||
+ |
||||
+void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index); |
||||
+ |
||||
+#endif |
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 3dc4493..357ab61 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -1,5 +1,6 @@ |
||||
/* |
||||
* Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
@@ -157,6 +158,57 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) |
||||
} |
||||
#endif |
||||
|
||||
+/* |
||||
+ * Table and functions to handle ras:non standard |
||||
+ */ |
||||
+ |
||||
+#ifdef HAVE_NON_STANDARD |
||||
+static const struct db_fields non_standard_event_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ { .name="sec_type", .type="BLOB" }, |
||||
+ { .name="fru_id", .type="BLOB" }, |
||||
+ { .name="fru_text", .type="TEXT" }, |
||||
+ { .name="severity", .type="TEXT" }, |
||||
+ { .name="error", .type="BLOB" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor non_standard_event_tab = { |
||||
+ .name = "non_standard_event", |
||||
+ .fields = non_standard_event_fields, |
||||
+ .num_fields = ARRAY_SIZE(non_standard_event_fields), |
||||
+}; |
||||
+ |
||||
+int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_non_standard_record) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "non_standard_event store: %p\n", priv->stmt_non_standard_record); |
||||
+ |
||||
+ sqlite3_bind_text (priv->stmt_non_standard_record, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_blob (priv->stmt_non_standard_record, 2, ev->sec_type, -1, NULL); |
||||
+ sqlite3_bind_blob (priv->stmt_non_standard_record, 3, ev->fru_id, 16, NULL); |
||||
+ sqlite3_bind_text (priv->stmt_non_standard_record, 4, ev->fru_text, -1, NULL); |
||||
+ sqlite3_bind_text (priv->stmt_non_standard_record, 5, ev->severity, -1, NULL); |
||||
+ sqlite3_bind_blob (priv->stmt_non_standard_record, 6, ev->error, ev->length, NULL); |
||||
+ |
||||
+ rc = sqlite3_step(priv->stmt_non_standard_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do non_standard_event step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_non_standard_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset non_standard_event on sqlite: error = %d\n", rc); |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+#endif |
||||
+ |
||||
#ifdef HAVE_EXTLOG |
||||
static const struct db_fields extlog_event_fields[] = { |
||||
{ .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
@@ -450,6 +502,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
&mce_record_tab); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_NON_STANDARD |
||||
+ rc = ras_mc_create_table(priv, &non_standard_event_tab); |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_non_standard_record, |
||||
+ &non_standard_event_tab); |
||||
+#endif |
||||
+ |
||||
ras->db_priv = priv; |
||||
return 0; |
||||
} |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 5d84297..473ae40 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -1,5 +1,6 @@ |
||||
/* |
||||
* Copyright (C) 2013 Mauro Carvalho Chehab <mchehab@redhat.com> |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
@@ -56,9 +57,18 @@ struct ras_extlog_event { |
||||
unsigned short cper_data_length; |
||||
}; |
||||
|
||||
+struct ras_non_standard_event { |
||||
+ char timestamp[64]; |
||||
+ const char *sec_type, *fru_id, *fru_text; |
||||
+ const char *severity; |
||||
+ const uint8_t *error; |
||||
+ uint32_t length; |
||||
+}; |
||||
+ |
||||
struct ras_mc_event; |
||||
struct ras_aer_event; |
||||
struct ras_extlog_event; |
||||
+struct ras_non_standard_event; |
||||
struct mce_event; |
||||
|
||||
#ifdef HAVE_SQLITE3 |
||||
@@ -77,6 +87,9 @@ struct sqlite3_priv { |
||||
#ifdef HAVE_EXTLOG |
||||
sqlite3_stmt *stmt_extlog_record; |
||||
#endif |
||||
+#ifdef HAVE_NON_STANDARD |
||||
+ sqlite3_stmt *stmt_non_standard_record; |
||||
+#endif |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
@@ -84,6 +97,7 @@ int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev); |
||||
int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); |
||||
+int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev); |
||||
|
||||
#else |
||||
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
||||
@@ -91,6 +105,7 @@ static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event |
||||
static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; |
||||
+static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
diff --git a/ras-report.c b/ras-report.c |
||||
index 0a05732..1eb9f79 100644 |
||||
--- a/ras-report.c |
||||
+++ b/ras-report.c |
||||
@@ -1,3 +1,16 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
#include <stdio.h> |
||||
#include <string.h> |
||||
#include <unistd.h> |
||||
@@ -196,6 +209,25 @@ static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){ |
||||
return 0; |
||||
} |
||||
|
||||
+static int set_non_standard_event_backtrace(char *buf, struct ras_non_standard_event *ev){ |
||||
+ char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
+ |
||||
+ if(!buf || !ev) |
||||
+ return -1; |
||||
+ |
||||
+ sprintf(bt_buf, "BACKTRACE=" \ |
||||
+ "timestamp=%s\n" \ |
||||
+ "severity=%s\n" \ |
||||
+ "length=%d\n", \ |
||||
+ ev->timestamp, \ |
||||
+ ev->severity, \ |
||||
+ ev->length); |
||||
+ |
||||
+ strcat(buf, bt_buf); |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
static int commit_report_backtrace(int sockfd, int type, void *ev){ |
||||
char buf[MAX_BACKTRACE_SIZE]; |
||||
char *pbuf = buf; |
||||
@@ -218,6 +250,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ |
||||
case MCE_EVENT: |
||||
rc = set_mce_event_backtrace(buf, (struct mce_event *)ev); |
||||
break; |
||||
+ case NON_STANDARD_EVENT: |
||||
+ rc = set_non_standard_event_backtrace(buf, (struct ras_non_standard_event *)ev); |
||||
+ break; |
||||
default: |
||||
return -1; |
||||
} |
||||
@@ -345,6 +380,51 @@ aer_fail: |
||||
} |
||||
} |
||||
|
||||
+int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev){ |
||||
+ char buf[MAX_MESSAGE_SIZE]; |
||||
+ int sockfd = 0; |
||||
+ int rc = -1; |
||||
+ |
||||
+ memset(buf, 0, sizeof(buf)); |
||||
+ |
||||
+ sockfd = setup_report_socket(); |
||||
+ if(sockfd < 0){ |
||||
+ return rc; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_basic(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto non_standard_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_backtrace(sockfd, NON_STANDARD_EVENT, ev); |
||||
+ if(rc < 0){ |
||||
+ goto non_standard_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "ANALYZER=%s", "rasdaemon-non-standard"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto non_standard_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "REASON=%s", "Unknown CPER section problem"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto non_standard_fail; |
||||
+ } |
||||
+ |
||||
+ rc = 0; |
||||
+ |
||||
+non_standard_fail: |
||||
+ |
||||
+ if(sockfd > 0){ |
||||
+ close(sockfd); |
||||
+ } |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+ |
||||
int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ |
||||
char buf[MAX_MESSAGE_SIZE]; |
||||
int sockfd = 0; |
||||
diff --git a/ras-report.h b/ras-report.h |
||||
index 7920cdf..c2fcf42 100644 |
||||
--- a/ras-report.h |
||||
+++ b/ras-report.h |
||||
@@ -1,3 +1,16 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
#ifndef __RAS_REPORT_H |
||||
#define __RAS_REPORT_H |
||||
|
||||
@@ -19,7 +32,8 @@ |
||||
enum { |
||||
MC_EVENT, |
||||
MCE_EVENT, |
||||
- AER_EVENT |
||||
+ AER_EVENT, |
||||
+ NON_STANDARD_EVENT |
||||
}; |
||||
|
||||
#ifdef HAVE_ABRT_REPORT |
||||
@@ -27,12 +41,14 @@ enum { |
||||
int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); |
||||
+int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev); |
||||
|
||||
#else |
||||
|
||||
static inline int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }; |
||||
static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
+static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,150 @@
@@ -0,0 +1,150 @@
|
||||
From 873e88d6ba1ce5ec97f5cc0f4f0b45dfd2026b9f Mon Sep 17 00:00:00 2001 |
||||
From: "shiju.jose@huawei.com" <shiju.jose@huawei.com> |
||||
Date: Wed, 4 Oct 2017 10:11:08 +0100 |
||||
Subject: [PATCH] rasdaemon:add support for non-standard error decoder |
||||
|
||||
This patch add support to decode the non-standard |
||||
error information. |
||||
|
||||
Signed-off-by: Shiju Jose <shiju.jose@huawei.com> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com> |
||||
--- |
||||
ras-non-standard-handler.c | 62 +++++++++++++++++++++++++++++++++++++++++++++- |
||||
ras-non-standard-handler.h | 10 ++++++++ |
||||
2 files changed, 71 insertions(+), 1 deletion(-) |
||||
|
||||
diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c |
||||
index 4c154e5..21e6a76 100644 |
||||
--- a/ras-non-standard-handler.c |
||||
+++ b/ras-non-standard-handler.c |
||||
@@ -13,6 +13,7 @@ |
||||
|
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
+#include <stdbool.h> |
||||
#include <string.h> |
||||
#include <unistd.h> |
||||
#include "libtrace/kbuffer.h" |
||||
@@ -21,6 +22,31 @@ |
||||
#include "ras-logger.h" |
||||
#include "ras-report.h" |
||||
|
||||
+static p_ns_dec_tab * ns_dec_tab; |
||||
+static size_t dec_tab_count; |
||||
+ |
||||
+int register_ns_dec_tab(const p_ns_dec_tab tab) |
||||
+{ |
||||
+ ns_dec_tab = (p_ns_dec_tab *)realloc(ns_dec_tab, |
||||
+ (dec_tab_count + 1) * sizeof(tab)); |
||||
+ if (ns_dec_tab == NULL) { |
||||
+ printf("%s p_ns_dec_tab malloc failed", __func__); |
||||
+ return -1; |
||||
+ } |
||||
+ ns_dec_tab[dec_tab_count] = tab; |
||||
+ dec_tab_count++; |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
+void unregister_ns_dec_tab(void) |
||||
+{ |
||||
+ if (ns_dec_tab) { |
||||
+ free(ns_dec_tab); |
||||
+ ns_dec_tab = NULL; |
||||
+ dec_tab_count = 0; |
||||
+ } |
||||
+} |
||||
+ |
||||
void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) { |
||||
trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]); |
||||
} |
||||
@@ -49,16 +75,32 @@ static char *uuid_le(const char *uu) |
||||
return uuid; |
||||
} |
||||
|
||||
+static int uuid_le_cmp(const char *sec_type, const char *uuid2) |
||||
+{ |
||||
+ static char uuid1[32]; |
||||
+ char *p = uuid1; |
||||
+ int i; |
||||
+ static const unsigned char le[16] = { |
||||
+ 3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; |
||||
+ |
||||
+ for (i = 0; i < 16; i++) |
||||
+ p += sprintf(p, "%.2x", sec_type[le[i]]); |
||||
+ *p = 0; |
||||
+ return strncmp(uuid1, uuid2, 32); |
||||
+} |
||||
+ |
||||
int ras_non_standard_event_handler(struct trace_seq *s, |
||||
struct pevent_record *record, |
||||
struct event_format *event, void *context) |
||||
{ |
||||
- int len, i, line_count; |
||||
+ int len, i, line_count, count; |
||||
unsigned long long val; |
||||
struct ras_events *ras = context; |
||||
time_t now; |
||||
struct tm *tm; |
||||
struct ras_non_standard_event ev; |
||||
+ p_ns_dec_tab dec_tab; |
||||
+ bool dec_done = false; |
||||
|
||||
/* |
||||
* Newer kernels (3.10-rc1 or upper) provide an uptime clock. |
||||
@@ -133,6 +175,18 @@ int ras_non_standard_event_handler(struct trace_seq *s, |
||||
trace_seq_printf(s, " "); |
||||
} |
||||
|
||||
+ for (count = 0; count < dec_tab_count && !dec_done; count++) { |
||||
+ dec_tab = ns_dec_tab[count]; |
||||
+ for (i = 0; i < dec_tab[0].len; i++) { |
||||
+ if (uuid_le_cmp(ev.sec_type, |
||||
+ dec_tab[i].sec_type) == 0) { |
||||
+ dec_tab[i].decode(s, ev.error); |
||||
+ dec_done = true; |
||||
+ break; |
||||
+ } |
||||
+ } |
||||
+ } |
||||
+ |
||||
/* Insert data into the SGBD */ |
||||
#ifdef HAVE_SQLITE3 |
||||
ras_store_non_standard_record(ras, &ev); |
||||
@@ -145,3 +199,9 @@ int ras_non_standard_event_handler(struct trace_seq *s, |
||||
|
||||
return 0; |
||||
} |
||||
+ |
||||
+__attribute__((destructor)) |
||||
+static void ns_exit(void) |
||||
+{ |
||||
+ unregister_ns_dec_tab(); |
||||
+} |
||||
diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h |
||||
index 2b5ac35..a183d1a 100644 |
||||
--- a/ras-non-standard-handler.h |
||||
+++ b/ras-non-standard-handler.h |
||||
@@ -17,10 +17,20 @@ |
||||
#include "ras-events.h" |
||||
#include "libtrace/event-parse.h" |
||||
|
||||
+typedef struct ras_ns_dec_tab { |
||||
+ const char *sec_type; |
||||
+ int (*decode)(struct trace_seq *s, const void *err); |
||||
+ size_t len; |
||||
+} *p_ns_dec_tab; |
||||
+ |
||||
int ras_non_standard_event_handler(struct trace_seq *s, |
||||
struct pevent_record *record, |
||||
struct event_format *event, void *context); |
||||
|
||||
void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index); |
||||
|
||||
+int register_ns_dec_tab(const p_ns_dec_tab tab); |
||||
+ |
||||
+void unregister_ns_dec_tab(void); |
||||
+ |
||||
#endif |
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,489 @@
@@ -0,0 +1,489 @@
|
||||
From 5662e5376adcc45da43d7818c8ac1882883c18ac Mon Sep 17 00:00:00 2001 |
||||
From: Tyler Baicar <tbaicar@codeaurora.org> |
||||
Date: Tue, 12 Sep 2017 14:58:25 -0600 |
||||
Subject: [PATCH 1/2] rasdaemon: add support for ARM events |
||||
|
||||
Add support to handle the ARM kernel trace events |
||||
which cover RAS ARM processor errors. |
||||
|
||||
[V4]: fix arm_event_tab usage |
||||
|
||||
Change-Id: Ife99c97042498d5fad4d9b8e873ecfba6a47947d |
||||
Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org> |
||||
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com> |
||||
--- |
||||
Makefile.am | 3 ++ |
||||
configure.ac | 9 ++++++ |
||||
ras-arm-handler.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
||||
ras-arm-handler.h | 24 +++++++++++++++ |
||||
ras-events.c | 15 ++++++++++ |
||||
ras-record.c | 59 ++++++++++++++++++++++++++++++++++++ |
||||
ras-record.h | 16 ++++++++++ |
||||
ras-report.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++ |
||||
ras-report.h | 5 +++- |
||||
9 files changed, 295 insertions(+), 1 deletion(-) |
||||
create mode 100644 ras-arm-handler.c |
||||
create mode 100644 ras-arm-handler.h |
||||
|
||||
diff --git a/Makefile.am b/Makefile.am |
||||
index 2500772..4aa5543 100644 |
||||
--- a/Makefile.am |
||||
+++ b/Makefile.am |
||||
@@ -27,6 +27,9 @@ endif |
||||
if WITH_NON_STANDARD |
||||
rasdaemon_SOURCES += ras-non-standard-handler.c |
||||
endif |
||||
+if WITH_ARM |
||||
+ rasdaemon_SOURCES += ras-arm-handler.c |
||||
+endif |
||||
if WITH_MCE |
||||
rasdaemon_SOURCES += ras-mce-handler.c mce-intel.c mce-amd-k8.c \ |
||||
mce-intel-p4-p6.c mce-intel-nehalem.c \ |
||||
diff --git a/configure.ac b/configure.ac |
||||
index ecd4b2f..14fc2f2 100644 |
||||
--- a/configure.ac |
||||
+++ b/configure.ac |
||||
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_non_standard" = "xyes"], [ |
||||
]) |
||||
AM_CONDITIONAL([WITH_NON_STANDARD], [test x$enable_non_standard = xyes]) |
||||
|
||||
+AC_ARG_ENABLE([arm], |
||||
+ AS_HELP_STRING([--enable-arm], [enable ARM events (currently experimental)])) |
||||
+ |
||||
+AS_IF([test "x$enable_arm" = "xyes"], [ |
||||
+ AC_DEFINE(HAVE_ARM,1,"have ARM events collect") |
||||
+ AC_SUBST([WITH_ARM]) |
||||
+]) |
||||
+AM_CONDITIONAL([WITH_ARM], [test x$enable_arm = xyes]) |
||||
+ |
||||
AC_ARG_ENABLE([mce], |
||||
AS_HELP_STRING([--enable-mce], [enable MCE events (currently experimental)])) |
||||
|
||||
diff --git a/ras-arm-handler.c b/ras-arm-handler.c |
||||
new file mode 100644 |
||||
index 0000000..a76470d |
||||
--- /dev/null |
||||
+++ b/ras-arm-handler.c |
||||
@@ -0,0 +1,90 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
+#include <stdio.h> |
||||
+#include <stdlib.h> |
||||
+#include <string.h> |
||||
+#include <unistd.h> |
||||
+#include "libtrace/kbuffer.h" |
||||
+#include "ras-arm-handler.h" |
||||
+#include "ras-record.h" |
||||
+#include "ras-logger.h" |
||||
+#include "ras-report.h" |
||||
+ |
||||
+int ras_arm_event_handler(struct trace_seq *s, |
||||
+ struct pevent_record *record, |
||||
+ struct event_format *event, void *context) |
||||
+{ |
||||
+ unsigned long long val; |
||||
+ struct ras_events *ras = context; |
||||
+ time_t now; |
||||
+ struct tm *tm; |
||||
+ struct ras_arm_event ev; |
||||
+ |
||||
+ /* |
||||
+ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. |
||||
+ * On previous kernels, the way to properly generate an event would |
||||
+ * be to inject a fake one, measure its timestamp and diff it against |
||||
+ * gettimeofday. We won't do it here. Instead, let's use uptime, |
||||
+ * falling-back to the event report's time, if "uptime" clock is |
||||
+ * not available (legacy kernels). |
||||
+ */ |
||||
+ |
||||
+ if (ras->use_uptime) |
||||
+ now = record->ts/user_hz + ras->uptime_diff; |
||||
+ else |
||||
+ now = time(NULL); |
||||
+ |
||||
+ tm = localtime(&now); |
||||
+ if (tm) |
||||
+ strftime(ev.timestamp, sizeof(ev.timestamp), |
||||
+ "%Y-%m-%d %H:%M:%S %z", tm); |
||||
+ trace_seq_printf(s, "%s\n", ev.timestamp); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "affinity", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.affinity = val; |
||||
+ trace_seq_printf(s, " affinity: %d", ev.affinity); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "mpidr", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.mpidr = val; |
||||
+ trace_seq_printf(s, "\n MPIDR: 0x%llx", (unsigned long long)ev.mpidr); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "midr", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.midr = val; |
||||
+ trace_seq_printf(s, "\n MIDR: 0x%llx", (unsigned long long)ev.midr); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "running_state", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.running_state = val; |
||||
+ trace_seq_printf(s, "\n running_state: %d", ev.running_state); |
||||
+ |
||||
+ if (pevent_get_field_val(s, event, "psci_state", record, &val, 1) < 0) |
||||
+ return -1; |
||||
+ ev.psci_state = val; |
||||
+ trace_seq_printf(s, "\n psci_state: %d", ev.psci_state); |
||||
+ |
||||
+ /* Insert data into the SGBD */ |
||||
+#ifdef HAVE_SQLITE3 |
||||
+ ras_store_arm_record(ras, &ev); |
||||
+#endif |
||||
+ |
||||
+#ifdef HAVE_ABRT_REPORT |
||||
+ /* Report event to ABRT */ |
||||
+ ras_report_arm_event(ras, &ev); |
||||
+#endif |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
diff --git a/ras-arm-handler.h b/ras-arm-handler.h |
||||
new file mode 100644 |
||||
index 0000000..eae10ec |
||||
--- /dev/null |
||||
+++ b/ras-arm-handler.h |
||||
@@ -0,0 +1,24 @@ |
||||
+/* |
||||
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved. |
||||
+ * |
||||
+ * This program is free software; you can redistribute it and/or modify |
||||
+ * it under the terms of the GNU General Public License version 2 and |
||||
+ * only version 2 as published by the Free Software Foundation. |
||||
+ |
||||
+ * This program is distributed in the hope that it will be useful, |
||||
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
+ * GNU General Public License for more details. |
||||
+ */ |
||||
+ |
||||
+#ifndef __RAS_ARM_HANDLER_H |
||||
+#define __RAS_ARM_HANDLER_H |
||||
+ |
||||
+#include "ras-events.h" |
||||
+#include "libtrace/event-parse.h" |
||||
+ |
||||
+int ras_arm_event_handler(struct trace_seq *s, |
||||
+ struct pevent_record *record, |
||||
+ struct event_format *event, void *context); |
||||
+ |
||||
+#endif |
||||
diff --git a/ras-events.c b/ras-events.c |
||||
index 96aa6f1..812d712 100644 |
||||
--- a/ras-events.c |
||||
+++ b/ras-events.c |
||||
@@ -30,6 +30,7 @@ |
||||
#include "ras-mc-handler.h" |
||||
#include "ras-aer-handler.h" |
||||
#include "ras-non-standard-handler.h" |
||||
+#include "ras-arm-handler.h" |
||||
#include "ras-mce-handler.h" |
||||
#include "ras-extlog-handler.h" |
||||
#include "ras-record.h" |
||||
@@ -213,6 +214,10 @@ int toggle_ras_mc_event(int enable) |
||||
rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_ARM |
||||
+ rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); |
||||
+#endif |
||||
+ |
||||
free_ras: |
||||
free(ras); |
||||
return rc; |
||||
@@ -691,6 +696,16 @@ int handle_ras_events(int record_events) |
||||
"ras", "non_standard_event"); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_ARM |
||||
+ rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", |
||||
+ ras_arm_event_handler); |
||||
+ if (!rc) |
||||
+ num_events++; |
||||
+ else |
||||
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", |
||||
+ "ras", "arm_event"); |
||||
+#endif |
||||
+ |
||||
cpus = get_num_cpus(ras); |
||||
|
||||
#ifdef HAVE_MCE |
||||
diff --git a/ras-record.c b/ras-record.c |
||||
index 357ab61..c3644cb 100644 |
||||
--- a/ras-record.c |
||||
+++ b/ras-record.c |
||||
@@ -209,6 +209,58 @@ int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standar |
||||
} |
||||
#endif |
||||
|
||||
+/* |
||||
+ * Table and functions to handle ras:arm |
||||
+ */ |
||||
+ |
||||
+#ifdef HAVE_ARM |
||||
+static const struct db_fields arm_event_fields[] = { |
||||
+ { .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
+ { .name="timestamp", .type="TEXT" }, |
||||
+ { .name="error_count", .type="INTEGER" }, |
||||
+ { .name="affinity", .type="INTEGER" }, |
||||
+ { .name="mpidr", .type="INTEGER" }, |
||||
+ { .name="running_state", .type="INTEGER" }, |
||||
+ { .name="psci_state", .type="INTEGER" }, |
||||
+}; |
||||
+ |
||||
+static const struct db_table_descriptor arm_event_tab = { |
||||
+ .name = "arm_event", |
||||
+ .fields = arm_event_fields, |
||||
+ .num_fields = ARRAY_SIZE(arm_event_fields), |
||||
+}; |
||||
+ |
||||
+int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) |
||||
+{ |
||||
+ int rc; |
||||
+ struct sqlite3_priv *priv = ras->db_priv; |
||||
+ |
||||
+ if (!priv || !priv->stmt_arm_record) |
||||
+ return 0; |
||||
+ log(TERM, LOG_INFO, "arm_event store: %p\n", priv->stmt_arm_record); |
||||
+ |
||||
+ sqlite3_bind_text (priv->stmt_arm_record, 1, ev->timestamp, -1, NULL); |
||||
+ sqlite3_bind_int (priv->stmt_arm_record, 2, ev->error_count); |
||||
+ sqlite3_bind_int (priv->stmt_arm_record, 3, ev->affinity); |
||||
+ sqlite3_bind_int (priv->stmt_arm_record, 4, ev->mpidr); |
||||
+ sqlite3_bind_int (priv->stmt_arm_record, 5, ev->running_state); |
||||
+ sqlite3_bind_int (priv->stmt_arm_record, 6, ev->psci_state); |
||||
+ |
||||
+ rc = sqlite3_step(priv->stmt_arm_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed to do arm_event step on sqlite: error = %d\n", rc); |
||||
+ rc = sqlite3_reset(priv->stmt_arm_record); |
||||
+ if (rc != SQLITE_OK && rc != SQLITE_DONE) |
||||
+ log(TERM, LOG_ERR, |
||||
+ "Failed reset arm_event on sqlite: error = %d\n", |
||||
+ rc); |
||||
+ log(TERM, LOG_INFO, "register inserted at db\n"); |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+#endif |
||||
+ |
||||
#ifdef HAVE_EXTLOG |
||||
static const struct db_fields extlog_event_fields[] = { |
||||
{ .name="id", .type="INTEGER PRIMARY KEY" }, |
||||
@@ -509,6 +561,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) |
||||
&non_standard_event_tab); |
||||
#endif |
||||
|
||||
+#ifdef HAVE_ARM |
||||
+ rc = ras_mc_create_table(priv, &arm_event_tab); |
||||
+ if (rc == SQLITE_OK) |
||||
+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record, |
||||
+ &arm_event_tab); |
||||
+#endif |
||||
+ |
||||
ras->db_priv = priv; |
||||
return 0; |
||||
} |
||||
diff --git a/ras-record.h b/ras-record.h |
||||
index 473ae40..12c2218 100644 |
||||
--- a/ras-record.h |
||||
+++ b/ras-record.h |
||||
@@ -65,10 +65,21 @@ struct ras_non_standard_event { |
||||
uint32_t length; |
||||
}; |
||||
|
||||
+struct ras_arm_event { |
||||
+ char timestamp[64]; |
||||
+ int32_t error_count; |
||||
+ int8_t affinity; |
||||
+ int64_t mpidr; |
||||
+ int64_t midr; |
||||
+ int32_t running_state; |
||||
+ int32_t psci_state; |
||||
+}; |
||||
+ |
||||
struct ras_mc_event; |
||||
struct ras_aer_event; |
||||
struct ras_extlog_event; |
||||
struct ras_non_standard_event; |
||||
+struct ras_arm_event; |
||||
struct mce_event; |
||||
|
||||
#ifdef HAVE_SQLITE3 |
||||
@@ -90,6 +101,9 @@ struct sqlite3_priv { |
||||
#ifdef HAVE_NON_STANDARD |
||||
sqlite3_stmt *stmt_non_standard_record; |
||||
#endif |
||||
+#ifdef HAVE_ARM |
||||
+ sqlite3_stmt *stmt_arm_record; |
||||
+#endif |
||||
}; |
||||
|
||||
int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); |
||||
@@ -98,6 +112,7 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev); |
||||
int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); |
||||
int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev); |
||||
+int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); |
||||
|
||||
#else |
||||
static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; |
||||
@@ -106,6 +121,7 @@ static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_eve |
||||
static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; |
||||
static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; |
||||
+static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
diff --git a/ras-report.c b/ras-report.c |
||||
index 1eb9f79..d4beee0 100644 |
||||
--- a/ras-report.c |
||||
+++ b/ras-report.c |
||||
@@ -228,6 +228,33 @@ static int set_non_standard_event_backtrace(char *buf, struct ras_non_standard_e |
||||
return 0; |
||||
} |
||||
|
||||
+static int set_arm_event_backtrace(char *buf, struct ras_arm_event *ev){ |
||||
+ char bt_buf[MAX_BACKTRACE_SIZE]; |
||||
+ |
||||
+ if(!buf || !ev) |
||||
+ return -1; |
||||
+ |
||||
+ sprintf(bt_buf, "BACKTRACE=" \ |
||||
+ "timestamp=%s\n" \ |
||||
+ "error_count=%d\n" \ |
||||
+ "affinity=%d\n" \ |
||||
+ "mpidr=0x%lx\n" \ |
||||
+ "midr=0x%lx\n" \ |
||||
+ "running_state=%d\n" \ |
||||
+ "psci_state=%d\n", \ |
||||
+ ev->timestamp, \ |
||||
+ ev->error_count, \ |
||||
+ ev->affinity, \ |
||||
+ ev->mpidr, \ |
||||
+ ev->midr, \ |
||||
+ ev->running_state, \ |
||||
+ ev->psci_state); |
||||
+ |
||||
+ strcat(buf, bt_buf); |
||||
+ |
||||
+ return 0; |
||||
+} |
||||
+ |
||||
static int commit_report_backtrace(int sockfd, int type, void *ev){ |
||||
char buf[MAX_BACKTRACE_SIZE]; |
||||
char *pbuf = buf; |
||||
@@ -253,6 +280,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ |
||||
case NON_STANDARD_EVENT: |
||||
rc = set_non_standard_event_backtrace(buf, (struct ras_non_standard_event *)ev); |
||||
break; |
||||
+ case ARM_EVENT: |
||||
+ rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev); |
||||
+ break; |
||||
default: |
||||
return -1; |
||||
} |
||||
@@ -425,6 +455,51 @@ non_standard_fail: |
||||
return rc; |
||||
} |
||||
|
||||
+int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev){ |
||||
+ char buf[MAX_MESSAGE_SIZE]; |
||||
+ int sockfd = 0; |
||||
+ int rc = -1; |
||||
+ |
||||
+ memset(buf, 0, sizeof(buf)); |
||||
+ |
||||
+ sockfd = setup_report_socket(); |
||||
+ if(sockfd < 0){ |
||||
+ return rc; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_basic(sockfd); |
||||
+ if(rc < 0){ |
||||
+ goto arm_fail; |
||||
+ } |
||||
+ |
||||
+ rc = commit_report_backtrace(sockfd, ARM_EVENT, ev); |
||||
+ if(rc < 0){ |
||||
+ goto arm_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "ANALYZER=%s", "rasdaemon-arm"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto arm_fail; |
||||
+ } |
||||
+ |
||||
+ sprintf(buf, "REASON=%s", "ARM CPU report problem"); |
||||
+ rc = write(sockfd, buf, strlen(buf) + 1); |
||||
+ if(rc < strlen(buf) + 1){ |
||||
+ goto arm_fail; |
||||
+ } |
||||
+ |
||||
+ rc = 0; |
||||
+ |
||||
+arm_fail: |
||||
+ |
||||
+ if(sockfd > 0){ |
||||
+ close(sockfd); |
||||
+ } |
||||
+ |
||||
+ return rc; |
||||
+} |
||||
+ |
||||
int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ |
||||
char buf[MAX_MESSAGE_SIZE]; |
||||
int sockfd = 0; |
||||
diff --git a/ras-report.h b/ras-report.h |
||||
index c2fcf42..6c466f5 100644 |
||||
--- a/ras-report.h |
||||
+++ b/ras-report.h |
||||
@@ -33,7 +33,8 @@ enum { |
||||
MC_EVENT, |
||||
MCE_EVENT, |
||||
AER_EVENT, |
||||
- NON_STANDARD_EVENT |
||||
+ NON_STANDARD_EVENT, |
||||
+ ARM_EVENT |
||||
}; |
||||
|
||||
#ifdef HAVE_ABRT_REPORT |
||||
@@ -42,6 +43,7 @@ int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev); |
||||
int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev); |
||||
int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); |
||||
int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev); |
||||
+int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); |
||||
|
||||
#else |
||||
|
||||
@@ -49,6 +51,7 @@ static inline int ras_report_mc_event(struct ras_events *ras, struct ras_mc_even |
||||
static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }; |
||||
static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }; |
||||
static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; |
||||
+static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; |
||||
|
||||
#endif |
||||
|
||||
-- |
||||
1.8.3.1 |
||||
|
@ -0,0 +1,27 @@
@@ -0,0 +1,27 @@
|
||||
commit 1b23bf7d97bacd1d00adb4404dfc5004df394358 |
||||
Author: Aristeu Rozanski <arozansk@redhat.com> |
||||
Date: Fri Feb 2 10:15:25 2018 -0500 |
||||
|
||||
ARM: initialize with 0 unused ras_arm_event members |
||||
|
||||
Issue found by covscan: |
||||
|
||||
1. rasdaemon-0.4.1/ras-arm-handler.c:32: var_decl: Declaring variable "ev" without initializer. |
||||
16. rasdaemon-0.4.1/ras-arm-handler.c:81: uninit_use_in_call: Using uninitialized value "ev.error_count" when calling "ras_store_arm_record". |
||||
23. rasdaemon-0.4.1/ras-record.c:243:2: read_parm_fld: Reading a parameter field. |
||||
|
||||
Signed-off-by: Aristeu Rozanski <arozansk@redhat.com> |
||||
|
||||
diff --git a/ras-arm-handler.c b/ras-arm-handler.c |
||||
index a76470d..2f170e2 100644 |
||||
--- a/ras-arm-handler.c |
||||
+++ b/ras-arm-handler.c |
||||
@@ -31,6 +31,8 @@ int ras_arm_event_handler(struct trace_seq *s, |
||||
struct tm *tm; |
||||
struct ras_arm_event ev; |
||||
|
||||
+ memset(&ev, 0, sizeof(ev)); |
||||
+ |
||||
/* |
||||
* Newer kernels (3.10-rc1 or upper) provide an uptime clock. |
||||
* On previous kernels, the way to properly generate an event would |
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
diff --git a/mce-intel.c b/mce-intel.c |
||||
index bf68d9b..80e4b6f 100644 |
||||
--- a/mce-intel.c |
||||
+++ b/mce-intel.c |
||||
@@ -470,7 +470,6 @@ int set_intel_imc_log(enum cputype cputype, unsigned ncpus) |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
- case CPU_KNIGHTS_LANDING: |
||||
msr = 0x17f; /* MSR_ERROR_CONTROL */ |
||||
bit = 0x2; /* MemError Log Enable */ |
||||
break; |
||||
diff --git a/ras-mce-handler.c b/ras-mce-handler.c |
||||
index b875512..f930fd1 100644 |
||||
--- a/ras-mce-handler.c |
||||
+++ b/ras-mce-handler.c |
||||
@@ -228,7 +228,6 @@ int register_mce_handler(struct ras_events *ras, unsigned ncpus) |
||||
case CPU_SANDY_BRIDGE_EP: |
||||
case CPU_IVY_BRIDGE_EPEX: |
||||
case CPU_HASWELL_EPEX: |
||||
- case CPU_KNIGHTS_LANDING: |
||||
set_intel_imc_log(mce->cputype, ncpus); |
||||
default: |
||||
break; |
@ -0,0 +1,302 @@
@@ -0,0 +1,302 @@
|
||||
%define _hardened_build 1 |
||||
|
||||
Name: rasdaemon |
||||
Version: 0.4.1 |
||||
Release: 32%{?dist} |
||||
Summary: Utility to receive RAS error tracings |
||||
Group: Applications/System |
||||
License: GPLv2 |
||||
URL: https://pagure.io/rasdaemon |
||||
Source0: http://mchehab.fedorapeople.org/%{name}-%{version}.tar.bz2 |
||||
|
||||
ExclusiveArch: %{ix86} x86_64 aarch64 %{power64} |
||||
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) |
||||
BuildRequires: autoconf, automake, gettext-devel, libtool, sqlite-devel |
||||
Requires: hwdata, perl-DBD-SQLite |
||||
%ifnarch %{arm} |
||||
%ifnarch %{power64} |
||||
Requires: dmidecode |
||||
%endif |
||||
%endif |
||||
|
||||
Requires(post): systemd-units |
||||
Requires(preun): systemd-units |
||||
Requires(postun): systemd-units |
||||
|
||||
Patch1: 0001-ras-mc-ctl-Improve-error-summary-to-show-label-and-m.patch |
||||
Patch2: 0002-ras-record-make-the-code-more-generic.patch |
||||
Patch3: 0003-ras-record-rename-stmt-to-stmt_mc_event.patch |
||||
Patch4: 0004-ras-record-reorder-functions.patch |
||||
Patch5: 0005-ras-record-Make-the-code-easier-to-add-support-for-o.patch |
||||
Patch6: 0006-Add-support-to-record-AER-events.patch |
||||
Patch7: 0007-Add-support-to-store-MCE-events-at-the-database.patch |
||||
Patch8: 0008-ras-mc-ctl-add-summary-for-MCE-and-PCIe-AER-errors.patch |
||||
Patch9: 0009-ras-mc-ctl-report-errors-also-for-PCIe-AER-and-MCE.patch |
||||
Patch10: 0010-ras-mc-ctl-Fix-the-name-of-the-error-table-data.patch |
||||
Patch11: 0013-ras-mc-ctl-Improve-parser.patch |
||||
Patch12: 0014-ras-mc-ctl-Fix-label-register-with-2-layers.patch |
||||
Patch13: 0015-Add-an-example-of-labels-file.patch |
||||
Patch14: 0017-ras-mc-ctl-Fix-the-DIMM-layout-display.patch |
||||
Patch15: 0019-ras-mc-ctl-remove-completely-use-of-modprobe.patch |
||||
Patch16: 0022-mce-amd-k8.c-fix-a-warning.patch |
||||
Patch17: 0023-add-abrt-suppport-for-rasdaemon.patch |
||||
Patch18: 0026-rasdaemon-Add-record-option-to-rasdaemon-man-page.patch |
||||
Patch19: 0027-ras-mc-ctl-Print-useful-message-when-run-without-ras.patch |
||||
Patch20: 0028-Make-paths-in-the-systemd-services-configurable.patch |
||||
Patch21: 0031-Correct-ABRT-report-data.patch |
||||
Patch22: 0032-rasdaemon-handle-failures-of-snprintf.patch |
||||
Patch23: 0033-rasdaemon-correct-range-while-parsing-top-middle-and.patch |
||||
Patch24: 0034-rasdaemon-enable-recording-by-default.patch |
||||
Patch25: 0035-eMCA-support.patch |
||||
Patch26: 0036-rasdaemon-fix-some-errors-in-sqlite.patch |
||||
Patch27: 0037-rasdaemon-sqlite-truncates-some-MCE-fields-to-32-bit.patch |
||||
Patch28: 0038-rasdaemon-fix-mce-numfield-decoded-error.patch |
||||
Patch29: 0039-rasdaemon-do-not-assume-dimmX-directories-will-be-pr.patch |
||||
Patch30: 0040-rasdaemon-add-more-dell-labels.patch |
||||
Patch31: 0041-rasdaemon-add-support-for-Haswell.patch |
||||
Patch32: 0042-rasdaemon-decode-new-simple-error-code-number-6.patch |
||||
Patch33: 0043-rasdaemon-Add-missing-entry-to-Ivy-Bridge-memory-con.patch |
||||
Patch34: 0044-rasdaemon-Identify-Ivy-Bridge-properly.patch |
||||
Patch35: 0045-rasdaemon-add-support-for-Broadwell.patch |
||||
Patch36: 0046-rasdaemon-add-support-for-Knights-Landing.patch |
||||
Patch37: 0047-rasdaemon-properly-pring-message-strings-in-decode_b.patch |
||||
Patch38: 0048-rasdaemon-add-missing-semicolon-in-hsw_decode_model.patch |
||||
Patch39: 0049-rasdaemon-enable-IMC-status-usage-for-Haswell-E.patch |
||||
Patch40: 0050-rasdaemon-make-sure-the-error-is-valid-before-handli.patch |
||||
Patch41: 0051-rasdaemon-add-support-to-match-the-machine-by-system.patch |
||||
Patch42: 0052-rasdaemon-add-internal-errors-of-IA32_MC4_STATUS-for.patch |
||||
Patch43: 0053-rasdaemon-remove-a-space-from-mcgstatus_msg.patch |
||||
Patch44: 0054-rasdaemon-unnecessary-comma-for-empty-mc_location-st.patch |
||||
Patch45: 0055-rasdaemon-use-MCA-error-msg-as-error_msg.patch |
||||
Patch46: 0056-x86-rasdaemon-Add-support-to-log-Local-Machine-Check.patch |
||||
Patch47: 0057-rasdaemon-add-support-for-haswell-ex.patch |
||||
Patch48: 0058-rasdaemon-fix-typos-on-ras-mc-ctl-man-page.patch |
||||
Patch49: 0059-rasdaemon-Add-support-for-Knights-Landing-processor.patch |
||||
Patch50: 0060-mce-intel-knl-Fix-CodingStyle.patch |
||||
Patch51: 0061-Add-Broadwell-DE-MSCOD-values.patch |
||||
Patch52: 0062-Add-Broadwell-EP-EX-MSCOD-values.patch |
||||
# Patch53 was submitted upstream but not merged yet |
||||
Patch53: rasdaemon-dont_use_memerror_log_enable_on_knl.patch |
||||
Patch54: 0063-add_support_for_knights_mill.patch |
||||
Patch55: 0064-add_support_for_skylake.patch |
||||
Patch56: 0065-rasdaemon-Update-DIMM-labels-for-Dell-Servers.patch |
||||
Patch57: 0066-rasdaemon-Update-DIMM-labels-for-Intel-Skylake-serve.patch |
||||
Patch58: 0067-rasdaemon-add-support-for-non-standard-CPER-section-.patch |
||||
Patch59: 0068-rasdaemon-add-support-for-non-standard-error-decoder.patch |
||||
Patch60: 0069-rasdaemon-add-support-for-ARM-events.patch |
||||
Patch61: 0070-rasdaemon-ARM-fully-initialize-ras_arm_event.patch |
||||
|
||||
%description |
||||
%{name} is a RAS (Reliability, Availability and Serviceability) logging tool. |
||||
It currently records memory errors, using the EDAC tracing events. |
||||
EDAC is a set of drivers in the Linux kernel that handle detection of ECC errors |
||||
from memory controllers for most chipsets on i386 and x86_64 architectures. |
||||
EDAC drivers for other architectures like arm also exist. |
||||
This userspace component consists of an init script which makes sure |
||||
EDAC drivers and DIMM labels are loaded at system startup, as well as |
||||
a utility for reporting current error counts from the EDAC sysfs files. |
||||
|
||||
%prep |
||||
%setup -q |
||||
%patch1 -p1 |
||||
%patch2 -p1 |
||||
%patch3 -p1 |
||||
%patch4 -p1 |
||||
%patch5 -p1 |
||||
%patch6 -p1 |
||||
%patch7 -p1 |
||||
%patch8 -p1 |
||||
%patch9 -p1 |
||||
%patch10 -p1 |
||||
%patch11 -p1 |
||||
%patch12 -p1 |
||||
%patch13 -p1 |
||||
%patch14 -p1 |
||||
%patch15 -p1 |
||||
%patch16 -p1 |
||||
%patch17 -p1 |
||||
%patch18 -p1 |
||||
%patch19 -p1 |
||||
%patch20 -p1 |
||||
%patch21 -p1 |
||||
%patch22 -p1 |
||||
%patch23 -p1 |
||||
%patch24 -p1 |
||||
%patch25 -p1 |
||||
%patch26 -p1 |
||||
%patch27 -p1 |
||||
%patch28 -p1 |
||||
%patch29 -p1 |
||||
%patch30 -p1 |
||||
%patch31 -p1 |
||||
%patch32 -p1 |
||||
%patch33 -p1 |
||||
%patch34 -p1 |
||||
%patch35 -p1 |
||||
%patch36 -p1 |
||||
%patch37 -p1 |
||||
%patch38 -p1 |
||||
%patch39 -p1 |
||||
%patch40 -p1 |
||||
%patch41 -p1 |
||||
%patch42 -p1 |
||||
%patch43 -p1 |
||||
%patch44 -p1 |
||||
%patch45 -p1 |
||||
%patch46 -p1 |
||||
%patch47 -p1 |
||||
%patch48 -p1 |
||||
%patch49 -p1 |
||||
%patch50 -p1 |
||||
%patch51 -p1 |
||||
%patch52 -p1 |
||||
%patch53 -p1 |
||||
%patch54 -p1 |
||||
%patch55 -p1 |
||||
%patch56 -p1 |
||||
%patch57 -p1 |
||||
%patch58 -p1 |
||||
%patch59 -p1 |
||||
%patch60 -p1 |
||||
%patch61 -p1 |
||||
|
||||
%build |
||||
autoreconf -vfi |
||||
%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-arm |
||||
make %{?_smp_mflags} |
||||
|
||||
%install |
||||
make install DESTDIR=%{buildroot} |
||||
install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service |
||||
install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service |
||||
install -D -p -m 0655 labels/* %{buildroot}%{_sysconfdir}/ras/dimm_labels.d |
||||
rm INSTALL %{buildroot}/usr/include/*.h |
||||
|
||||
%clean |
||||
rm -rf %{buildroot} |
||||
|
||||
%files |
||||
%defattr(-,root,root) |
||||
%doc AUTHORS ChangeLog COPYING README TODO |
||||
%{_sbindir}/rasdaemon |
||||
%{_sbindir}/ras-mc-ctl |
||||
%{_mandir}/*/* |
||||
%{_unitdir}/*.service |
||||
%{_sharedstatedir}/rasdaemon |
||||
%{_sysconfdir}/ras/dimm_labels.d |
||||
|
||||
%changelog |
||||
* Fri Feb 02 2018 Aristeu Rozanski <aris@redhat.com> 0.4.1-32.el7 |
||||
- Fixed covscan error [1520602] |
||||
|
||||
* Wed Jan 24 2018 Aristeu Rozanski <aris@redhat.com> 0.4.1-31.el7 |
||||
- Added ARM support [1520602] |
||||
|
||||
* Thu Oct 19 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-30.el7 |
||||
- Updated project url [1502400] |
||||
|
||||
* Wed Aug 23 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-29.el7 |
||||
- Updating Dell labels [1458938] |
||||
|
||||
* Tue May 30 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-28.el7 |
||||
- Bump release [1448113] |
||||
|
||||
* Tue May 30 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-28.el7 |
||||
- Identify Knights Mill systems as such [1448113] |
||||
|
||||
* Mon May 8 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-27.el7 |
||||
- Fixed error found by covscan in the last patch [1377467] |
||||
|
||||
* Tue Apr 11 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-26.el7 |
||||
- add support for Skylake client and server [1377467] |
||||
|
||||
* Wed Mar 22 2017 Aristeu Rozanski <aris@redhat.com> 0.4.1-25.el7 |
||||
- add support for Knights Mill [1433862] |
||||
|
||||
* Wed Aug 24 2016 Aristeu Rozanski <aris@redhat.com> 0.4.1-24.el7 |
||||
- don't use MemError Log Enable on Knights Landing [1273326] |
||||
|
||||
* Fri Apr 15 2016 Aristeu Rozanski <aris@redhat.com> 0.4.1-23.el7 |
||||
- add Broadwell DE/EP/EX MSCOD values [1299512] |
||||
|
||||
* Mon Feb 08 2016 Aristeu Rozanski <aris@redhat.com> 0.4.1-22.el7 |
||||
- add missing upstream patches for Knights Landing [1273326] |
||||
- fix documentation typos [1247562] |
||||
|
||||
* Thu Dec 03 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-21.el7 |
||||
- add support for Knights Landing [1273326] |
||||
|
||||
* Wed Sep 30 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-20.el7 |
||||
- add support for Haswell EP/EX [1267137] |
||||
|
||||
* Mon Jul 27 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-19.el7 |
||||
- pull latest fixes by Seiichi Ikarashi from upstream [1243941] |
||||
|
||||
* Mon Jul 27 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-18.el7 |
||||
- don't depend on dmidecode on ppc64, fix typo [1244593] |
||||
|
||||
* Wed Jul 22 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-17.el7 |
||||
- don't depend on dmidecode on ppc64 [1244593] |
||||
|
||||
* Wed Jul 08 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-16.el7 |
||||
- allow label files to specify by system product name [1168340] |
||||
|
||||
* Wed Jun 03 2015 Aristeu Rozanski <aris@redhat.com> 0.4.1-15.el7 |
||||
- add support to Haswell and newer processors [1221912] |
||||
|
||||
* Tue Dec 16 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-14.el7 |
||||
- properly install the labels so it can be packaged [1073090] |
||||
|
||||
* Tue Dec 02 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-13.el7 |
||||
- updated labels patch to the new version submitted upstream [1073090] |
||||
|
||||
* Tue Nov 25 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-12.el7 |
||||
- fix on how sysfs tree is parsed for DIMMs [1073090] |
||||
- include new Dell labels [1073090] |
||||
|
||||
* Fri Oct 10 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-11.el7 |
||||
- don't require dmidecode for ppc64le [1151385] |
||||
|
||||
* Fri Aug 22 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-10.el7 |
||||
- use power64 macro instead, we have a driver enabled on ppc64 [1125663] |
||||
|
||||
* Mon Aug 18 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-9.el7 |
||||
- eMCA support [1085519] |
||||
- enable ppc64le [1125663] |
||||
|
||||
* Mon Jun 09 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-8.el7 |
||||
- Revert patch in 0.4.1-7.el7, replaced by a better patch |
||||
- Fix sizeof() usage on pointer [1035742] |
||||
- Added macro to build the package with stack protector [1092558] |
||||
- Handle failures of snprintf() [1035741] |
||||
- Fix range checking on signed char variables [1035746] |
||||
- Added aarch64 as architecture [1070973] |
||||
- Start recording by default [1117366] [1117367] |
||||
|
||||
* Fri Jan 17 2014 Aristeu Rozanski <aris@redhat.com> 0.4.1-7.el7 |
||||
- Fixed rasdaemon.service executable path [1043478] |
||||
|
||||
* Fri Dec 27 2013 Daniel Mach <dmach@redhat.com> - 0.4.1-6 |
||||
- Mass rebuild 2013-12-27 |
||||
|
||||
* Tue Aug 20 2013 Aristeu Rozanski <aris@redhat.com> 0.4.1-5.el7 |
||||
- Applied Jarod Wilson fixes required to pass rpmlint tests |
||||
|
||||
* Thu Aug 15 2013 Aristeu Rozanski <aris@redhat.com> 0.4.1-4.el7 |
||||
- Rebuild |
||||
|
||||
* Sun Jun 2 2013 Peter Robinson <pbrobinson@fedoraproject.org> 0.4.1-3 |
||||
- ARM has EDAC drivers (currently supported in Calxeda highbank) |
||||
|
||||
* Tue May 28 2013 Mauro Carvalho Chehab <mchehab@redhat.com> 0.4.1-2 |
||||
- Fix the name of perl-DBD-SQLite package |
||||
|
||||
* Tue May 28 2013 Mauro Carvalho Chehab <mchehab@redhat.com> 0.4.1-1 |
||||
- Updated to version 0.4.1 which contains some bug fixes |
||||
|
||||
* Mon May 27 2013 Mauro Carvalho Chehab <mchehab@redhat.com> 0.4.0-1 |
||||
- Updated to version 0.4.0 and added support for mce, aer and sqlite3 storage |
||||
|
||||
* Mon May 20 2013 Mauro Carvalho Chehab <mchehab@redhat.com> 0.3.0-1 |
||||
- Package created |
||||
|
Loading…
Reference in new issue