You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

642 lines
14 KiB

From c6ed1e1af9356cdce1eaa652061dd6e4eb32d283 Mon Sep 17 00:00:00 2001
From: Junliang Li <lijunliang.dna@gmail.com>
Date: Thu, 13 Feb 2014 10:39:53 +0800
Subject: [PATCH 23/32] add abrt support for rasdaemon
Adds ABRT as another error-reporting mechanism for rasdaemon.
This patch does the following:
1) reads RAS events (mc, mce and aer);
2) sets up a connection to the abrt-server Unix socket;
3) writes messages following the ABRT server protocol, placing the event
info into the backtrace field;
4) commits the report.
For now, it depends on ABRT to limit floods of reports.
Signed-off-by: Junliang Li <lijunliang.dna@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
Makefile.am | 5 +-
configure.ac | 9 +
ras-aer-handler.c | 6 +
ras-events.h | 3 +
ras-mc-handler.c | 7 +
ras-mce-handler.c | 6 +
ras-report.c | 429 +++++++++++++++++++++++++++++++++++++++++++++++++++++
ras-report.h | 39 +++++
8 files changed, 503 insertions(+), 1 deletions(-)
create mode 100644 ras-report.c
create mode 100644 ras-report.h
diff --git a/Makefile.am b/Makefile.am
index 473ce98..c1668b4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,10 +17,13 @@ if WITH_MCE
mce-intel-dunnington.c mce-intel-tulsa.c \
mce-intel-sb.c mce-intel-ivb.c
endif
+if WITH_ABRT_REPORT
+ rasdaemon_SOURCES += ras-report.c
+endif
rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \
- ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h
+ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h
# This rule can't be called with more than one Makefile job (like make -j8)
# I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 4fe6ef2..0ea962e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [
])
AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes])
+AC_ARG_ENABLE([abrt_report],
+ AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
+
+AS_IF([test "x$enable_abrt_report" = "xyes"], [
+ AC_DEFINE(HAVE_ABRT_REPORT,1,"have report event to ABRT")
+ AC_SUBST([WITH_ABRT_REPORT])
+])
+AM_CONDITIONAL([WITH_ABRT_REPORT], [test x$enable_abrt_report = xyes])
+
test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
diff --git a/ras-aer-handler.c b/ras-aer-handler.c
index e5abaca..50526af 100644
--- a/ras-aer-handler.c
+++ b/ras-aer-handler.c
@@ -24,6 +24,7 @@
#include "ras-record.h"
#include "ras-logger.h"
#include "bitfield.h"
+#include "ras-report.h"
static const char *aer_errors[32] = {
/* Correctable errors */
@@ -115,5 +116,10 @@ int ras_aer_event_handler(struct trace_seq *s,
ras_store_aer_event(ras, &ev);
#endif
+#ifdef HAVE_ABRT_REPORT
+ /* Report event to ABRT */
+ ras_report_aer_event(ras, &ev);
+#endif
+
return 0;
}
diff --git a/ras-events.h b/ras-events.h
index 554a95e..64e045a 100644
--- a/ras-events.h
+++ b/ras-events.h
@@ -47,6 +47,9 @@ struct ras_events {
/* For the mce handler */
struct mce_priv *mce_priv;
+
+ /* For ABRT socket*/
+ int socketfd;
};
struct pthread_data {
diff --git a/ras-mc-handler.c b/ras-mc-handler.c
index 5c24f65..ffb3805 100644
--- a/ras-mc-handler.c
+++ b/ras-mc-handler.c
@@ -23,6 +23,7 @@
#include "ras-mc-handler.h"
#include "ras-record.h"
#include "ras-logger.h"
+#include "ras-report.h"
int ras_mc_event_handler(struct trace_seq *s,
struct pevent_record *record,
@@ -189,6 +190,12 @@ int ras_mc_event_handler(struct trace_seq *s,
/* Insert data into the SGBD */
ras_store_mc_event(ras, &ev);
+
+#ifdef HAVE_ABRT_REPORT
+ /* Report event to ABRT */
+ ras_report_mc_event(ras, &ev);
+#endif
+
return 0;
parse_error:
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
index 59e8d05..1431049 100644
--- a/ras-mce-handler.c
+++ b/ras-mce-handler.c
@@ -26,6 +26,7 @@
#include "ras-mce-handler.h"
#include "ras-record.h"
#include "ras-logger.h"
+#include "ras-report.h"
/*
* The code below were adapted from Andi Kleen/Intel/SuSe mcelog code,
@@ -401,5 +402,10 @@ int ras_mce_event_handler(struct trace_seq *s,
ras_store_mce_record(ras, &e);
#endif
+#ifdef HAVE_ABRT_REPORT
+ /* Report event to ABRT */
+ ras_report_mce_event(ras, &e);
+#endif
+
return 0;
}
diff --git a/ras-report.c b/ras-report.c
new file mode 100644
index 0000000..d3e4a79
--- /dev/null
+++ b/ras-report.c
@@ -0,0 +1,429 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "ras-report.h"
+
+/*
+ * Open a stream socket connected to the ABRT server's unix socket
+ * (ABRT_SOCKET).
+ *
+ * Returns the connected descriptor, or -1 when the socket cannot be
+ * created, the path does not fit into sun_path, or connect() fails.
+ * The caller owns the returned descriptor and has to close() it.
+ */
+static int setup_report_socket(void){
+	struct sockaddr_un addr;
+	int sockfd;
+	int n;
+
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (sockfd < 0)
+		return -1;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	/* Bounded by the destination, and always NUL-terminated. */
+	n = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", ABRT_SOCKET);
+	if (n < 0 || (size_t)n >= sizeof(addr.sun_path))
+		goto fail;
+
+	if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		goto fail;
+
+	return sockfd;
+
+fail:
+	/* Do not leak the descriptor when the server cannot be reached. */
+	close(sockfd);
+	return -1;
+}
+
+/*
+ * Send the fixed preamble of a report on sockfd: the "PUT / HTTP/1.1"
+ * request line followed by the PID, EXECUTABLE and BASENAME items.
+ * Every item is written together with its terminating NUL byte.
+ *
+ * Returns 0 on success and -1 if sockfd is invalid, uname() fails, an
+ * item cannot be formatted, or an item is not written completely.
+ */
+static int commit_report_basic(int sockfd){
+	struct utsname un;
+	char pid_item[32];
+	/* Large enough for the prefix plus the longest possible release string. */
+	char exe_item[sizeof("EXECUTABLE=/boot/vmlinuz-") + sizeof(un.release)];
+	const char *items[4];
+	size_t i;
+	int n;
+
+	if (sockfd < 0)
+		return -1;
+
+	memset(&un, 0, sizeof(un));
+	if (uname(&un) < 0)
+		return -1;
+
+	n = snprintf(pid_item, sizeof(pid_item), "PID=%d", (int)getpid());
+	if (n < 0 || (size_t)n >= sizeof(pid_item))
+		return -1;
+
+	n = snprintf(exe_item, sizeof(exe_item),
+		     "EXECUTABLE=/boot/vmlinuz-%s", un.release);
+	if (n < 0 || (size_t)n >= sizeof(exe_item))
+		return -1;
+
+	/* Items in the order they go out on the wire. */
+	items[0] = "PUT / HTTP/1.1\r\n\r\n";
+	items[1] = pid_item;
+	items[2] = exe_item;
+	items[3] = "BASENAME=rasdaemon";
+
+	for (i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
+		size_t len = strlen(items[i]) + 1;
+		ssize_t written = write(sockfd, items[i], len);
+
+		/*
+		 * Test the sign before comparing with the unsigned length:
+		 * a bare "-1 < len" converts -1 to a huge value, so a failed
+		 * write() would be taken for success.
+		 */
+		if (written < 0 || (size_t)written < len)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Finish a report by sending the "DONE" marker on sockfd.
+ *
+ * Only the four characters are written, without a terminating NUL.
+ * NOTE(review): every other item is sent with its trailing NUL; confirm
+ * against the ABRT server whether "DONE" should carry one as well.
+ *
+ * Returns 0 on success, -1 if sockfd is invalid or the marker is not
+ * written completely.
+ */
+static int commit_report_done(int sockfd){
+	static const char done[] = "DONE";
+	const size_t len = sizeof(done) - 1;
+	ssize_t written;
+
+	if (sockfd < 0)
+		return -1;
+
+	written = write(sockfd, done, len);
+	/* Check the sign first: comparing -1 with a size_t never reports failure. */
+	if (written < 0 || (size_t)written < len)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Append the "BACKTRACE=" item describing a memory controller event
+ * to buf.
+ *
+ * buf must hold a NUL-terminated string (possibly empty) inside a buffer
+ * of MAX_BACKTRACE_SIZE bytes. The text is formatted straight behind the
+ * existing contents, so no second MAX_BACKTRACE_SIZE buffer is needed on
+ * the stack and the write can never run past the end of buf.
+ *
+ * Returns 0 on success, -1 on NULL arguments or if the text does not fit.
+ */
+static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){
+	size_t used;
+	size_t avail;
+	int n;
+
+	if (!buf || !ev)
+		return -1;
+
+	used = strlen(buf);
+	if (used >= MAX_BACKTRACE_SIZE)
+		return -1;
+	avail = MAX_BACKTRACE_SIZE - used;
+
+	/*
+	 * The index and layer fields are printed as numbers. With "%c" an
+	 * index of 0 puts a NUL byte into the text, which cuts the report
+	 * short at that field.
+	 * NOTE(review): assumes these fields are small integers; the former
+	 * "%c" already required an argument that promotes to int.
+	 */
+	n = snprintf(buf + used, avail,
+		     "BACKTRACE= "
+		     "timestamp=%s\n"
+		     "error_count=%d\n"
+		     "error_type=%s\n"
+		     "msg=%s\n"
+		     "label=%s\n"
+		     "mc_index=%d\n"
+		     "top_layer=%d\n"
+		     "middle_layer=%d\n"
+		     "lower_layer=%d\n"
+		     "address=%llu\n"
+		     "grain=%llu\n"
+		     "syndrome=%llu\n"
+		     "driver_detail=%s\n",
+		     ev->timestamp,
+		     ev->error_count,
+		     ev->error_type,
+		     ev->msg,
+		     ev->label,
+		     (int)ev->mc_index,
+		     (int)ev->top_layer,
+		     (int)ev->middle_layer,
+		     (int)ev->lower_layer,
+		     ev->address,
+		     ev->grain,
+		     ev->syndrome,
+		     ev->driver_detail);
+
+	/* A result of avail or more means the text was truncated. */
+	if (n < 0 || (size_t)n >= avail)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Append the "BACKTRACE=" item describing a machine check event to buf.
+ *
+ * buf must hold a NUL-terminated string (possibly empty) inside a buffer
+ * of MAX_BACKTRACE_SIZE bytes. The text is formatted straight behind the
+ * existing contents, so no second MAX_BACKTRACE_SIZE buffer is needed on
+ * the stack and the write can never run past the end of buf.
+ *
+ * Returns 0 on success, -1 on NULL arguments or if the text does not fit.
+ */
+static int set_mce_event_backtrace(char *buf, struct mce_event *ev){
+	size_t used;
+	size_t avail;
+	int n;
+
+	if (!buf || !ev)
+		return -1;
+
+	used = strlen(buf);
+	if (used >= MAX_BACKTRACE_SIZE)
+		return -1;
+	avail = MAX_BACKTRACE_SIZE - used;
+
+	/*
+	 * The register-style fields are cast to unsigned long long so that
+	 * the conversion specifier matches whatever exact integer type the
+	 * structure uses for them; the printed value is the same.
+	 */
+	n = snprintf(buf + used, avail,
+		     "BACKTRACE="
+		     "timestamp=%s\n"
+		     "bank_name=%s\n"
+		     "error_msg=%s\n"
+		     "mcgstatus_msg=%s\n"
+		     "mcistatus_msg=%s\n"
+		     "mcastatus_msg=%s\n"
+		     "user_action=%s\n"
+		     "mc_location=%s\n"
+		     "mcgcap=%llu\n"
+		     "mcgstatus=%llu\n"
+		     "status=%llu\n"
+		     "addr=%llu\n"
+		     "misc=%llu\n"
+		     "ip=%llu\n"
+		     "tsc=%llu\n"
+		     "walltime=%llu\n"
+		     "cpu=%u\n"
+		     "cpuid=%u\n"
+		     "apicid=%u\n"
+		     "socketid=%u\n"
+		     "cs=%d\n"
+		     "bank=%d\n"
+		     "cpuvendor=%d\n",
+		     ev->timestamp,
+		     ev->bank_name,
+		     ev->error_msg,
+		     ev->mcgstatus_msg,
+		     ev->mcistatus_msg,
+		     ev->mcastatus_msg,
+		     ev->user_action,
+		     ev->mc_location,
+		     (unsigned long long)ev->mcgcap,
+		     (unsigned long long)ev->mcgstatus,
+		     (unsigned long long)ev->status,
+		     (unsigned long long)ev->addr,
+		     (unsigned long long)ev->misc,
+		     (unsigned long long)ev->ip,
+		     (unsigned long long)ev->tsc,
+		     (unsigned long long)ev->walltime,
+		     ev->cpu,
+		     ev->cpuid,
+		     ev->apicid,
+		     ev->socketid,
+		     ev->cs,
+		     ev->bank,
+		     ev->cpuvendor);
+
+	/* A result of avail or more means the text was truncated. */
+	if (n < 0 || (size_t)n >= avail)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Append the "BACKTRACE=" item describing a PCIe AER event to buf.
+ *
+ * buf must hold a NUL-terminated string (possibly empty) inside a buffer
+ * of MAX_BACKTRACE_SIZE bytes. The text is formatted straight behind the
+ * existing contents, so no second MAX_BACKTRACE_SIZE buffer is needed on
+ * the stack and the write can never run past the end of buf.
+ *
+ * Returns 0 on success, -1 on NULL arguments or if the text does not fit.
+ */
+static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){
+	size_t used;
+	size_t avail;
+	int n;
+
+	if (!buf || !ev)
+		return -1;
+
+	used = strlen(buf);
+	if (used >= MAX_BACKTRACE_SIZE)
+		return -1;
+	avail = MAX_BACKTRACE_SIZE - used;
+
+	n = snprintf(buf + used, avail,
+		     "BACKTRACE="
+		     "timestamp=%s\n"
+		     "error_type=%s\n"
+		     "dev_name=%s\n"
+		     "msg=%s\n",
+		     ev->timestamp,
+		     ev->error_type,
+		     ev->dev_name,
+		     ev->msg);
+
+	/* A result of avail or more means the text was truncated. */
+	if (n < 0 || (size_t)n >= avail)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Format the BACKTRACE item for one event and send it on sockfd.
+ *
+ * type selects how ev is interpreted: MC_EVENT (struct ras_mc_event),
+ * AER_EVENT (struct ras_aer_event) or MCE_EVENT (struct mce_event).
+ * The item goes out, terminating NUL included, in pieces of at most
+ * INPUT_BUFFER_SIZE - 1 bytes.
+ *
+ * Returns 0 on success, -1 on bad arguments, an unknown type, a
+ * formatting failure or a write error.
+ */
+static int commit_report_backtrace(int sockfd, int type, void *ev){
+	/* NOTE(review): MAX_BACKTRACE_SIZE bytes of stack per call; consider a heap buffer. */
+	char buf[MAX_BACKTRACE_SIZE];
+	const char *pos = buf;
+	size_t left;
+	int rc;
+
+	if (sockfd < 0 || !ev)
+		return -1;
+
+	/* The set_*_backtrace() helpers append, so hand them an empty string. */
+	buf[0] = '\0';
+
+	switch (type) {
+	case MC_EVENT:
+		rc = set_mc_event_backtrace(buf, (struct ras_mc_event *)ev);
+		break;
+	case AER_EVENT:
+		rc = set_aer_event_backtrace(buf, (struct ras_aer_event *)ev);
+		break;
+	case MCE_EVENT:
+		rc = set_mce_event_backtrace(buf, (struct mce_event *)ev);
+		break;
+	default:
+		return -1;
+	}
+
+	if (rc < 0)
+		return -1;
+
+	/* The terminating NUL is part of the item and is sent as well. */
+	left = strlen(buf) + 1;
+
+	while (left > 0) {
+		size_t chunk = left;
+		ssize_t written;
+
+		if (chunk > INPUT_BUFFER_SIZE - 1)
+			chunk = INPUT_BUFFER_SIZE - 1;
+
+		written = write(sockfd, pos, chunk);
+		/* Zero is treated as an error too, so the loop always ends. */
+		if (written <= 0)
+			return -1;
+
+		/* A short write is not an error: continue behind what went out. */
+		pos += written;
+		left -= (size_t)written;
+	}
+
+	return 0;
+}
+
+/*
+ * Write one NUL-terminated report item, terminator included, to sockfd.
+ * Returns 0 if the whole item was written, -1 otherwise.
+ */
+static int send_report_item(int sockfd, const char *item){
+	size_t len = strlen(item) + 1;
+	ssize_t written = write(sockfd, item, len);
+
+	/* Check the sign first: comparing -1 with a size_t hides the failure. */
+	if (written < 0 || (size_t)written < len)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Deliver one complete report over a fresh connection to the ABRT socket:
+ * preamble, BACKTRACE item for the event, the given ANALYZER and REASON
+ * items, and the final DONE marker. The connection is closed before
+ * returning, whatever the outcome.
+ *
+ * type and ev are handed to commit_report_backtrace(); analyzer and reason
+ * are the complete "KEY=value" items to send.
+ *
+ * Returns 0 if every step succeeded, -1 otherwise.
+ */
+static int report_event(int type, void *ev, const char *analyzer, const char *reason){
+	int rc = -1;
+	int sockfd;
+
+	sockfd = setup_report_socket();
+	if (sockfd < 0)
+		return -1;
+
+	if (commit_report_basic(sockfd) < 0)
+		goto out;
+
+	if (commit_report_backtrace(sockfd, type, ev) < 0)
+		goto out;
+
+	if (send_report_item(sockfd, analyzer) < 0)
+		goto out;
+
+	if (send_report_item(sockfd, reason) < 0)
+		goto out;
+
+	if (commit_report_done(sockfd) < 0)
+		goto out;
+
+	rc = 0;
+
+out:
+	/* sockfd is a valid descriptor here, including a possible 0. */
+	close(sockfd);
+	return rc;
+}
+
+/*
+ * Report a memory controller event to ABRT.
+ * ras is not used. Returns 0 on success, -1 on failure.
+ */
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){
+	return report_event(MC_EVENT, ev,
+			    "ANALYZER=rasdaemon-mc",
+			    "REASON=EDAC driver report problem");
+}
+
+/*
+ * Report a PCIe AER event to ABRT.
+ * ras is not used. Returns 0 on success, -1 on failure.
+ */
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){
+	return report_event(AER_EVENT, ev,
+			    "ANALYZER=rasdaemon-aer",
+			    "REASON=PCIe AER driver report problem");
+}
+
+/*
+ * Report a machine check event to ABRT.
+ * ras is not used. Returns 0 on success, -1 on failure.
+ */
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){
+	return report_event(MCE_EVENT, ev,
+			    "ANALYZER=rasdaemon-mce",
+			    "REASON=Machine Check driver report problem");
+}
diff --git a/ras-report.h b/ras-report.h
new file mode 100644
index 0000000..7920cdf
--- /dev/null
+++ b/ras-report.h
@@ -0,0 +1,39 @@
+/*
+ * Reporting of RAS events (memory controller, machine check, PCIe AER)
+ * to the ABRT server through its unix socket.
+ */
+#ifndef __RAS_REPORT_H
+#define __RAS_REPORT_H
+
+#include "ras-record.h"
+#include "ras-events.h"
+#include "ras-mc-handler.h"
+#include "ras-mce-handler.h"
+#include "ras-aer-handler.h"
+
+/* Maximal length of backtrace. */
+#define MAX_BACKTRACE_SIZE (1024*1024)
+/* Amount of data received from one client for a message before reporting error. */
+#define MAX_MESSAGE_SIZE (4*MAX_BACKTRACE_SIZE)
+/* Maximal number of characters read from socket at once. */
+#define INPUT_BUFFER_SIZE (8*1024)
+/* ABRT socket file */
+#define ABRT_SOCKET "/var/run/abrt/abrt.socket"
+
+/* Kind of event a report describes; selects the backtrace formatter. */
+enum {
+	MC_EVENT,
+	MCE_EVENT,
+	AER_EVENT
+};
+
+#ifdef HAVE_ABRT_REPORT
+
+/*
+ * Each function sends one report for the given event to the ABRT socket
+ * and returns 0 on success or -1 on failure.
+ */
+int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev);
+int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev);
+int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev);
+
+#else
+
+/* Built without ABRT reporting: the calls do nothing and report success. */
+static inline int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; }
+static inline int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; }
+static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }
+
+#endif
+
+#endif
--
1.7.1