You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

3458 lines
92 KiB

---
Makefile.inc | 1
libmultipath/Makefile | 7
libmultipath/hwtable.c | 1
libmultipath/nvme-ioctl.c | 869 ++++++++++++++++++++
libmultipath/nvme-ioctl.h | 139 +++
libmultipath/nvme-lib.c | 49 +
libmultipath/nvme-lib.h | 39
libmultipath/nvme/argconfig.h | 99 ++
libmultipath/nvme/json.h | 87 ++
libmultipath/nvme/linux/nvme.h | 1450 +++++++++++++++++++++++++++++++++++
libmultipath/nvme/linux/nvme_ioctl.h | 67 +
libmultipath/nvme/nvme.h | 163 +++
libmultipath/nvme/plugin.h | 36
libmultipath/prio.h | 1
libmultipath/prioritizers/Makefile | 4
libmultipath/prioritizers/ana.c | 236 +++++
libmultipath/propsel.c | 10
libmultipath/util.h | 2
multipath/multipath.conf.5 | 3
19 files changed, 3258 insertions(+), 5 deletions(-)
Index: multipath-tools-130222/libmultipath/nvme/argconfig.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/argconfig.h
@@ -0,0 +1,99 @@
+////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 PMC-Sierra, Inc.
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+//
+// Author: Logan Gunthorpe <logang@deltatee.com>
+// Logan Gunthorpe
+//
+// Date: Oct 23 2014
+//
+// Description:
+// Header file for argconfig.c
+//
+////////////////////////////////////////////////////////////////////////
+
+#ifndef argconfig_H
+#define argconfig_H
+
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+
+enum argconfig_types {
+ CFG_NONE,
+ CFG_STRING,
+ CFG_INT,
+ CFG_SIZE,
+ CFG_LONG,
+ CFG_LONG_SUFFIX,
+ CFG_DOUBLE,
+ CFG_BOOL,
+ CFG_BYTE,
+ CFG_SHORT,
+ CFG_POSITIVE,
+ CFG_INCREMENT,
+ CFG_SUBOPTS,
+ CFG_FILE_A,
+ CFG_FILE_W,
+ CFG_FILE_R,
+ CFG_FILE_AP,
+ CFG_FILE_WP,
+ CFG_FILE_RP,
+};
+
+struct argconfig_commandline_options {
+ const char *option;
+ const char short_option;
+ const char *meta;
+ enum argconfig_types config_type;
+ void *default_value;
+ int argument_type;
+ const char *help;
+};
+
+#define CFG_MAX_SUBOPTS 500
+#define MAX_HELP_FUNC 20
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void argconfig_help_func(void);
+void argconfig_append_usage(const char *str);
+void argconfig_print_help(const char *program_desc,
+ const struct argconfig_commandline_options *options);
+int argconfig_parse(int argc, char *argv[], const char *program_desc,
+ const struct argconfig_commandline_options *options,
+ void *config_out, size_t config_size);
+int argconfig_parse_subopt_string(char *string, char **options,
+ size_t max_options);
+unsigned argconfig_parse_comma_sep_array(char *string, int *ret,
+ unsigned max_length);
+unsigned argconfig_parse_comma_sep_array_long(char *string,
+ unsigned long long *ret,
+ unsigned max_length);
+void argconfig_register_help_func(argconfig_help_func * f);
+
+void print_word_wrapped(const char *s, int indent, int start);
+#ifdef __cplusplus
+}
+#endif
+#endif
Index: multipath-tools-130222/libmultipath/nvme/json.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/json.h
@@ -0,0 +1,87 @@
+#ifndef __JSON__H
+#define __JSON__H
+
+struct json_object;
+struct json_array;
+struct json_pair;
+
+#define JSON_TYPE_STRING 0
+#define JSON_TYPE_INTEGER 1
+#define JSON_TYPE_FLOAT 2
+#define JSON_TYPE_OBJECT 3
+#define JSON_TYPE_ARRAY 4
+#define JSON_TYPE_UINT 5
+#define JSON_PARENT_TYPE_PAIR 0
+#define JSON_PARENT_TYPE_ARRAY 1
+struct json_value {
+ int type;
+ union {
+ long long integer_number;
+ unsigned long long uint_number;
+ long double float_number;
+ char *string;
+ struct json_object *object;
+ struct json_array *array;
+ };
+ int parent_type;
+ union {
+ struct json_pair *parent_pair;
+ struct json_array *parent_array;
+ };
+};
+
+struct json_array {
+ struct json_value **values;
+ int value_cnt;
+ struct json_value *parent;
+};
+
+struct json_object {
+ struct json_pair **pairs;
+ int pair_cnt;
+ struct json_value *parent;
+};
+
+struct json_pair {
+ char *name;
+ struct json_value *value;
+ struct json_object *parent;
+};
+
+struct json_object *json_create_object(void);
+struct json_array *json_create_array(void);
+
+void json_free_object(struct json_object *obj);
+
+int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...);
+#define json_object_add_value_int(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val))
+#define json_object_add_value_uint(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val))
+#define json_object_add_value_float(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val))
+#define json_object_add_value_string(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val))
+#define json_object_add_value_object(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val))
+#define json_object_add_value_array(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val))
+int json_array_add_value_type(struct json_array *array, int type, ...);
+#define json_array_add_value_int(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_INTEGER, (long long) (val)) /* same widening cast as json_object_add_value_int */
+#define json_array_add_value_uint(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_UINT, (unsigned long long) (val)) /* same widening cast as json_object_add_value_uint */
+#define json_array_add_value_float(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val))
+#define json_array_add_value_string(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_STRING, (val))
+#define json_array_add_value_object(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val))
+#define json_array_add_value_array(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val))
+
+#define json_array_last_value_object(obj) \
+ ((obj)->values[(obj)->value_cnt - 1]->object) /* argument parenthesized; indexes value_cnt - 1, so the array must be non-empty */
+
+void json_print_object(struct json_object *obj, void *);
+#endif
Index: multipath-tools-130222/libmultipath/nvme/nvme.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/nvme.h
@@ -0,0 +1,163 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_H
+#define _NVME_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <endian.h>
+#include "plugin.h"
+#include "json.h"
+
+#define unlikely(x) (x) /* no branch hint; parenthesized so e.g. !unlikely(a || b) negates the whole argument */
+
+#ifdef LIBUUID
+#include <uuid/uuid.h>
+#else
+typedef struct {
+ uint8_t b[16];
+} uuid_t;
+#endif
+
+#include "linux/nvme.h"
+
+struct nvme_effects_log_page {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+struct nvme_error_log_page {
+ __u64 error_count;
+ __u16 sqid;
+ __u16 cmdid;
+ __u16 status_field;
+ __u16 parm_error_location;
+ __u64 lba;
+ __u32 nsid;
+ __u8 vs;
+ __u8 resv[3];
+ __u64 cs;
+ __u8 resv2[24];
+};
+
+struct nvme_firmware_log_page {
+ __u8 afi;
+ __u8 resv[7];
+ __u64 frs[7];
+ __u8 resv2[448];
+};
+
+/* idle and active power scales occupy the top 2 bits (7:6) of their 8-bit fields */
+#define POWER_SCALE(s) ((s) >> 6)
+
+struct nvme_host_mem_buffer {
+ __u32 hsize;
+ __u32 hmdlal;
+ __u32 hmdlau;
+ __u32 hmdlec;
+ __u8 rsvd16[4080];
+};
+
+struct nvme_auto_pst {
+ __u32 data;
+ __u32 rsvd32;
+};
+
+struct nvme_timestamp {
+ __u8 timestamp[6];
+ __u8 attr;
+ __u8 rsvd;
+};
+
+struct nvme_controller_list {
+ __le16 num;
+ __le16 identifier[];
+};
+
+struct nvme_bar_cap {
+ __u16 mqes;
+ __u8 ams_cqr;
+ __u8 to;
+ __u16 bps_css_nssrs_dstrd;
+ __u8 mpsmax_mpsmin;
+ __u8 reserved;
+};
+
+#ifdef __CHECKER__
+#define __force __attribute__((force))
+#else
+#define __force
+#endif
+
+#define cpu_to_le16(x) \
+ ((__force __le16)htole16(x))
+#define cpu_to_le32(x) \
+ ((__force __le32)htole32(x))
+#define cpu_to_le64(x) \
+ ((__force __le64)htole64(x))
+
+#define le16_to_cpu(x) \
+ le16toh((__force __u16)(x))
+#define le32_to_cpu(x) \
+ le32toh((__force __u32)(x))
+#define le64_to_cpu(x) \
+ le64toh((__force __u64)(x))
+
+#define MAX_LIST_ITEMS 256
+struct list_item {
+ char node[1024];
+ struct nvme_id_ctrl ctrl;
+ int nsid;
+ struct nvme_id_ns ns;
+ unsigned block;
+};
+
+struct ctrl_list_item {
+ char *name;
+ char *address;
+ char *transport;
+ char *state;
+ char *ana_state;
+};
+
+struct subsys_list_item {
+ char *name;
+ char *subsysnqn;
+ int nctrls;
+ struct ctrl_list_item *ctrls;
+};
+
+enum {
+ NORMAL,
+ JSON,
+ BINARY,
+};
+
+void register_extension(struct plugin *plugin);
+
+#include "argconfig.h"
+int parse_and_open(int argc, char **argv, const char *desc,
+ const struct argconfig_commandline_options *clo, void *cfg, size_t size);
+
+extern const char *devicename;
+
+int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root));
+int validate_output_format(char *format);
+
+struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid);
+void free_subsys_list(struct subsys_list_item *slist, int n);
+char *nvme_char_from_block(char *block);
+#endif /* _NVME_H */
Index: multipath-tools-130222/libmultipath/nvme/plugin.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/plugin.h
@@ -0,0 +1,36 @@
+#ifndef PLUGIN_H
+#define PLUGIN_H
+
+#include <stdbool.h>
+
+struct program {
+ const char *name;
+ const char *version;
+ const char *usage;
+ const char *desc;
+ const char *more;
+ struct command **commands;
+ struct plugin *extensions;
+};
+
+struct plugin {
+ const char *name;
+ const char *desc;
+ struct command **commands;
+ struct program *parent;
+ struct plugin *next;
+ struct plugin *tail;
+};
+
+struct command {
+ char *name;
+ char *help;
+ int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin);
+ char *alias;
+};
+
+void usage(struct plugin *plugin);
+void general_help(struct plugin *plugin);
+int handle_plugin(int argc, char **argv, struct plugin *plugin);
+
+#endif
Index: multipath-tools-130222/libmultipath/nvme/linux/nvme.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/linux/nvme.h
@@ -0,0 +1,1450 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+#include <linux/uuid.h>
+
+/* NQN fields in commands have one fixed size */
+#define NVMF_NQN_FIELD_LEN 256
+
+/* However, the maximum length of a qualified name is a different, smaller size */
+#define NVMF_NQN_SIZE 223
+
+#define NVMF_TRSVCID_SIZE 32
+#define NVMF_TRADDR_SIZE 256
+#define NVMF_TSAS_SIZE 256
+
+#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
+
+#define NVME_RDMA_IP_PORT 4420
+
+#define NVME_NSID_ALL 0xffffffff
+
+enum nvme_subsys_type {
+ NVME_NQN_DISC = 1, /* Discovery type target subsystem */
+ NVME_NQN_NVME = 2, /* NVME type target subsystem */
+};
+
+/* Address Family codes for Discovery Log Page entry ADRFAM field */
+enum {
+ NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */
+ NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */
+ NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */
+ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */
+ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */
+};
+
+/* Transport Type codes for Discovery Log Page entry TRTYPE field */
+enum {
+ NVMF_TRTYPE_RDMA = 1, /* RDMA */
+ NVMF_TRTYPE_FC = 2, /* Fibre Channel */
+ NVMF_TRTYPE_TCP = 3, /* TCP */
+ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */
+ NVMF_TRTYPE_MAX,
+};
+
+/* Transport Requirements codes for Discovery Log Page entry TREQ field */
+enum {
+ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
+ NVMF_TREQ_REQUIRED = 1, /* Required */
+ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+ NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* SQ flow control disable supported */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+ NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */
+ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */
+};
+
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
+ * RDMA_PRTYPE field
+ */
+enum {
+ NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */
+ NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */
+ NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */
+ NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */
+ NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */
+};
+
+/* RDMA Connection Management Service Type codes for Discovery Log Page
+ * entry TSAS RDMA_CMS field
+ */
+enum {
+ NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
+};
+
+/* TCP port security type for Discovery Log Page entry TSAS
+ */
+enum {
+ NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
+ NVMF_TCP_SECTYPE_TLS = 1, /* Transport Layer Security */
+};
+
+#define NVME_AQ_DEPTH 32
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+
+/*
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
+ * NVM-Express 1.2 specification, section 4.1.2.
+ */
+#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1)
+
+enum {
+ NVME_REG_CAP = 0x0000, /* Controller Capabilities */
+ NVME_REG_VS = 0x0008, /* Version */
+ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */
+ NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */
+ NVME_REG_CC = 0x0014, /* Controller Configuration */
+ NVME_REG_CSTS = 0x001c, /* Controller Status */
+ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */
+ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */
+ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */
+ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
+ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
+ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */
+ NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */
+ NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer Location */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
+};
+
+#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
+#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
+#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
+#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
+
+#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
+#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff)
+#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf)
+
+#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10)
+#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8)
+#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4)
+#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2)
+#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1)
+
+/*
+ * Submission and Completion Queue Entry Sizes for the NVM command set.
+ * (In bytes and specified as a power of two (2^n)).
+ */
+#define NVME_NVM_IOSQES 6
+#define NVME_NVM_IOCQES 4
+
+enum {
+ NVME_CC_ENABLE = 1 << 0,
+ NVME_CC_CSS_NVM = 0 << 4,
+ NVME_CC_EN_SHIFT = 0,
+ NVME_CC_CSS_SHIFT = 4,
+ NVME_CC_MPS_SHIFT = 7,
+ NVME_CC_AMS_SHIFT = 11,
+ NVME_CC_SHN_SHIFT = 14,
+ NVME_CC_IOSQES_SHIFT = 16,
+ NVME_CC_IOCQES_SHIFT = 20,
+ NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
+ NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
+ NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+ NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
+ NVME_CSTS_RDY = 1 << 0,
+ NVME_CSTS_CFS = 1 << 1,
+ NVME_CSTS_NSSRO = 1 << 4,
+ NVME_CSTS_PP = 1 << 5,
+ NVME_CSTS_SHST_NORMAL = 0 << 2,
+ NVME_CSTS_SHST_OCCUR = 1 << 2,
+ NVME_CSTS_SHST_CMPLT = 2 << 2,
+ NVME_CSTS_SHST_MASK = 3 << 2,
+};
+
+struct nvme_id_power_state {
+ __le16 max_power; /* centiwatts */
+ __u8 rsvd2;
+ __u8 flags;
+ __le32 entry_lat; /* microseconds */
+ __le32 exit_lat; /* microseconds */
+ __u8 read_tput;
+ __u8 read_lat;
+ __u8 write_tput;
+ __u8 write_lat;
+ __le16 idle_power;
+ __u8 idle_scale;
+ __u8 rsvd19;
+ __le16 active_power;
+ __u8 active_work_scale;
+ __u8 rsvd23[9];
+};
+
+enum {
+ NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
+ NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
+};
+
+struct nvme_id_ctrl {
+ __le16 vid;
+ __le16 ssvid;
+ char sn[20];
+ char mn[40];
+ char fr[8];
+ __u8 rab;
+ __u8 ieee[3];
+ __u8 cmic;
+ __u8 mdts;
+ __le16 cntlid;
+ __le32 ver;
+ __le32 rtd3r;
+ __le32 rtd3e;
+ __le32 oaes;
+ __le32 ctratt;
+ __le16 rrls;
+ __u8 rsvd102[154];
+ __le16 oacs;
+ __u8 acl;
+ __u8 aerl;
+ __u8 frmw;
+ __u8 lpa;
+ __u8 elpe;
+ __u8 npss;
+ __u8 avscc;
+ __u8 apsta;
+ __le16 wctemp;
+ __le16 cctemp;
+ __le16 mtfa;
+ __le32 hmpre;
+ __le32 hmmin;
+ __u8 tnvmcap[16];
+ __u8 unvmcap[16];
+ __le32 rpmbs;
+ __le16 edstt;
+ __u8 dsto;
+ __u8 fwug;
+ __le16 kas;
+ __le16 hctma;
+ __le16 mntmt;
+ __le16 mxtmt;
+ __le32 sanicap;
+ __le32 hmminds;
+ __le16 hmmaxd;
+ __le16 nsetidmax;
+ __u8 rsvd340[2];
+ __u8 anatt;
+ __u8 anacap;
+ __le32 anagrpmax;
+ __le32 nanagrpid;
+ __u8 rsvd352[160];
+ __u8 sqes;
+ __u8 cqes;
+ __le16 maxcmd;
+ __le32 nn;
+ __le16 oncs;
+ __le16 fuses;
+ __u8 fna;
+ __u8 vwc;
+ __le16 awun;
+ __le16 awupf;
+ __u8 nvscc;
+ __u8 nwpc;
+ __le16 acwu;
+ __u8 rsvd534[2];
+ __le32 sgls;
+ __le32 mnan;
+ __u8 rsvd544[224];
+ char subnqn[256];
+ __u8 rsvd1024[768];
+ __le32 ioccsz;
+ __le32 iorcsz;
+ __le16 icdoff;
+ __u8 ctrattr;
+ __u8 msdbd;
+ __u8 rsvd1804[244];
+ struct nvme_id_power_state psd[32];
+ __u8 vs[1024];
+};
+
+enum {
+ NVME_CTRL_ONCS_COMPARE = 1 << 0,
+ NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
+ NVME_CTRL_ONCS_DSM = 1 << 2,
+ NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
+ NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_VWC_PRESENT = 1 << 0,
+ NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
+ NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
+ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+ NVME_CTRL_CTRATT_128_ID = 1 << 0,
+ NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1,
+ NVME_CTRL_CTRATT_NVM_SETS = 1 << 2,
+ NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3,
+ NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4,
+ NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5,
+};
+
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 dlfeat;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __u8 rsvd64[28];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[16];
+ __u8 rsvd192[192];
+ __u8 vs[3712];
+};
+
+enum {
+ NVME_ID_CNS_NS = 0x00,
+ NVME_ID_CNS_CTRL = 0x01,
+ NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
+ NVME_ID_CNS_NS_DESC_LIST = 0x03,
+ NVME_ID_CNS_NVMSET_LIST = 0x04,
+ NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
+ NVME_ID_CNS_NS_PRESENT = 0x11,
+ NVME_ID_CNS_CTRL_NS_LIST = 0x12,
+ NVME_ID_CNS_CTRL_LIST = 0x13,
+};
+
+enum {
+ NVME_DIR_IDENTIFY = 0x00,
+ NVME_DIR_STREAMS = 0x01,
+ NVME_DIR_SND_ID_OP_ENABLE = 0x01,
+ NVME_DIR_SND_ST_OP_REL_ID = 0x01,
+ NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
+ NVME_DIR_RCV_ID_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_STATUS = 0x02,
+ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
+ NVME_DIR_ENDIR = 0x01,
+};
+
+enum {
+ NVME_NS_FEAT_THIN = 1 << 0,
+ NVME_NS_FLBAS_LBA_MASK = 0xf,
+ NVME_NS_FLBAS_META_EXT = 0x10,
+ NVME_LBAF_RP_BEST = 0,
+ NVME_LBAF_RP_BETTER = 1,
+ NVME_LBAF_RP_GOOD = 2,
+ NVME_LBAF_RP_DEGRADED = 3,
+ NVME_NS_DPC_PI_LAST = 1 << 4,
+ NVME_NS_DPC_PI_FIRST = 1 << 3,
+ NVME_NS_DPC_PI_TYPE3 = 1 << 2,
+ NVME_NS_DPC_PI_TYPE2 = 1 << 1,
+ NVME_NS_DPC_PI_TYPE1 = 1 << 0,
+ NVME_NS_DPS_PI_FIRST = 1 << 3,
+ NVME_NS_DPS_PI_MASK = 0x7,
+ NVME_NS_DPS_PI_TYPE1 = 1,
+ NVME_NS_DPS_PI_TYPE2 = 2,
+ NVME_NS_DPS_PI_TYPE3 = 3,
+};
+
+struct nvme_ns_id_desc {
+ __u8 nidt;
+ __u8 nidl;
+ __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN 8
+#define NVME_NIDT_NGUID_LEN 16
+#define NVME_NIDT_UUID_LEN 16
+
+enum {
+ NVME_NIDT_EUI64 = 0x01,
+ NVME_NIDT_NGUID = 0x02,
+ NVME_NIDT_UUID = 0x03,
+};
+
+#define NVME_MAX_NVMSET 31
+
+struct nvme_nvmset_attr_entry {
+ __le16 id;
+ __le16 endurance_group_id;
+ __u8 rsvd4[4];
+ __le32 random_4k_read_typical;
+ __le32 opt_write_size;
+ __u8 total_nvmset_cap[16];
+ __u8 unalloc_nvmset_cap[16];
+ __u8 rsvd48[80];
+};
+
+struct nvme_id_nvmset {
+ __u8 nid;
+ __u8 rsvd1[127];
+ struct nvme_nvmset_attr_entry ent[NVME_MAX_NVMSET];
+};
+
+/* Derived from 1.3a Figure 101: Get Log Page – Telemetry Host
+ * -Initiated Log (Log Identifier 07h)
+ */
+struct nvme_telemetry_log_page_hdr {
+ __u8 lpi; /* Log page identifier */
+ __u8 rsvd[4];
+ __u8 iee_oui[3];
+ __u16 dalb1; /* Data area 1 last block */
+ __u16 dalb2; /* Data area 2 last block */
+ __u16 dalb3; /* Data area 3 last block */
+ __u8 rsvd1[368]; /* TODO verify */
+ __u8 ctrlavail; /* Controller initiated data avail?*/
+ __u8 ctrldgn; /* Controller initiated telemetry Data Gen # */
+ __u8 rsnident[128];
+ /* We'll have to double fetch so we can get the header,
+ * parse dalb1->3 determine how much size we need for the
+ * log then alloc below. Or just do a secondary non-struct
+ * allocation.
+ */
+ __u8 telemetry_dataarea[0];
+};
+
+struct nvme_endurance_group_log {
+ __u32 rsvd0;
+ __u8 avl_spare_threshold;
+ __u8 percent_used;
+ __u8 rsvd6[26];
+ __u8 endurance_estimate[16];
+ __u8 data_units_read[16];
+ __u8 data_units_written[16];
+ __u8 media_units_written[16];
+ __u8 rsvd96[416];
+};
+
+struct nvme_smart_log {
+ __u8 critical_warning;
+ __u8 temperature[2];
+ __u8 avail_spare;
+ __u8 spare_thresh;
+ __u8 percent_used;
+ __u8 rsvd6[26];
+ __u8 data_units_read[16];
+ __u8 data_units_written[16];
+ __u8 host_reads[16];
+ __u8 host_writes[16];
+ __u8 ctrl_busy_time[16];
+ __u8 power_cycles[16];
+ __u8 power_on_hours[16];
+ __u8 unsafe_shutdowns[16];
+ __u8 media_errors[16];
+ __u8 num_err_log_entries[16];
+ __le32 warning_temp_time;
+ __le32 critical_comp_time;
+ __le16 temp_sensor[8];
+ __le32 thm_temp1_trans_count;
+ __le32 thm_temp2_trans_count;
+ __le32 thm_temp1_total_time;
+ __le32 thm_temp2_total_time;
+ __u8 rsvd232[280];
+};
+
+struct nvme_self_test_res {
+ __u8 device_self_test_status;
+ __u8 segment_num;
+ __u8 valid_diagnostic_info;
+ __u8 rsvd;
+ __le64 power_on_hours;
+ __le32 nsid;
+ __le64 failing_lba;
+ __u8 status_code_type;
+ __u8 status_code;
+ __u8 vendor_specific[2];
+} __attribute__((packed));
+
+struct nvme_self_test_log {
+ __u8 crnt_dev_selftest_oprn;
+ __u8 crnt_dev_selftest_compln;
+ __u8 rsvd[2];
+ struct nvme_self_test_res result[20];
+} __attribute__((packed));
+
+struct nvme_fw_slot_info_log {
+ __u8 afi;
+ __u8 rsvd1[7];
+ __le64 frs[7];
+ __u8 rsvd64[448];
+};
+
+/* NVMe Namespace Write Protect State */
+enum {
+ NVME_NS_NO_WRITE_PROTECT = 0,
+ NVME_NS_WRITE_PROTECT,
+ NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+ NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
+#define NVME_MAX_CHANGED_NAMESPACES 1024
+
+struct nvme_changed_ns_list_log {
+ __le32 log[NVME_MAX_CHANGED_NAMESPACES];
+};
+
+enum {
+ NVME_CMD_EFFECTS_CSUPP = 1 << 0,
+ NVME_CMD_EFFECTS_LBCC = 1 << 1,
+ NVME_CMD_EFFECTS_NCC = 1 << 2,
+ NVME_CMD_EFFECTS_NIC = 1 << 3,
+ NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+};
+
+struct nvme_effects_log {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+enum nvme_ana_state {
+ NVME_ANA_OPTIMIZED = 0x01,
+ NVME_ANA_NONOPTIMIZED = 0x02,
+ NVME_ANA_INACCESSIBLE = 0x03,
+ NVME_ANA_PERSISTENT_LOSS = 0x04,
+ NVME_ANA_CHANGE = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+ __le32 grpid;
+ __le32 nnsids;
+ __le64 chgcnt;
+ __u8 state;
+ __u8 rsvd17[15];
+ __le32 nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+ __le64 chgcnt;
+ __le16 ngrps;
+ __le16 rsvd10[3];
+};
+
+enum {
+ NVME_SMART_CRIT_SPARE = 1 << 0,
+ NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
+ NVME_SMART_CRIT_RELIABILITY = 1 << 2,
+ NVME_SMART_CRIT_MEDIA = 1 << 3,
+ NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
+};
+
+enum {
+ NVME_AER_ERROR = 0,
+ NVME_AER_SMART = 1,
+ NVME_AER_CSS = 6,
+ NVME_AER_VS = 7,
+ NVME_AER_NOTICE_NS_CHANGED = 0x0002,
+ NVME_AER_NOTICE_ANA = 0x0003,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+};
+
+struct nvme_lba_range_type {
+ __u8 type;
+ __u8 attributes;
+ __u8 rsvd2[14];
+ __u64 slba;
+ __u64 nlb;
+ __u8 guid[16];
+ __u8 rsvd48[16];
+};
+
+enum {
+ NVME_LBART_TYPE_FS = 0x01,
+ NVME_LBART_TYPE_RAID = 0x02,
+ NVME_LBART_TYPE_CACHE = 0x03,
+ NVME_LBART_TYPE_SWAP = 0x04,
+
+ NVME_LBART_ATTRIB_TEMP = 1 << 0,
+ NVME_LBART_ATTRIB_HIDE = 1 << 1,
+};
+
+struct nvme_plm_config {
+ __u16 enable_event;
+ __u8 rsvd2[30];
+ __u64 dtwin_reads_thresh;
+ __u64 dtwin_writes_thresh;
+ __u64 dtwin_time_thresh;
+ __u8 rsvd56[456];
+};
+
+struct nvme_reservation_status {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[14]; /* explicit pad up to byte 24 (was 13 + implicit alignment padding); same size as in nvme_reservation_status_ext */
+ struct {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 resv3[5];
+ __le64 hostid;
+ __le64 rkey;
+ } regctl_ds[];
+};
+
+struct nvme_reservation_status_ext {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[14];
+ __u8 resv24[40];
+ struct {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 resv3[5];
+ __le64 rkey;
+ __u8 hostid[16];
+ __u8 resv32[32];
+ } regctl_eds[];
+};
+
+enum nvme_async_event_type {
+ NVME_AER_TYPE_ERROR = 0,
+ NVME_AER_TYPE_SMART = 1,
+ NVME_AER_TYPE_NOTICE = 2,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+ nvme_cmd_flush = 0x00,
+ nvme_cmd_write = 0x01,
+ nvme_cmd_read = 0x02,
+ nvme_cmd_write_uncor = 0x04,
+ nvme_cmd_compare = 0x05,
+ nvme_cmd_write_zeroes = 0x08,
+ nvme_cmd_dsm = 0x09,
+ nvme_cmd_resv_register = 0x0d,
+ nvme_cmd_resv_report = 0x0e,
+ nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_resv_release = 0x15,
+};
+
+/*
+ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * @NVME_SGL_FMT_ADDRESS: absolute address of the data block
+ * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
+ * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
+ * request subtype
+ */
+enum {
+ NVME_SGL_FMT_ADDRESS = 0x00,
+ NVME_SGL_FMT_OFFSET = 0x01,
+ NVME_SGL_FMT_TRANSPORT_A = 0x0A,
+ NVME_SGL_FMT_INVALIDATE = 0x0f,
+};
+
+/*
+ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * For struct nvme_sgl_desc:
+ * @NVME_SGL_FMT_DATA_DESC: data block descriptor
+ * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor
+ * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor
+ *
+ * For struct nvme_keyed_sgl_desc:
+ * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data block descriptor
+ */
+enum {
+ NVME_SGL_FMT_DATA_DESC = 0x00,
+ NVME_SGL_FMT_SEG_DESC = 0x02,
+ NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
+ NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
+ NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
+};
+
+struct nvme_sgl_desc {
+ __le64 addr;
+ __le32 length;
+ __u8 rsvd[3];
+ __u8 type;
+};
+
+struct nvme_keyed_sgl_desc {
+ __le64 addr;
+ __u8 length[3];
+ __u8 key[4];
+ __u8 type;
+};
+
+union nvme_data_ptr {
+ struct {
+ __le64 prp1;
+ __le64 prp2;
+ };
+ struct nvme_sgl_desc sgl;
+ struct nvme_keyed_sgl_desc ksgl;
+};
+
+/*
+ * Lowest two bits of our flags field (FUSE field in the spec):
+ *
+ * @NVME_CMD_FUSE_FIRST: Fused Operation, first command
+ * @NVME_CMD_FUSE_SECOND: Fused Operation, second command
+ *
+ * Highest two bits in our flags field (PSDT field in the spec):
+ *
+ * @NVME_CMD_SGL_METABUF: Use SGLs for this transfer.
+ * If used, MPTR contains addr of single physical buffer (byte aligned).
+ * @NVME_CMD_SGL_METASEG: Use SGLs for this transfer.
+ * If used, MPTR contains an address of an SGL segment containing
+ * exactly 1 SGL descriptor (qword aligned).
+ */
+enum {
+ NVME_CMD_FUSE_FIRST = (1 << 0),
+ NVME_CMD_FUSE_SECOND = (1 << 1),
+
+ NVME_CMD_SGL_METABUF = (1 << 6),
+ NVME_CMD_SGL_METASEG = (1 << 7),
+ NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG,
+};
+
+struct nvme_common_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le32 cdw2[2];
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le32 cdw10[6];
+};
+
+struct nvme_rw_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+enum {
+ NVME_RW_LR = 1 << 15,
+ NVME_RW_FUA = 1 << 14,
+ NVME_RW_DEAC = 1 << 9,
+ NVME_RW_DSM_FREQ_UNSPEC = 0,
+ NVME_RW_DSM_FREQ_TYPICAL = 1,
+ NVME_RW_DSM_FREQ_RARE = 2,
+ NVME_RW_DSM_FREQ_READS = 3,
+ NVME_RW_DSM_FREQ_WRITES = 4,
+ NVME_RW_DSM_FREQ_RW = 5,
+ NVME_RW_DSM_FREQ_ONCE = 6,
+ NVME_RW_DSM_FREQ_PREFETCH = 7,
+ NVME_RW_DSM_FREQ_TEMP = 8,
+ NVME_RW_DSM_LATENCY_NONE = 0 << 4,
+ NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
+ NVME_RW_DSM_LATENCY_NORM = 2 << 4,
+ NVME_RW_DSM_LATENCY_LOW = 3 << 4,
+ NVME_RW_DSM_SEQ_REQ = 1 << 6,
+ NVME_RW_DSM_COMPRESSED = 1 << 7,
+ NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
+ NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
+ NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
+ NVME_RW_PRINFO_PRACT = 1 << 13,
+ NVME_RW_DTYPE_STREAMS = 1 << 4,
+};
+
+struct nvme_dsm_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 nr;
+ __le32 attributes;
+ __u32 rsvd12[4];
+};
+
+enum {
+ NVME_DSMGMT_IDR = 1 << 0,
+ NVME_DSMGMT_IDW = 1 << 1,
+ NVME_DSMGMT_AD = 1 << 2,
+};
+
+#define NVME_DSM_MAX_RANGES 256
+
+struct nvme_dsm_range {
+ __le32 cattr;
+ __le32 nlb;
+ __le64 slba;
+};
+
+struct nvme_write_zeroes_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+/* Features */
+
+struct nvme_feat_auto_pst {
+ __le64 entries[32];
+};
+
+enum {
+ NVME_HOST_MEM_ENABLE = (1 << 0),
+ NVME_HOST_MEM_RETURN = (1 << 1),
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+ nvme_admin_delete_sq = 0x00,
+ nvme_admin_create_sq = 0x01,
+ nvme_admin_get_log_page = 0x02,
+ nvme_admin_delete_cq = 0x04,
+ nvme_admin_create_cq = 0x05,
+ nvme_admin_identify = 0x06,
+ nvme_admin_abort_cmd = 0x08,
+ nvme_admin_set_features = 0x09,
+ nvme_admin_get_features = 0x0a,
+ nvme_admin_async_event = 0x0c,
+ nvme_admin_ns_mgmt = 0x0d,
+ nvme_admin_activate_fw = 0x10,
+ nvme_admin_download_fw = 0x11,
+ nvme_admin_dev_self_test = 0x14,
+ nvme_admin_ns_attach = 0x15,
+ nvme_admin_keep_alive = 0x18,
+ nvme_admin_directive_send = 0x19,
+ nvme_admin_directive_recv = 0x1a,
+ nvme_admin_virtual_mgmt = 0x1c,
+ nvme_admin_nvme_mi_send = 0x1d,
+ nvme_admin_nvme_mi_recv = 0x1e,
+ nvme_admin_dbbuf = 0x7C,
+ nvme_admin_format_nvm = 0x80,
+ nvme_admin_security_send = 0x81,
+ nvme_admin_security_recv = 0x82,
+ nvme_admin_sanitize_nvm = 0x84,
+};
+
+enum { /* one anonymous enum holding several unrelated groups; numbers repeat across groups */
+ NVME_QUEUE_PHYS_CONTIG = (1 << 0), /* NVME_QUEUE_*, NVME_CQ_*, NVME_SQ_*: queue flag bits */
+ NVME_CQ_IRQ_ENABLED = (1 << 1), /* same number as NVME_SQ_PRIO_HIGH */
+ NVME_SQ_PRIO_URGENT = (0 << 1), /* SQ_PRIO_*: 2-bit value held in bits 1-2 */
+ NVME_SQ_PRIO_HIGH = (1 << 1),
+ NVME_SQ_PRIO_MEDIUM = (2 << 1),
+ NVME_SQ_PRIO_LOW = (3 << 1),
+ NVME_FEAT_ARBITRATION = 0x01, /* NVME_FEAT_*: feature identifiers */
+ NVME_FEAT_POWER_MGMT = 0x02,
+ NVME_FEAT_LBA_RANGE = 0x03,
+ NVME_FEAT_TEMP_THRESH = 0x04,
+ NVME_FEAT_ERR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WC = 0x06,
+ NVME_FEAT_NUM_QUEUES = 0x07,
+ NVME_FEAT_IRQ_COALESCE = 0x08,
+ NVME_FEAT_IRQ_CONFIG = 0x09,
+ NVME_FEAT_WRITE_ATOMIC = 0x0a,
+ NVME_FEAT_ASYNC_EVENT = 0x0b,
+ NVME_FEAT_AUTO_PST = 0x0c,
+ NVME_FEAT_HOST_MEM_BUF = 0x0d,
+ NVME_FEAT_TIMESTAMP = 0x0e,
+ NVME_FEAT_KATO = 0x0f,
+ NVME_FEAT_HCTM = 0X10,
+ NVME_FEAT_NOPSC = 0X11,
+ NVME_FEAT_RRL = 0x12,
+ NVME_FEAT_PLM_CONFIG = 0x13,
+ NVME_FEAT_PLM_WINDOW = 0x14,
+ NVME_FEAT_SW_PROGRESS = 0x80,
+ NVME_FEAT_HOST_ID = 0x81,
+ NVME_FEAT_RESV_MASK = 0x82,
+ NVME_FEAT_RESV_PERSIST = 0x83,
+ NVME_FEAT_WRITE_PROTECT = 0x84,
+ NVME_LOG_ERROR = 0x01, /* NVME_LOG_*: log page identifiers (numbering restarts at 0x01) */
+ NVME_LOG_SMART = 0x02,
+ NVME_LOG_FW_SLOT = 0x03,
+ NVME_LOG_CHANGED_NS = 0x04,
+ NVME_LOG_CMD_EFFECTS = 0x05,
+ NVME_LOG_DEVICE_SELF_TEST = 0x06,
+ NVME_LOG_TELEMETRY_HOST = 0x07,
+ NVME_LOG_TELEMETRY_CTRL = 0x08,
+ NVME_LOG_ENDURANCE_GROUP = 0x09,
+ NVME_LOG_ANA = 0x0c,
+ NVME_LOG_DISC = 0x70,
+ NVME_LOG_RESERVATION = 0x80,
+ NVME_LOG_SANITIZE = 0x81,
+ NVME_FWACT_REPL = (0 << 3), /* FWACT_*: 2-bit value held in bits 3-4 */
+ NVME_FWACT_REPL_ACTV = (1 << 3),
+ NVME_FWACT_ACTV = (2 << 3),
+};
+
+enum {
+ NVME_NO_LOG_LSP = 0x0,
+ NVME_NO_LOG_LPO = 0x0,
+ NVME_LOG_ANA_LSP_RGO = 0x1,
+ NVME_TELEM_LSP_CREATE = 0x1,
+};
+
+/* Sanitize and Sanitize Monitor/Log */
+enum {
+ /* Sanitize */
+ NVME_SANITIZE_NO_DEALLOC = 0x00000200,
+ NVME_SANITIZE_OIPBP = 0x00000100,
+ NVME_SANITIZE_OWPASS_SHIFT = 0x00000004, /* judging by the name a shift count, not a mask - confirm at the use site */
+ NVME_SANITIZE_AUSE = 0x00000008,
+ NVME_SANITIZE_ACT_CRYPTO_ERASE = 0x00000004, /* ACT_*: plain values 1-4, not bit masks */
+ NVME_SANITIZE_ACT_OVERWRITE = 0x00000003,
+ NVME_SANITIZE_ACT_BLOCK_ERASE = 0x00000002,
+ NVME_SANITIZE_ACT_EXIT = 0x00000001,
+
+ /* Sanitize Monitor/Log */
+ NVME_SANITIZE_LOG_DATA_LEN = 0x0014, /* 20, the size of the five fields of struct nvme_sanitize_log_page */
+ NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED = 0x0100,
+ NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK = 0x00F8, /* bits 3-7 */
+ NVME_SANITIZE_LOG_STATUS_MASK = 0x0007, /* bits 0-2; the four values below fit inside this mask */
+ NVME_SANITIZE_LOG_NEVER_SANITIZED = 0x0000,
+ NVME_SANITIZE_LOG_COMPLETED_SUCCESS = 0x0001,
+ NVME_SANITIZE_LOG_IN_PROGESS = 0x0002, /* sic: identifier is misspelled; renaming it would break users */
+ NVME_SANITIZE_LOG_COMPLETED_FAILED = 0x0003,
+};
+
+enum {
+ /* Self-test log Validation bits */
+ NVME_SELF_TEST_VALID_NSID = 1 << 0,
+ NVME_SELF_TEST_VALID_FLBA = 1 << 1,
+ NVME_SELF_TEST_VALID_SCT = 1 << 2,
+ NVME_SELF_TEST_VALID_SC = 1 << 3,
+ NVME_SELF_TEST_REPORTS = 20,
+};
+
+struct nvme_identify {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 cns;
+ __u8 rsvd3;
+ __le16 ctrlid;
+ __u32 rsvd11[5];
+};
+
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
+struct nvme_features {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 fid;
+ __le32 dword11;
+ __le32 dword12;
+ __le32 dword13;
+ __le32 dword14;
+ __le32 dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+ __le64 addr;
+ __le32 size;
+ __u32 rsvd;
+};
+
+struct nvme_create_cq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 cqid;
+ __le16 qsize;
+ __le16 cq_flags;
+ __le16 irq_vector;
+ __u32 rsvd12[4];
+};
+
+struct nvme_create_sq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 sqid;
+ __le16 qsize;
+ __le16 sq_flags;
+ __le16 cqid;
+ __u32 rsvd12[4];
+};
+
+struct nvme_delete_queue {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 qid;
+ __u16 rsvd10;
+ __u32 rsvd11[5];
+};
+
+struct nvme_abort_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 sqid;
+ __u16 cid;
+ __u32 rsvd11[5];
+};
+
+struct nvme_download_firmware {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __le32 offset;
+ __u32 rsvd12[4];
+};
+
+struct nvme_format_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[4];
+ __le32 cdw10;
+ __u32 rsvd11[5];
+};
+
+struct nvme_get_log_page_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 lid;
+ __u8 lsp;
+ __le16 numdl;
+ __le16 numdu;
+ __u16 rsvd11;
+ __le32 lpol;
+ __le32 lpou;
+ __u32 rsvd14[2];
+};
+
+struct nvme_directive_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __u8 doper;
+ __u8 dtype;
+ __le16 dspec;
+ __u8 endir;
+ __u8 tdtype;
+ __u16 rsvd15;
+
+ __u32 rsvd16[3];
+};
+
+/* Sanitize Log Page */
+struct nvme_sanitize_log_page {
+ __le16 progress;
+ __le16 status;
+ __le32 cdw10_info;
+ __le32 est_ovrwrt_time;
+ __le32 est_blk_erase_time;
+ __le32 est_crypto_erase_time;
+};
+
+/*
+ * Fabrics subcommands.
+ */
+enum nvmf_fabrics_opcode {
+ nvme_fabrics_command = 0x7f,
+};
+
+enum nvmf_capsule_command {
+ nvme_fabrics_type_property_set = 0x00,
+ nvme_fabrics_type_connect = 0x01,
+ nvme_fabrics_type_property_get = 0x04,
+};
+
+struct nvmf_common_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 ts[24];
+};
+
+/*
+ * The legal cntlid range a NVMe Target will provide.
+ * Note that cntlid of value 0 is considered illegal in the fabrics world.
+ * Devices based on earlier specs did not have the subsystem concept;
+ * therefore, those devices had their cntlid value set to 0 as a result.
+ */
+#define NVME_CNTLID_MIN 1
+#define NVME_CNTLID_MAX 0xffef
+#define NVME_CNTLID_DYNAMIC 0xffff
+
+#define MAX_DISC_LOGS 255
+
+/* Discovery log page entry */
+struct nvmf_disc_rsp_page_entry {
+ __u8 trtype;
+ __u8 adrfam;
+ __u8 subtype;
+ __u8 treq;
+ __le16 portid;
+ __le16 cntlid;
+ __le16 asqsz;
+ __u8 resv8[22];
+ char trsvcid[NVMF_TRSVCID_SIZE];
+ __u8 resv64[192];
+ char subnqn[NVMF_NQN_FIELD_LEN];
+ char traddr[NVMF_TRADDR_SIZE];
+ union tsas {
+ char common[NVMF_TSAS_SIZE];
+ struct rdma {
+ __u8 qptype;
+ __u8 prtype;
+ __u8 cms;
+ __u8 resv3[5];
+ __u16 pkey;
+ __u8 resv10[246];
+ } rdma;
+ struct tcp {
+ __u8 sectype;
+ } tcp;
+ } tsas;
+};
+
+/* Discovery log page header */
+struct nvmf_disc_rsp_page_hdr {
+ __le64 genctr;
+ __le64 numrec;
+ __le16 recfmt;
+ __u8 resv14[1006]; /* pads the fixed part to 8 + 8 + 2 + 1006 = 1024 bytes */
+ struct nvmf_disc_rsp_page_entry entries[0]; /* zero-length array (GNU extension): entries follow the header; presumably numrec of them - confirm */
+};
+
+struct nvmf_connect_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[19];
+ union nvme_data_ptr dptr;
+ __le16 recfmt;
+ __le16 qid;
+ __le16 sqsize;
+ __u8 cattr;
+ __u8 resv3;
+ __le32 kato;
+ __u8 resv4[12];
+};
+
+struct nvmf_connect_data {
+ uuid_t hostid;
+ __le16 cntlid;
+ char resv4[238];
+ char subsysnqn[NVMF_NQN_FIELD_LEN];
+ char hostnqn[NVMF_NQN_FIELD_LEN];
+ char resv5[256];
+};
+
+struct nvmf_property_set_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __le64 value;
+ __u8 resv4[8];
+};
+
+struct nvmf_property_get_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __u8 resv4[16];
+};
+
+struct nvme_dbbuf {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __le64 prp2;
+ __u32 rsvd12[6];
+};
+
+struct streams_directive_params {
+ __le16 msl;
+ __le16 nssa;
+ __le16 nsso;
+ __u8 rsvd[10];
+ __le32 sws;
+ __le16 sgs;
+ __le16 nsa;
+ __le16 nso;
+ __u8 rsvd2[6];
+};
+
+struct nvme_command {
+ union {
+ struct nvme_common_command common;
+ struct nvme_rw_command rw;
+ struct nvme_identify identify;
+ struct nvme_features features;
+ struct nvme_create_cq create_cq;
+ struct nvme_create_sq create_sq;
+ struct nvme_delete_queue delete_queue;
+ struct nvme_download_firmware dlfw;
+ struct nvme_format_cmd format;
+ struct nvme_dsm_cmd dsm;
+ struct nvme_write_zeroes_cmd write_zeroes;
+ struct nvme_abort_cmd abort;
+ struct nvme_get_log_page_command get_log_page;
+ struct nvmf_common_command fabrics;
+ struct nvmf_connect_command connect;
+ struct nvmf_property_set_command prop_set;
+ struct nvmf_property_get_command prop_get;
+ struct nvme_dbbuf dbbuf;
+ struct nvme_directive_cmd directive;
+ };
+};
+
+static inline bool nvme_is_write(struct nvme_command *cmd)
+{
+	/*
+	 * The answer is bit 0 of the opcode.  Fabrics commands all share a
+	 * single opcode, so for them bit 0 of the fabrics command type is
+	 * used instead.
+	 */
+	if (!unlikely(cmd->common.opcode == nvme_fabrics_command))
+		return cmd->common.opcode & 1;
+	return cmd->fabrics.fctype & 1;
+}
+
+enum {
+ /*
+ * Generic Command Status:
+ */
+ NVME_SC_SUCCESS = 0x0,
+ NVME_SC_INVALID_OPCODE = 0x1,
+ NVME_SC_INVALID_FIELD = 0x2,
+ NVME_SC_CMDID_CONFLICT = 0x3,
+ NVME_SC_DATA_XFER_ERROR = 0x4,
+ NVME_SC_POWER_LOSS = 0x5,
+ NVME_SC_INTERNAL = 0x6,
+ NVME_SC_ABORT_REQ = 0x7,
+ NVME_SC_ABORT_QUEUE = 0x8,
+ NVME_SC_FUSED_FAIL = 0x9,
+ NVME_SC_FUSED_MISSING = 0xa,
+ NVME_SC_INVALID_NS = 0xb,
+ NVME_SC_CMD_SEQ_ERROR = 0xc,
+ NVME_SC_SGL_INVALID_LAST = 0xd,
+ NVME_SC_SGL_INVALID_COUNT = 0xe,
+ NVME_SC_SGL_INVALID_DATA = 0xf,
+ NVME_SC_SGL_INVALID_METADATA = 0x10,
+ NVME_SC_SGL_INVALID_TYPE = 0x11,
+
+ NVME_SC_SGL_INVALID_OFFSET = 0x16,
+ NVME_SC_SGL_INVALID_SUBTYPE = 0x17,
+
+ NVME_SC_SANITIZE_FAILED = 0x1C,
+ NVME_SC_SANITIZE_IN_PROGRESS = 0x1D,
+
+ NVME_SC_NS_WRITE_PROTECTED = 0x20,
+
+ NVME_SC_LBA_RANGE = 0x80,
+ NVME_SC_CAP_EXCEEDED = 0x81,
+ NVME_SC_NS_NOT_READY = 0x82,
+ NVME_SC_RESERVATION_CONFLICT = 0x83,
+
+ /*
+ * Command Specific Status:
+ */
+ NVME_SC_CQ_INVALID = 0x100,
+ NVME_SC_QID_INVALID = 0x101,
+ NVME_SC_QUEUE_SIZE = 0x102,
+ NVME_SC_ABORT_LIMIT = 0x103,
+ NVME_SC_ABORT_MISSING = 0x104,
+ NVME_SC_ASYNC_LIMIT = 0x105,
+ NVME_SC_FIRMWARE_SLOT = 0x106,
+ NVME_SC_FIRMWARE_IMAGE = 0x107,
+ NVME_SC_INVALID_VECTOR = 0x108,
+ NVME_SC_INVALID_LOG_PAGE = 0x109,
+ NVME_SC_INVALID_FORMAT = 0x10a,
+ NVME_SC_FW_NEEDS_CONV_RESET = 0x10b,
+ NVME_SC_INVALID_QUEUE = 0x10c,
+ NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d,
+ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e,
+ NVME_SC_FEATURE_NOT_PER_NS = 0x10f,
+ NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110,
+ NVME_SC_FW_NEEDS_RESET = 0x111,
+ NVME_SC_FW_NEEDS_MAX_TIME = 0x112,
+ NVME_SC_FW_ACIVATE_PROHIBITED = 0x113,
+ NVME_SC_OVERLAPPING_RANGE = 0x114,
+ NVME_SC_NS_INSUFFICENT_CAP = 0x115,
+ NVME_SC_NS_ID_UNAVAILABLE = 0x116,
+ NVME_SC_NS_ALREADY_ATTACHED = 0x118,
+ NVME_SC_NS_IS_PRIVATE = 0x119,
+ NVME_SC_NS_NOT_ATTACHED = 0x11a,
+ NVME_SC_THIN_PROV_NOT_SUPP = 0x11b,
+ NVME_SC_CTRL_LIST_INVALID = 0x11c,
+ NVME_SC_BP_WRITE_PROHIBITED = 0x11e,
+
+ /*
+ * I/O Command Set Specific - NVM commands:
+ */
+ NVME_SC_BAD_ATTRIBUTES = 0x180,
+ NVME_SC_INVALID_PI = 0x181,
+ NVME_SC_READ_ONLY = 0x182,
+ NVME_SC_ONCS_NOT_SUPPORTED = 0x183,
+
+ /*
+ * I/O Command Set Specific - Fabrics commands (0x180-0x183 reuse the NVM command values above):
+ */
+ NVME_SC_CONNECT_FORMAT = 0x180,
+ NVME_SC_CONNECT_CTRL_BUSY = 0x181,
+ NVME_SC_CONNECT_INVALID_PARAM = 0x182,
+ NVME_SC_CONNECT_RESTART_DISC = 0x183,
+ NVME_SC_CONNECT_INVALID_HOST = 0x184,
+
+ NVME_SC_DISCOVERY_RESTART = 0x190,
+ NVME_SC_AUTH_REQUIRED = 0x191,
+
+ /*
+ * Media and Data Integrity Errors:
+ */
+ NVME_SC_WRITE_FAULT = 0x280,
+ NVME_SC_READ_ERROR = 0x281,
+ NVME_SC_GUARD_CHECK = 0x282,
+ NVME_SC_APPTAG_CHECK = 0x283,
+ NVME_SC_REFTAG_CHECK = 0x284,
+ NVME_SC_COMPARE_FAILED = 0x285,
+ NVME_SC_ACCESS_DENIED = 0x286,
+ NVME_SC_UNWRITTEN_BLOCK = 0x287,
+
+ /*
+ * Path-related Errors:
+ */
+ NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
+ NVME_SC_ANA_INACCESSIBLE = 0x302,
+ NVME_SC_ANA_TRANSITION = 0x303,
+
+ NVME_SC_DNR = 0x4000,
+};
+
+struct nvme_completion {
+ /*
+ * Used by Admin and Fabrics commands to return data:
+ */
+ union nvme_result {
+ __le16 u16;
+ __le32 u32;
+ __le64 u64;
+ } result;
+ __le16 sq_head; /* how much of this queue may be reclaimed */
+ __le16 sq_id; /* submission queue that generated this entry */
+ __u16 command_id; /* of the command which completed */
+ __le16 status; /* did the command fail, and if so, why? */
+};
+
+#define NVME_VS(major, minor, tertiary) \
+ (((major) << 16) | ((minor) << 8) | (tertiary)) /* packs major into bits 16 and up, minor into bits 8-15, tertiary into bits 0-7 */
+
+#define NVME_MAJOR(ver) ((ver) >> 16) /* inverse of NVME_VS(): everything above bit 15 */
+#define NVME_MINOR(ver) (((ver) >> 8) & 0xff) /* bits 8-15 */
+#define NVME_TERTIARY(ver) ((ver) & 0xff) /* bits 0-7 */
+
+#endif /* _LINUX_NVME_H */
Index: multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme/linux/nvme_ioctl.h
@@ -0,0 +1,67 @@
+/*
+ * Definitions for the NVM Express ioctl interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_LINUX_NVME_IOCTL_H
+#define _UAPI_LINUX_NVME_IOCTL_H
+
+#include <linux/types.h>
+#include <sys/ioctl.h>
+
+struct nvme_user_io {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+};
+
+struct nvme_passthru_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd /* makes "struct nvme_admin_cmd" name the same type as struct nvme_passthru_cmd */
+
+#define NVME_IOCTL_ID _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET _IO('N', 0x44)
+#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
+#define NVME_IOCTL_RESCAN _IO('N', 0x46)
+
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
Index: multipath-tools-130222/Makefile.inc
===================================================================
--- multipath-tools-130222.orig/Makefile.inc
+++ multipath-tools-130222/Makefile.inc
@@ -37,6 +37,7 @@ mpathpersistdir = $(TOPDIR)/libmpathpers
includedir = $(prefix)/usr/include
mpathcmddir = $(TOPDIR)/libmpathcmd
libdmmpdir = $(TOPDIR)/libdmmp
+nvmedir = $(TOPDIR)/libmultipath/nvme
pkgconfdir = $(prefix)/usr/$(LIB)/pkgconfig
GZIP = /bin/gzip -9 -c
Index: multipath-tools-130222/libmultipath/Makefile
===================================================================
--- multipath-tools-130222.orig/libmultipath/Makefile
+++ multipath-tools-130222/libmultipath/Makefile
@@ -8,7 +8,7 @@ SONAME=0
DEVLIB = libmultipath.so
LIBS = $(DEVLIB).$(SONAME)
LIBDEPS = -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -laio
-CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir)
+CFLAGS += -fPIC -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir)
OBJS = memory.o parser.o vector.o devmapper.o \
hwtable.o blacklist.o util.o dmparser.o config.o \
@@ -17,7 +17,7 @@ OBJS = memory.o parser.o vector.o devmap
switchgroup.o uxsock.o print.o alias.o log_pthread.o \
log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \
lock.o waiter.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
- io_err_stat.o
+ io_err_stat.o nvme-lib.o
LIBDM_API_FLUSH = $(shell grep -Ecs '^[a-z]*[[:space:]]+dm_task_no_flush' /usr/include/libdevmapper.h)
@@ -46,6 +46,9 @@ endif
all: $(LIBS)
+nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
+ $(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $<
+
$(LIBS): $(OBJS)
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ $(CFLAGS) -o $@ $(OBJS) $(LIBDEPS)
ln -sf $@ $(DEVLIB)
Index: multipath-tools-130222/libmultipath/nvme-ioctl.c
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme-ioctl.c
@@ -0,0 +1,869 @@
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "nvme-ioctl.h"
+
+/* Return 0 when fd is a character device, otherwise a positive errno value. */
+static int nvme_verify_chr(int fd)
+{
+	struct stat nvme_stat;	/* automatic rather than static: nothing shared between threads */
+
+	if (fstat(fd, &nvme_stat) < 0) {
+		int err = errno;	/* saved first: perror() is allowed to change errno */
+		perror("fstat");
+		return err;
+	}
+	if (!S_ISCHR(nvme_stat.st_mode)) {
+		fprintf(stderr, "Error: requesting reset on non-controller handle\n");
+		return ENOTBLK;
+	}
+	return 0;
+}
+
+/* Issue NVME_IOCTL_SUBSYS_RESET on fd once nvme_verify_chr() has accepted it. */
+static int nvme_subsystem_reset(int fd)
+{
+	int err = nvme_verify_chr(fd);
+
+	if (err != 0)
+		return err;
+	return ioctl(fd, NVME_IOCTL_SUBSYS_RESET);
+}
+
+/* Issue NVME_IOCTL_RESET on fd once nvme_verify_chr() has accepted it. */
+static int nvme_reset_controller(int fd)
+{
+	int err = nvme_verify_chr(fd);
+
+	if (err != 0)
+		return err;
+	return ioctl(fd, NVME_IOCTL_RESET);
+}
+
+/* Issue NVME_IOCTL_RESCAN on fd once nvme_verify_chr() has accepted it. */
+static int nvme_ns_rescan(int fd)
+{
+	int err = nvme_verify_chr(fd);
+
+	if (err != 0)
+		return err;
+	return ioctl(fd, NVME_IOCTL_RESCAN);
+}
+
+/* -errno when fd cannot be stat'ed or is no block device, else the NVME_IOCTL_ID ioctl result. */
+static int nvme_get_nsid(int fd)
+{
+	struct stat nvme_stat;	/* automatic rather than static: nothing shared between threads */
+
+	if (fstat(fd, &nvme_stat) < 0)
+		return -errno;
+
+	if (!S_ISBLK(nvme_stat.st_mode)) {
+		fprintf(stderr,
+			"Error: requesting namespace-id from non-block device\n");
+		errno = ENOTBLK;
+		return -errno;
+	}
+	return ioctl(fd, NVME_IOCTL_ID);
+}
+
+/* Hand cmd to the driver with the given ioctl request; the ioctl() result is returned as is. */
+static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd, struct nvme_passthru_cmd *cmd)
+{
+	return ioctl(fd, ioctl_cmd, cmd);
+}
+
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd); /* admin passthrough request; the ioctl() result is returned unchanged */
+}
+
+static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, NVME_IOCTL_IO_CMD, cmd); /* I/O passthrough request; the ioctl() result is returned unchanged */
+}
+
+/*
+ * Build a passthrough command from the individual fields and submit it with
+ * ioctl_cmd.  When the submission returns 0 and result is non-NULL, the
+ * command's result field is copied to *result.
+ */
+static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode,
+			 __u8 flags, __u16 rsvd,
+			 __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+			 __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+			 __u32 data_len, void *data, __u32 metadata_len,
+			 void *metadata, __u32 timeout_ms, __u32 *result)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = opcode,
+		.flags = flags,
+		.rsvd1 = rsvd,
+		.nsid = nsid,
+		.cdw2 = cdw2,
+		.cdw3 = cdw3,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.metadata_len = metadata_len,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.cdw10 = cdw10, .cdw11 = cdw11, .cdw12 = cdw12,
+		.cdw13 = cdw13, .cdw14 = cdw14, .cdw15 = cdw15,
+		.timeout_ms = timeout_ms,
+		.result = 0,
+	};
+	int status = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+
+	if (status == 0 && result)
+		*result = cmd.result;
+	return status;
+}
+
+/* Fill a struct nvme_user_io for opcode and submit it with NVME_IOCTL_SUBMIT_IO. */
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+		   __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+		   void *metadata)
+{
+	struct nvme_user_io io = {
+		.opcode = opcode,
+		.flags = 0, .rsvd = 0,
+		.control = control,
+		.nblocks = nblocks,
+		.slba = slba,
+		.addr = (__u64)(uintptr_t) data,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.dsmgmt = dsmgmt,
+		.reftag = reftag,
+		.apptag = apptag,
+		.appmask = appmask,
+	};
+	return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+/* nvme_io() with opcode nvme_cmd_read; every other argument is passed through unchanged. */
+static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+		     __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_read, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* nvme_io() with opcode nvme_cmd_write; every other argument is passed through unchanged. */
+static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+		      __u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_write, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* nvme_io() with opcode nvme_cmd_compare; every other argument is passed through unchanged. */
+static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+			__u32 reftag, __u16 apptag, __u16 appmask, void *data, void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_compare, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* nvme_passthru() bound to NVME_IOCTL_IO_CMD; the command result is not reported back. */
+static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+			    __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
+			    __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14,
+			    __u32 cdw15, __u32 data_len, void *data,
+			    __u32 metadata_len, void *metadata, __u32 timeout_ms)
+{
+	return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd, nsid,
+			     cdw2, cdw3, cdw10, cdw11, cdw12, cdw13, cdw14, cdw15,
+			     data_len, data, metadata_len, metadata, timeout_ms, NULL);
+}
+
+/* Submit nvme_cmd_write_zeroes: slba in cdw10/11, nlb and control in cdw12, tags in cdw14/15. */
+static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+			    __u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_write_zeroes,
+		.nsid = nsid,
+		.cdw10 = slba & 0xffffffff,
+		.cdw11 = slba >> 32,
+		.cdw12 = nlb | ((__u32)control << 16),	/* widen first: a __u16 promotes to int and bit 15 would hit the sign bit */
+		.cdw14 = reftag,
+		.cdw15 = apptag | ((__u32)appmask << 16),
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_write_uncor: slba is split over cdw10/cdw11, nlb goes into cdw12. */
+static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_write_uncor,
+		.nsid = nsid,
+		.cdw12 = nlb,
+		.cdw11 = slba >> 32,
+		.cdw10 = slba & 0xffffffff,
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit nvme_cmd_flush for namespace nsid through the I/O passthrough ioctl.
+ */
+static int nvme_flush(int fd, __u32 nsid)
+{
+	struct nvme_passthru_cmd cmd = { .opcode = nvme_cmd_flush, .nsid = nsid };
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_dsm with the nr_ranges entries at dsm as data; cdw11 is passed through. */
+static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+		    __u16 nr_ranges)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_dsm,
+		.nsid = nsid,
+		.cdw10 = nr_ranges - 1,	/* the range count is sent minus one */
+		.cdw11 = cdw11,
+		.addr = (__u64)(uintptr_t) dsm,
+		.data_len = nr_ranges * sizeof(*dsm),
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* malloc() nr_ranges little-endian range entries built from the three parallel arrays; NULL on failure. */
+static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas,
+						    __u64 *slbas, __u16 nr_ranges)
+{
+	struct nvme_dsm_range *ranges = malloc(nr_ranges * sizeof(*ranges));
+	int idx;
+	if (ranges == NULL) {
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
+		return NULL;
+	}
+	for (idx = 0; idx < nr_ranges; idx++) {
+		ranges[idx].cattr = cpu_to_le32(ctx_attrs[idx]);
+		ranges[idx].nlb = cpu_to_le32(llbas[idx]);
+		ranges[idx].slba = cpu_to_le64(slbas[idx]);
+	}
+	return ranges;
+}
+
+/* Submit nvme_cmd_resv_acquire; the data buffer is { crkey, nrkey }, both little-endian. */
+static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+			     bool iekey, __u64 crkey, __u64 nrkey)
+{
+	__le64 keys[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_acquire,
+		.nsid = nsid,
+		/* racqa in bits 0-2, iekey in bit 3, rtype in bits 8-15 */
+		.cdw10 = (racqa & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8,
+		.addr = (__u64)(uintptr_t) (keys),
+		.data_len = sizeof(keys),
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_resv_register; the data buffer is { crkey, nrkey }, both little-endian. */
+static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+			      bool iekey, __u64 crkey, __u64 nrkey)
+{
+	__le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+	/* cptpl sits in bits 30-31: widen it first, a promoted int would overflow for cptpl >= 2 */
+	__u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) | (__u32)cptpl << 30;
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_register,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.addr = (__u64)(uintptr_t) (payload),
+		.data_len = sizeof(payload),
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_resv_release; the data buffer is the little-endian crkey alone. */
+static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+			     bool iekey, __u64 crkey)
+{
+	__le64 key[1] = { cpu_to_le64(crkey) };
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_release,
+		.nsid = nsid,
+		/* rrela in bits 0-2, iekey in bit 3, rtype in bits 8-15 */
+		.cdw10 = (rrela & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8,
+		.addr = (__u64)(uintptr_t) (key),
+		.data_len = sizeof(key),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_resv_report; numd goes into cdw10 and data must hold (numd + 1) * 4 bytes. */
+static int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_report,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = (numd + 1) * 4,
+		.cdw10 = numd,
+		.cdw11 = cdw11,
+	};
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_admin_identify with cdw10 and cdw11; data receives NVME_IDENTIFY_DATA_SIZE bytes. */
+static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_identify,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = NVME_IDENTIFY_DATA_SIZE,
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+ return nvme_identify13(fd, nsid, cdw10, 0, data); /* same as nvme_identify13() with cdw11 fixed to 0 */
+}
+
+static int nvme_identify_ctrl(int fd, void *data)
+{
+ return nvme_identify(fd, 0, 1, data); /* nsid 0, cdw10 = 1; presumably the "identify controller" CNS value - confirm against the NVME_ID_CNS_* list */
+}
+
+/* Identify for namespace nsid; "present" selects NVME_ID_CNS_NS_PRESENT instead of NVME_ID_CNS_NS. */
+static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data)
+{
+	return nvme_identify(fd, nsid,
+			     present ? NVME_ID_CNS_NS_PRESENT : NVME_ID_CNS_NS, data);
+}
+
+/* Identify with a namespace-list CNS; "all" selects the PRESENT list instead of the ACTIVE list. */
+static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data)
+{
+	return nvme_identify(fd, nsid,
+			     all ? NVME_ID_CNS_NS_PRESENT_LIST : NVME_ID_CNS_NS_ACTIVE_LIST, data);
+}
+
+/* Identify controller list; cntid fills cdw10 bits 16-31 and is widened first so the shift cannot overflow int. */
+static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
+{
+	int cns = nsid ? NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST;
+	return nvme_identify(fd, nsid, ((__u32)cntid << 16) | cns, data);
+}
+
+static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data)
+{
+ /* Identify for namespace nsid with cdw10 set to NVME_ID_CNS_NS_DESC_LIST. */
+ return nvme_identify(fd, nsid, NVME_ID_CNS_NS_DESC_LIST, data);
+}
+
+static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data)
+{
+ return nvme_identify13(fd, 0, NVME_ID_CNS_NVMSET_LIST, nvmset_id, data); /* nsid 0; nvmset_id travels in cdw11 */
+}
+
+/*
+ * Get Log Page with lsp, lsi, rae and a 64-bit page offset (lpo); data_len
+ * is in bytes and is sent as a dword count minus one.
+ */
+static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+			  __u16 lsi, bool rae, __u32 data_len, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_get_log_page,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	__u32 numd = (data_len >> 2) - 1;
+	__u32 numdu = numd >> 16, numdl = numd & 0xffff;
+
+	/* 32-bit unsigned operands: a __u16 promoted to int must not be shifted into the sign bit */
+	cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0) | (lsp << 8);
+	cmd.cdw11 = numdu | ((__u32)lsi << 16);
+	cmd.cdw12 = lpo;
+	cmd.cdw13 = (lpo >> 32);
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Read data_len bytes of log page log_id into data, at most 4096 bytes per
+ * command.  4k is the smallest possible transfer unit, so staying within it
+ * avoids having to check the MDTS value of the controller.
+ * Returns 0 when all chunks were read, else the first non-zero command result.
+ */
+static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+			__u32 data_len, void *data)
+{
+	char *ptr = data;	/* byte pointer: arithmetic on void * is only a GNU extension */
+	__u32 offset = 0;
+
+	do {
+		__u32 xfer_len = data_len - offset;
+		int ret;
+
+		if (xfer_len > 4096)
+			xfer_len = 4096;
+		ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP,
+				     offset, 0, rae, xfer_len, ptr);
+		if (ret)
+			return ret;
+		offset += xfer_len;
+		ptr += xfer_len;
+	} while (offset < data_len);
+
+	return 0;
+}
+
+/* Telemetry log read: the CTRL log when ctrl_init is set, else the HOST log (LSP_CREATE if generate_report). */
+static int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+				  int ctrl_init, size_t log_page_size, __u64 offset)
+{
+	__u8 log_id = NVME_LOG_TELEMETRY_HOST;
+	__u8 lsp = NVME_NO_LOG_LSP;
+
+	if (ctrl_init)
+		log_id = NVME_LOG_TELEMETRY_CTRL;
+	else if (generate_report)
+		lsp = NVME_TELEM_LSP_CREATE;
+
+	/* every variant passes lsi = 0 and rae = 1 */
+	return nvme_get_log13(fd, NVME_NSID_ALL, log_id, lsp, offset,
+			      0, 1, log_page_size, lp);
+}
+
+/* Read the NVME_LOG_FW_SLOT page for NVME_NSID_ALL into *fw_log, with rae set. */
+static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log)
+{
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true, sizeof(*fw_log), fw_log);
+}
+
+/* Read the NVME_LOG_CHANGED_NS page (nsid 0, rae set) into the log member of the argument. */
+static int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log)
+{
+	return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true,
+			    sizeof(changed_ns_list_log->log), changed_ns_list_log->log);
+}
+
+/* Read "entries" records of the NVME_LOG_ERROR page for NVME_NSID_ALL into err_log. */
+static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log)
+{
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false, entries * sizeof(*err_log), err_log);
+}
+
+static int nvme_endurance_log(int fd, __u16 group_id, struct nvme_endurance_group_log *endurance_log)
+{
+ return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0, group_id, 0,
+ sizeof(*endurance_log), endurance_log); /* group_id is passed as lsi; nsid, lsp, lpo and rae are all 0 */
+}
+
+/* Read the NVME_LOG_SMART page of namespace nsid into *smart_log (rae clear). */
+static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log)
+{
+	return nvme_get_log(fd, nsid, NVME_LOG_SMART, false, sizeof(*smart_log), smart_log);
+}
+
+/* Read ana_log_len bytes of the NVME_LOG_ANA page in a single command, from
+ * offset 0 with rae set; rgo is passed on as the lsp argument. */
+static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{
+	return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, 0, 0,
+			      true, ana_log_len, ana_log);
+}
+
+static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log)
+{
+ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST, false,
+ sizeof(*self_test_log), self_test_log); /* whole structure for NVME_NSID_ALL, rae clear */
+}
+
+/* Read the NVME_LOG_CMD_EFFECTS page (nsid 0, rae clear) into *effects_log. */
+static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log)
+{
+	return nvme_get_log(fd, 0, NVME_LOG_CMD_EFFECTS, false, sizeof(*effects_log), effects_log);
+}
+
+static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size)
+{
+ return nvme_get_log(fd, 0, NVME_LOG_DISC, false, size, log); /* reads size bytes (header plus whatever entries fit), nsid 0, rae clear */
+}
+
+/* Read the NVME_LOG_SANITIZE page (nsid 0, rae clear) into *sanitize_log. */
+static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log)
+{
+	return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false, sizeof(*sanitize_log), sanitize_log);
+}
+
+/* Shared Get/Set Features submission; on a 0 return the command's result goes to *result if non-NULL. */
+static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11,
+			__u32 cdw12, __u32 data_len, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = opcode,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+		.cdw12 = cdw12,
+	};
+	int status = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (status == 0 && result)
+		*result = cmd.result;
+	return status;
+}
+
+/* Set Features for fid: value is sent as cdw11 and "save" sets bit 31 of cdw10. */
+static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+			    bool save, __u32 data_len, void *data, __u32 *result)
+{
+	const __u32 save_bit = save ? 0x80000000 : 0;
+	return nvme_feature(fd, nvme_admin_set_features, nsid, fid | save_bit, value,
+			    cdw12, data_len, data, result);
+}
+
+static int nvme_property(int fd, __u8 fctype, __le32 off, __le64 *value, __u8 attrib)
+{
+ int err;
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_fabrics_command,
+ .cdw10 = attrib,
+ .cdw11 = off, /* stored exactly as passed; the callers in this file pass cpu_to_le32(offset) */
+ };
+ /* Fabrics property get/set through the admin passthrough ioctl: *value is the input for a set and the output of a get. */
+ if (!value) {
+ errno = EINVAL;
+ return -errno;
+ }
+ /* the fabrics command type is carried in the nsid member of the passthrough structure */
+ if (fctype == nvme_fabrics_type_property_get){
+ cmd.nsid = nvme_fabrics_type_property_get;
+ } else if(fctype == nvme_fabrics_type_property_set) {
+ cmd.nsid = nvme_fabrics_type_property_set;
+ cmd.cdw12 = *value; /* NOTE(review): cdw12 is a __u32, so the upper 32 bits of *value are dropped */
+ } else {
+ errno = EINVAL;
+ return -errno;
+ }
+ /* NOTE(review): cmd.result is a __u32, so a get can hand back at most 32 bits of the property */
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && fctype == nvme_fabrics_type_property_get)
+ *value = cpu_to_le64(cmd.result);
+ return err;
+}
+
+/*
+ * Read one fabrics property at "offset" into *value.  *advance is set to the
+ * property width in bytes: 8 for CAP, ASQ and ACQ, 4 for everything else.
+ * Returns -EINVAL if value is NULL, otherwise the result of nvme_property().
+ */
+static int get_property_helper(int fd, int offset, void *value, int *advance)
+{
+	const bool wide = offset == NVME_REG_CAP || offset == NVME_REG_ASQ ||
+			  offset == NVME_REG_ACQ;
+	__le64 raw;
+	int rc;
+
+	/* The width is reported even when there is nowhere to store the value. */
+	*advance = wide ? 8 : 4;
+	if (!value)
+		return -EINVAL;
+
+	rc = nvme_property(fd, nvme_fabrics_type_property_get, cpu_to_le32(offset),
+			   &raw, wide);
+	if (rc)
+		return rc;
+
+	/* 8-byte properties fill a uint64_t, all others only a uint32_t at value. */
+	if (wide)
+		*((uint64_t *)value) = le64_to_cpu(raw);
+	else
+		*((uint32_t *)value) = le32_to_cpu(raw);
+
+	return 0;
+}
+
+static int nvme_get_property(int fd, int offset, uint64_t *value)
+{ /* Read a single property; the width reported by the helper is discarded. */
+ int advance;
+ return get_property_helper(fd, offset, value, &advance); /* NOTE(review): 4-byte properties store only a uint32_t at value - callers presumably pre-zero it; confirm */
+}
+
+/* Read the properties from NVME_REG_CAP through NVME_REG_CMBSZ into a new page-sized, 0xff-filled buffer. */
+/* Returns 0 if at least one read succeeded; *pbar is never freed here, so it stays with the caller. */
+static int nvme_get_properties(int fd, void **pbar)
+{
+	int offset, advance, ret = -EINVAL;
+	int size = getpagesize();
+	char *regs = malloc(size);	/* byte pointer: no arithmetic on void * needed */
+
+	*pbar = regs;
+	if (!regs) {
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
+		return -ENOMEM;
+	}
+	memset(regs, 0xff, size);
+	for (offset = NVME_REG_CAP; offset <= NVME_REG_CMBSZ; offset += advance) {
+		if (!get_property_helper(fd, offset, regs + offset, &advance))
+			ret = 0;
+	}
+
+	return ret;
+}
+
+/* Write "value" to the property at "offset"; CAP, ASQ and ACQ are flagged to nvme_property() as 8-byte properties. */
+static int nvme_set_property(int fd, int offset, int value)
+{
+	__le64 val = cpu_to_le64(value);
+	__le32 off = cpu_to_le32(offset);
+	bool is64bit;
+
+	/* Classify on the host-order offset: "off" may be byte-swapped on big-endian hosts. */
+	switch (offset) {
+	case NVME_REG_CAP:
+	case NVME_REG_ASQ:
+	case NVME_REG_ACQ:
+		is64bit = true;
+		break;
+	default:
+		is64bit = false;
+	}
+	return nvme_property(fd, nvme_fabrics_type_property_set, off, &val, is64bit ? 1 : 0);
+}
+
+/* Get Features wrapper: cdw10 holds the feature id in the low byte and "sel" shifted up by 8; cdw12 is always 0. */
+static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11,
+			    __u32 data_len, void *data, __u32 *result)
+{
+	const __u32 cdw10 = (sel << 8) | fid;
+	return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10, cdw11, 0,
+			    data_len, data, result);
+}
+
+/* Format NVM: cdw10 = lbaf | ms << 4 | pi << 5 | pil << 8 | ses << 9; "timeout" is passed on as timeout_ms. */
+static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+		       __u8 pil, __u8 ms, __u32 timeout)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_format_nvm,
+		.nsid = nsid,
+		.timeout_ms = timeout,
+		.cdw10 = ses << 9 | pil << 8 | pi << 5 | ms << 4 | lbaf,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Namespace Management with cdw10 = 0: passes a struct nvme_id_ns as a 0x1000-byte buffer; cmd.result goes to *result on success. */
+static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+			  __u8 dps, __u8 nmic, __u32 *result)
+{
+	struct nvme_id_ns ns = {
+		.nsze = cpu_to_le64(nsze),
+		.ncap = cpu_to_le64(ncap),
+		.flbas = flbas,
+		.dps = dps,
+		.nmic = nmic,
+	};
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_ns_mgmt,
+		.cdw10 = 0,
+		.addr = (__u64)(uintptr_t)&ns,
+		.data_len = 0x1000,
+	};
+	int status = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (status == 0 && result != NULL)	/* result is optional */
+		*result = cmd.result;
+	return status;
+}
+
+/* Namespace Management with cdw10 = 1 for the given nsid; returns the passthru result unchanged. */
+static int nvme_ns_delete(int fd, __u32 nsid)
+{
+	struct nvme_admin_cmd cmd = {
+		.nsid = nsid,
+		.cdw10 = 1,
+		.opcode = nvme_admin_ns_mgmt,
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Attach (attach = true) or detach the listed controllers for nsid; -EINVAL if num_ctrls does not fit the 4 KiB list buffer. */
+static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist,
+			      bool attach)
+{
+	__u8 buf[0x1000] = { 0 };
+	struct nvme_controller_list *cntlist = (struct nvme_controller_list *)buf;
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_ns_attach,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) cntlist,
+		.cdw10 = attach ? 0 : 1,
+		.data_len = 0x1000,
+	};
+	int i;
+
+	if (num_ctrls > (sizeof(buf) - sizeof(cntlist->num)) / sizeof(cntlist->identifier[0]))	/* would overrun buf */
+		return -EINVAL;
+	cntlist->num = cpu_to_le16(num_ctrls);
+	for (i = 0; i < num_ctrls; i++)
+		cntlist->identifier[i] = cpu_to_le16(ctrlist[i]);
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{ /* Convenience wrapper: nvme_ns_attachment() with attach = true. */
+ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, true);
+}
+
+static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{ /* Convenience wrapper: nvme_ns_attachment() with attach = false. */
+ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, false);
+}
+
+/* Firmware download: cdw10 is the dword count minus one and cdw11 the dword offset, both derived from byte values >> 2. */
+static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_download_fw,
+		.cdw10 = (data_len >> 2) - 1,	/* wraps to 0xffffffff when data_len < 4 */
+		.cdw11 = offset >> 2,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Firmware commit: cdw10 carries bpid in bit 31, action shifted up by 3 and slot in the low bits. */
+static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_activate_fw,
+		.cdw10 = ((__u32)bpid << 31) | (action << 3) | slot,	/* widen first: int << 31 overflows */
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Security Send: cdw10 = secp << 24 | spsp << 8 | nssf, cdw11 = tl; cmd.result goes to *result on success. */
+static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+			 __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_security_send,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = (__u32)secp << 24 | spsp << 8 | nssf,	/* widen first: secp >= 0x80 would overflow int */
+		.cdw11 = tl,
+	};
+	int err = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (!err && result)	/* result is optional */
+		*result = cmd.result;
+	return err;
+}
+
+/* Security Receive: cdw10 = secp << 24 | spsp << 8 | nssf, cdw11 = al; cmd.result goes to *result on success. */
+static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+			 __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_security_recv,
+		.nsid = nsid,
+		.cdw10 = (__u32)secp << 24 | spsp << 8 | nssf,	/* widen first: secp >= 0x80 would overflow int */
+		.cdw11 = al,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	int err = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (!err && result)	/* result is optional */
+		*result = cmd.result;
+	return err;
+}
+
+/* Directive Send: cdw10 = dword count minus one (0 for no data), cdw11 = dspec << 16 | dtype << 8 | doper, cdw12 = dw12. */
+static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+			 __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_directive_send,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = data_len ? (data_len >> 2) - 1 : 0,
+		.cdw11 = (__u32)dspec << 16 | dtype << 8 | doper,	/* widen first: dspec >= 0x8000 would overflow int */
+		.cdw12 = dw12,
+	};
+	int err = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (!err && result)	/* result is optional */
+		*result = cmd.result;
+	return err;
+}
+
+/* Directive Receive: cdw10 = dword count minus one (0 for no data), cdw11 = dspec << 16 | dtype << 8 | doper, cdw12 = dw12. */
+static int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+			 __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_directive_recv,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = data_len ? (data_len >> 2) - 1 : 0,
+		.cdw11 = (__u32)dspec << 16 | dtype << 8 | doper,	/* widen first: dspec >= 0x8000 would overflow int */
+		.cdw12 = dw12,
+	};
+	int err = nvme_submit_admin_passthru(fd, &cmd);
+
+	if (!err && result)	/* result is optional */
+		*result = cmd.result;
+	return err;
+}
+
+/* Sanitize: cdw10 is assembled from sanact, ause, owpass, oipbp and no_dealloc; cdw11 carries ovrpat. */
+static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+			 __u8 no_dealloc, __u32 ovrpat)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_sanitize_nvm,
+		.cdw11 = ovrpat,
+		.cdw10 = sanact | ause << 3 |
+			 owpass << NVME_SANITIZE_OWPASS_SHIFT |
+			 oipbp << 8 | no_dealloc << 9,
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Submit a device self-test admin command for nsid; cdw10 is passed through unchanged. */
+static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10)
+{
+	struct nvme_admin_cmd cmd = {
+		.cdw10 = cdw10,
+		.nsid = nsid,
+		.opcode = nvme_admin_dev_self_test,
+	};
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
Index: multipath-tools-130222/libmultipath/nvme-ioctl.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme-ioctl.h
@@ -0,0 +1,139 @@
+#ifndef _NVME_LIB_H
+#define _NVME_LIB_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include "linux/nvme_ioctl.h"
+#include "nvme.h"
+
+static int nvme_get_nsid(int fd);
+
+/* Generic passthrough */
+static int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+ struct nvme_passthru_cmd *cmd);
+
+static int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags,
+ __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3,
+ __u32 cdw10, __u32 cdw11, __u32 cdw12,
+ __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout_ms, __u32 *result);
+
+/* NVME_SUBMIT_IO */
+static int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+static int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+static int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+static int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+/* NVME_IO_CMD */
+static int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+ __u32 nsid, __u32 cdw2, __u32 cdw3,
+ __u32 cdw10, __u32 cdw11, __u32 cdw12,
+ __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout);
+
+static int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+ __u16 control, __u32 reftag, __u16 apptag, __u16 appmask);
+
+static int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb);
+
+static int nvme_flush(int fd, __u32 nsid);
+
+static int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+ __u16 nr_ranges);
+static struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs,
+ __u32 *llbas, __u64 *slbas,
+ __u16 nr_ranges);
+
+static int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+ bool iekey, __u64 crkey, __u64 nrkey);
+static int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+ bool iekey, __u64 crkey, __u64 nrkey);
+static int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+ bool iekey, __u64 crkey);
+static int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data);
+
+static int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data);
+static int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data);
+static int nvme_identify_ctrl(int fd, void *data);
+static int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data);
+static int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data);
+static int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data);
+static int nvme_identify_ns_descs(int fd, __u32 nsid, void *data);
+static int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data);
+static int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+ __u16 group_id, bool rae, __u32 data_len, void *data);
+static int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+ __u32 data_len, void *data);
+
+
+static int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+ int ctrl_gen, size_t log_page_size, __u64 offset);
+static int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log);
+static int nvme_changed_ns_list_log(int fd,
+ struct nvme_changed_ns_list_log *changed_ns_list_log);
+static int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log);
+static int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log);
+static int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+static int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log);
+static int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size);
+static int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log);
+static int nvme_endurance_log(int fd, __u16 group_id,
+ struct nvme_endurance_group_log *endurance_log);
+
+static int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10,
+ __u32 cdw11, __u32 cdw12, __u32 data_len, void *data,
+ __u32 *result);
+static int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+ bool save, __u32 data_len, void *data, __u32 *result);
+static int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel,
+ __u32 cdw11, __u32 data_len, void *data, __u32 *result);
+
+static int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+ __u8 pil, __u8 ms, __u32 timeout);
+
+static int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+ __u8 dps, __u8 nmic, __u32 *result);
+static int nvme_ns_delete(int fd, __u32 nsid);
+
+static int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls,
+ __u16 *ctrlist, bool attach);
+static int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+static int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+
+static int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data);
+static int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid);
+
+static int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result);
+static int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result);
+
+static int nvme_subsystem_reset(int fd);
+static int nvme_reset_controller(int fd);
+static int nvme_ns_rescan(int fd);
+
+static int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result);
+static int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result);
+static int nvme_get_properties(int fd, void **pbar);
+static int nvme_set_property(int fd, int offset, int value);
+static int nvme_get_property(int fd, int offset, uint64_t *value);
+static int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+ __u8 no_dealloc, __u32 ovrpat);
+static int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10);
+static int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log);
+#endif /* _NVME_LIB_H */
Index: multipath-tools-130222/libmultipath/nvme-lib.c
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme-lib.c
@@ -0,0 +1,49 @@
+#include <sys/types.h>
+/* avoid inclusion of standard API */
+#define _NVME_LIB_C 1
+#include "nvme-lib.h"
+#include "nvme-ioctl.c"
+#include "debug.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg)
+{
+	if (err < 0)		/* negative: report the current errno text */
+		condlog(3, "%s: %s: %s", dev, msg, strerror(errno));
+	else if (err > 0)	/* positive: reported as an NVMe status; 0 logs nothing */
+		condlog(3, "%s: %s: NVMe status %d", dev, msg, err);
+	return err;
+}
+
+int libmp_nvme_get_nsid(int fd)
+{ /* Exported wrapper around the file-local nvme_get_nsid() pulled in via nvme-ioctl.c. */
+ return nvme_get_nsid(fd);
+}
+
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl)
+{ /* Exported wrapper around the file-local nvme_identify_ctrl() pulled in via nvme-ioctl.c. */
+ return nvme_identify_ctrl(fd, ctrl);
+}
+
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+ struct nvme_id_ns *ns)
+{ /* Exported wrapper around the file-local nvme_identify_ns() pulled in via nvme-ioctl.c. */
+ return nvme_identify_ns(fd, nsid, present, ns);
+}
+
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{ /* Exported wrapper around the file-local nvme_ana_log() pulled in via nvme-ioctl.c. */
+ return nvme_ana_log(fd, ana_log, ana_log_len, rgo);
+}
+
+/* Identify the controller: < 0 on any failure, otherwise 1 if CMIC bit 3 is set (ANA supported) and 0 if not. */
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl)
+{
+	struct nvme_id_ctrl c;
+	int rc = nvme_identify_ctrl(fd, &c);
+
+	if (rc)		/* rc > 0 is a command status: c may not have been filled in, so don't read it */
+		return rc < 0 ? rc : -1;
+	if (ctrl)	/* ctrl is optional */
+		*ctrl = c;
+	return c.cmic & (1 << 3) ? 1 : 0;
+}
Index: multipath-tools-130222/libmultipath/nvme-lib.h
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/nvme-lib.h
@@ -0,0 +1,39 @@
+#ifndef NVME_LIB_H
+#define NVME_LIB_H
+
+#include "nvme.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg);
+int libmp_nvme_get_nsid(int fd);
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl);
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+ struct nvme_id_ns *ns);
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+/*
+ * Identify controller, and return true if ANA is supported
+ * ctrl will be filled in if controller is identified, even w/o ANA
+ * ctrl may be NULL
+ */
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl);
+
+#ifndef _NVME_LIB_C
+/*
+ * In all files except nvme-lib.c, the nvme functions can be called
+ * by their usual name.
+ */
+#define nvme_get_nsid libmp_nvme_get_nsid
+#define nvme_identify_ctrl libmp_nvme_identify_ctrl
+#define nvme_identify_ns libmp_nvme_identify_ns
+#define nvme_ana_log libmp_nvme_ana_log
+/*
+ * Undefine these to avoid clashes with libmultipath's byteorder.h
+ */
+#undef cpu_to_le16
+#undef cpu_to_le32
+#undef cpu_to_le64
+#undef le16_to_cpu
+#undef le32_to_cpu
+#undef le64_to_cpu
+#endif
+
+#endif /* NVME_LIB_H */
Index: multipath-tools-130222/libmultipath/prio.h
===================================================================
--- multipath-tools-130222.orig/libmultipath/prio.h
+++ multipath-tools-130222/libmultipath/prio.h
@@ -29,6 +29,7 @@ struct path;
#define PRIO_RDAC "rdac"
#define PRIO_DATACORE "datacore"
#define PRIO_WEIGHTED_PATH "weightedpath"
+#define PRIO_ANA "ana"
/*
* Value used to mark the fact prio was not defined
Index: multipath-tools-130222/libmultipath/prioritizers/Makefile
===================================================================
--- multipath-tools-130222.orig/libmultipath/prioritizers/Makefile
+++ multipath-tools-130222/libmultipath/prioritizers/Makefile
@@ -2,6 +2,7 @@
#
# Copyright (C) 2007 Christophe Varoqui, <christophe.varoqui@opensvc.com>
#
+TOPDIR = ../..
include ../../Makefile.inc
LIBS = \
@@ -15,9 +16,10 @@ LIBS = \
libpriodatacore.so \
libpriohds.so \
libprioweightedpath.so \
+ libprioana.so \
libprioiet.so
-CFLAGS += -fPIC -I..
+CFLAGS += -fPIC -I.. -I$(nvmedir)
all: $(LIBS)
Index: multipath-tools-130222/libmultipath/prioritizers/ana.c
===================================================================
--- /dev/null
+++ multipath-tools-130222/libmultipath/prioritizers/ana.c
@@ -0,0 +1,236 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017 All Rights Reserved.
+ *
+ * ana.c
+ * Version 1.00
+ *
+ * Prioritizer that makes use of an NVMe feature called Asymmetric Namespace Access.
+ * It determines the ANA state of a path and returns the matching priority value.
+ *
+ * Author(s): Cheng Jike <chengjike.cheng@huawei.com>
+ * Li Jie <lijie34@huawei.com>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ */
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <libudev.h>
+
+#include "debug.h"
+#include "nvme-lib.h"
+#include "prio.h"
+#include "util.h"
+#include "structs.h"
+#include "def_func.h"
+
+enum { /* Failure codes of this prioritizer; get_ana_info() and get_ana_state() return them negated. */
+ ANA_ERR_GETCTRL_FAILED = 1, /* numbering starts at 1, so index 0 of ana_errmsg[] stays unused */
+ ANA_ERR_NOT_NVME,
+ ANA_ERR_NOT_SUPPORTED,
+ ANA_ERR_GETANAS_OVERFLOW,
+ ANA_ERR_GETANAS_NOTFOUND,
+ ANA_ERR_GETANALOG_FAILED,
+ ANA_ERR_GETNSID_FAILED,
+ ANA_ERR_GETNS_FAILED,
+ ANA_ERR_NO_MEMORY,
+ ANA_ERR_NO_INFORMATION, /* used by getprio() when pp->fd is negative */
+};
+
+static const char *ana_errmsg[] = { /* Log text per ANA_ERR_* code; getprio() indexes it with the positive code. */
+ [ANA_ERR_GETCTRL_FAILED] = "couldn't get ctrl info",
+ [ANA_ERR_NOT_NVME] = "not an NVMe device",
+ [ANA_ERR_NOT_SUPPORTED] = "ANA not supported",
+ [ANA_ERR_GETANAS_OVERFLOW] = "buffer overflow in ANA log",
+ [ANA_ERR_GETANAS_NOTFOUND] = "NSID or ANAGRPID not found",
+ [ANA_ERR_GETANALOG_FAILED] = "couldn't get ana log",
+ [ANA_ERR_GETNSID_FAILED] = "couldn't get NSID",
+ [ANA_ERR_GETNS_FAILED] = "couldn't get namespace info",
+ [ANA_ERR_NO_MEMORY] = "out of memory",
+ [ANA_ERR_NO_INFORMATION] = "invalid fd",
+};
+
+static const char *anas_string[] = { /* Display names of the ANA states, indexed by NVME_ANA_* value; gaps are NULL. */
+ [NVME_ANA_OPTIMIZED] = "ANA Optimized State",
+ [NVME_ANA_NONOPTIMIZED] = "ANA Non-Optimized State",
+ [NVME_ANA_INACCESSIBLE] = "ANA Inaccessible State",
+ [NVME_ANA_PERSISTENT_LOSS] = "ANA Persistent Loss State",
+ [NVME_ANA_CHANGE] = "ANA Change state",
+};
+
+/* Name of the ANA state held in the low byte of rc, or a fixed fallback string for values without a table entry. */
+static const char *aas_print_string(int rc)
+{
+	const unsigned int state = rc & 0xff;
+
+	if (state >= ARRAY_SIZE(anas_string) || !anas_string[state])
+		return "invalid ANA state";
+	return anas_string[state];
+}
+
+/*
+ * Walk the ANA group descriptors in ana_log and return the state of the first
+ * group that lists nsid, or whose group id equals a non-zero anagrpid.
+ * Returns -ANA_ERR_GETANAS_OVERFLOW if a descriptor runs past ana_log_len and
+ * -ANA_ERR_GETANAS_NOTFOUND if no group matches.
+ */
+static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log,
+			 size_t ana_log_len)
+{
+	const char *log = ana_log;
+	const struct nvme_ana_rsp_hdr *hdr = ana_log;
+	size_t pos = sizeof(*hdr);
+	int grp, k;
+
+	for (grp = 0; grp < le16_to_cpu(hdr->ngrps); grp++) {
+		const struct nvme_ana_group_desc *desc = (const void *)(log + pos);
+		__u32 count;
+
+		/* The fixed part of the descriptor must fit before nnsids is read. */
+		pos += sizeof(*desc);
+		if (pos > ana_log_len)
+			return -ANA_ERR_GETANAS_OVERFLOW;
+
+		count = le32_to_cpu(desc->nnsids);
+		pos += count * sizeof(__le32);
+		if (pos > ana_log_len)
+			return -ANA_ERR_GETANAS_OVERFLOW;
+
+		for (k = 0; k < count; k++)
+			if (nsid == le32_to_cpu(desc->nsids[k]))
+				return desc->state;
+		if (anagrpid != 0 && anagrpid == le32_to_cpu(desc->grpid))
+			return desc->state;
+	}
+	return -ANA_ERR_GETANAS_NOTFOUND;
+}
+
+/*
+ * Query the ANA state of pp: identify the controller, look up the NSID, fetch the ANA log and search it.
+ * Returns the ANA state (>= 0) or a negative ANA_ERR_* code.  The timeout argument is currently not used.
+ */
+int get_ana_info(struct path * pp, unsigned int timeout)
+{
+	int rc;
+	__u32 nsid;
+	struct nvme_id_ctrl ctrl;
+	struct nvme_id_ns ns;
+	void *ana_log;
+	size_t ana_log_len;
+	bool is_anagrpid_const;
+
+	rc = nvme_id_ctrl_ana(pp->fd, &ctrl);
+	if (rc < 0) {
+		log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl");
+		return -ANA_ERR_GETCTRL_FAILED;
+	} else if (rc == 0)
+		return -ANA_ERR_NOT_SUPPORTED;
+
+	/* Test the signed return value: a negative error stored in the __u32 nsid would pass "<= 0". */
+	rc = nvme_get_nsid(pp->fd);
+	if (rc <= 0) {
+		log_nvme_errcode(rc, pp->dev, "nvme_get_nsid");
+		return -ANA_ERR_GETNSID_FAILED;
+	}
+	nsid = rc;
+	is_anagrpid_const = ctrl.anacap & (1 << 6);
+
+	/*
+	 * Code copied from nvme-cli/nvme.c. We don't need to allocate an
+	 * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most
+	 * in one ANA group.
+	 */
+	ana_log_len = sizeof(struct nvme_ana_rsp_hdr) +
+		le32_to_cpu(ctrl.nanagrpid) * sizeof(struct nvme_ana_group_desc);
+	if (is_anagrpid_const) {
+		rc = nvme_identify_ns(pp->fd, nsid, 0, &ns);
+		if (rc) {
+			log_nvme_errcode(rc, pp->dev, "nvme_identify_ns");
+			return -ANA_ERR_GETNS_FAILED;
+		}
+	} else
+		ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32);
+
+	ana_log = malloc(ana_log_len);
+	if (!ana_log)
+		return -ANA_ERR_NO_MEMORY;
+	pthread_cleanup_push(free, ana_log);
+	rc = nvme_ana_log(pp->fd, ana_log, ana_log_len, is_anagrpid_const ? NVME_ANA_LOG_RGO : 0);
+	if (rc) {
+		log_nvme_errcode(rc, pp->dev, "nvme_ana_log");
+		rc = -ANA_ERR_GETANALOG_FAILED;
+	} else
+		rc = get_ana_state(nsid, is_anagrpid_const ? le32_to_cpu(ns.anagrpid) : 0,
+				   ana_log, ana_log_len);
+	pthread_cleanup_pop(1);
+	if (rc >= 0)
+		condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc, aas_print_string(rc));
+	return rc;
+}
+
+/*
+ * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c)
+ * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018).
+ *
+ * Differences:
+ *
+ * - The ANA base spec defines no implicit or explicit (STPG) state management.
+ * If a state is encountered that doesn't allow normal I/O (all except
+ * OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a
+ * Access State Change Notice (can't do that in multipathd as we don't receive
+ * those), or retry commands in regular time intervals until ANATT is expired
+ * (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we
+ * can currently do.
+ *
+ * FIXME: Waiting for ANATT could be implemented with a "delayed failback"
+ * mechanism. The current "failback" method can't be used, as it would
+ * affect failback to every state, and here only failback to UNAVAILABLE
+ * should be delayed.
+ *
+ * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state.
+ * FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state
+ * in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't
+ * even be checked under "all paths down" conditions). Device mapper can,
+ * and will, select a PG for IO if it has non-failed paths, even if the
+ * PG has priority 0. We could avoid that only with an "ANA path checker".
+ *
+ * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests
+ * that CHANGE state should be treated in roughly the same way as
+ * INACCESSIBLE. Therefore we assign the same prio to it.
+ *
+ * - ALUA's LBA-dependent state has no ANA equivalent.
+ */
+
+/* Map the path's ANA state to a priority: 50, 10, 1 (inaccessible or change) or 0; logs and returns -1 on anything else. */
+int getprio(struct path *pp, char *args)
+{
+	int prio = -1;
+	int rc = pp->fd < 0 ? -ANA_ERR_NO_INFORMATION : get_ana_info(pp, get_prio_timeout(60000));
+
+	switch (rc) {
+	case NVME_ANA_OPTIMIZED:
+		prio = 50;
+		break;
+	case NVME_ANA_NONOPTIMIZED:
+		prio = 10;
+		break;
+	case NVME_ANA_INACCESSIBLE:
+	case NVME_ANA_CHANGE:
+		prio = 1;
+		break;
+	case NVME_ANA_PERSISTENT_LOSS:
+		prio = 0;
+		break;
+	default:
+		if (rc < 0 && -rc < ARRAY_SIZE(ana_errmsg))
+			condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]);
+		else
+			condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc);
+	}
+	return prio;
+}
+
+declare_nop_prio(initprio)
+declare_nop_prio(freeprio)
Index: multipath-tools-130222/libmultipath/util.h
===================================================================
--- multipath-tools-130222.orig/libmultipath/util.h
+++ multipath-tools-130222/libmultipath/util.h
@@ -18,6 +18,8 @@ int parse_prkey(char *ptr, uint64_t *prk
int parse_prkey_flags(char *ptr, uint64_t *prkey, uint8_t *flags);
int safe_write(int fd, const void *buf, size_t count);
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
#define safe_sprintf(var, format, args...) \
snprintf(var, sizeof(var), format, ##args) >= sizeof(var)
#define safe_snprintf(var, size, format, args...) \
Index: multipath-tools-130222/multipath/multipath.conf.5
===================================================================
--- multipath-tools-130222.orig/multipath/multipath.conf.5
+++ multipath-tools-130222/multipath/multipath.conf.5
@@ -196,6 +196,9 @@ Generate the path priority for LSI/Engen
Generate the path priority for Compaq/HP controller in
active/standby mode.
.TP
+.B ana
+Generate the path priority based on the NVMe ANA settings.
+.TP
.B hds
Generate the path priority for Hitachi HDS Modular storage arrays.
.TP
Index: multipath-tools-130222/libmultipath/propsel.c
===================================================================
--- multipath-tools-130222.orig/libmultipath/propsel.c
+++ multipath-tools-130222/libmultipath/propsel.c
@@ -5,6 +5,7 @@
*/
#include <stdio.h>
+#include "nvme-lib.h"
#include "checkers.h"
#include "memory.h"
#include "vector.h"
@@ -489,8 +490,13 @@ select_getuid (struct path * pp)
void
detect_prio(struct path * pp)
{
- if (detect_alua(pp))
- prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS);
+ if (pp->bus == SYSFS_BUS_NVME) {
+ if (nvme_id_ctrl_ana(pp->fd, NULL) == 1)
+ prio_get(&pp->prio, PRIO_ANA, DEFAULT_PRIO_ARGS);
+ } else if (pp->bus == SYSFS_BUS_SCSI) {
+ if (detect_alua(pp))
+ prio_get(&pp->prio, PRIO_ALUA, DEFAULT_PRIO_ARGS);
+ }
}
extern int
Index: multipath-tools-130222/libmultipath/hwtable.c
===================================================================
--- multipath-tools-130222.orig/libmultipath/hwtable.c
+++ multipath-tools-130222/libmultipath/hwtable.c
@@ -1178,6 +1178,7 @@ static struct hwentry default_hw[] = {
.vendor = "NVME",
.product = ".*",
.uid_attribute = "ID_WWN",
+ .detect_prio = DETECT_PRIO_ON,
.checker_name = NONE,
},
/*