Browse Source

iproute package update

Signed-off-by: basebuilder_pel7x64builder0 <basebuilder@powerel.org>
master
basebuilder_pel7x64builder0 6 years ago
parent
commit
8f4babb044
  1. 105
      SOURCES/0001-Confirm-success-for-each-tc-batch-command.patch
  2. 88
      SOURCES/0002-Really-fix-get_addr-and-get_prefix-error-messages.patch
  3. 102
      SOURCES/0003-tc-simple-Fix-documentation.patch
  4. 37
      SOURCES/0004-tc-fix-m_simple-usage.patch
  5. 96
      SOURCES/0005-bpf-Make-bytecode-file-reading-a-little-more-robust.patch
  6. 93
      SOURCES/0006-ss-Fix-for-added-diag-support-check.patch
  7. 31
      SOURCES/0007-tc-simple.8-Fix-reference-to-non-existing-tc-actions.patch
  8. 74
      SOURCES/0008-lib-bpf-Fix-bytecode-file-parsing.patch
  9. 27
      SOURCES/0009-tc-simple.8-Fix-one-more-reference-to-non-existing-t.patch
  10. 49
      SOURCES/0010-tc-m_xt-Prevent-a-segfault-in-libipt.patch
  11. 51
      SOURCES/0011-link_gre6-really-support-encaplimit-option.patch
  12. 40
      SOURCES/0012-tc-fix-typo-in-manpage.patch
  13. 52
      SOURCES/0013-ip-neigh-allow-flush-FAILED-neighbour-entry.patch
  14. 58
      SOURCES/0014-netns-avoid-directory-traversal.patch
  15. 58
      SOURCES/0015-utils-return-default-family-when-rtm_family-is-not-R.patch
  16. 49
      SOURCES/0016-link_gre6-Fix-for-changing-tclass-flowlabel.patch
  17. 92
      SOURCES/0017-netlink-Change-rtnl_dump_done-to-always-show-error.patch
  18. 47
      SOURCES/0018-libnetlink-drop-unused-parameter-to-rtnl_dump_done.patch
  19. 255
      SOURCES/0019-iproute-Add-support-for-extended-ack-to-rtnl_talk.patch
  20. 51
      SOURCES/0020-iplink-check-for-message-truncation-in-iplink_get.patch
  21. 48
      SOURCES/0021-iplink-double-the-buffer-size-also-in-iplink_get.patch
  22. 252
      SOURCES/0022-lib-libnetlink-re-malloc-buff-if-size-is-not-enough.patch
  23. 1604
      SOURCES/0023-lib-libnetlink-update-rtnl_talk-to-support-malloc-bu.patch
  24. 2048
      SOURCES/0024-Update-linux-headers.patch
  25. 151
      SOURCES/0025-devlink-Change-netlink-attribute-validation.patch
  26. 1597
      SOURCES/0026-devlink-Add-support-for-pipeline-debug-dpipe.patch
  27. 104
      SOURCES/0027-tc-Reflect-HW-offload-status.patch
  28. 39
      SOURCES/0028-pedit-Fix-a-typo-in-warning.patch
  29. 58
      SOURCES/0029-pedit-Do-not-allow-using-retain-for-too-big-fields.patch
  30. 55
      SOURCES/0030-pedit-Check-for-extended-capability-in-protocol-pars.patch
  31. 304
      SOURCES/0031-pedit-Introduce-ipv6-support.patch
  32. 189
      SOURCES/0032-devlink-Add-option-to-set-and-show-eswitch-encapsula.patch
  33. 154
      SOURCES/0033-tc-flower-add-support-for-tcp-flags.patch
  34. 60
      SOURCES/0034-iplink-Update-usage-in-help-message.patch
  35. 177
      SOURCES/0035-tc-flower-add-support-for-matching-on-ip-tos-and-ttl.patch
  36. 33934
      SOURCES/0036-iproute-build-more-easily-on-Android.patch
  37. 64
      SOURCES/0037-uapi-add-include-linux-vm_sockets_diag.h.patch
  38. 213
      SOURCES/0038-ss-allow-AF_FAMILY-constants-32.patch
  39. 398
      SOURCES/0039-ss-add-AF_VSOCK-support.patch
  40. 39
      SOURCES/0040-link_gre6-Detect-invalid-encaplimit-values.patch
  41. 42
      SOURCES/0041-man-tc-csum.8-Fix-inconsistency-in-example-descripti.patch
  42. 42
      SOURCES/0042-tc-fix-command-tc-actions-del-hang-issue.patch
  43. 43
      SOURCES/0043-ip-link-Fix-use-after-free-in-nl_get_ll_addr_len.patch
  44. 1
      SOURCES/avpkt
  45. 5
      SOURCES/cbq-0000.example
  46. 17
      SOURCES/rt_dsfield.deprecated
  47. 1484
      SPECS/iproute.spec

105
SOURCES/0001-Confirm-success-for-each-tc-batch-command.patch

@ -0,0 +1,105 @@ @@ -0,0 +1,105 @@
From d9a1dc236a9bcc06f04d609e2654f76c6a9459e7 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Mon, 14 Dec 2015 21:02:18 +0100
Subject: [PATCH] Confirm success for each tc -batch command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=977844
Upstream Status: Rejected.

The original patch has been extended by the related man page additions
which were contained in another local patch.

commit 8c5024483cbbfdc092945a00be05d917485b9af3
Author: Petr Písař <ppisar@redhat.com>
Date: Thu Sep 19 11:25:49 2013 +0200

Confirm success for each tc -batch command

If `tc -force -batch' is fed by a controlling program from a pipe,
it's not possible to recognize when a command has been processed
successfully.

This patch adds an optional `-OK' option to the tc(8) tool, so `tc
-force -OK -batch' will print "OK\n" to standard output on each
successfully completed tc command.

Signed-off-by: Petr Písař <ppisar@redhat.com>

Signed-off-by: Phil Sutter <psutter@redhat.com>
---
man/man8/tc.8 | 8 +++++++-
tc/tc.c | 8 +++++++-
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/man/man8/tc.8 b/man/man8/tc.8
index f96911a..a341a8f 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -62,7 +62,7 @@ tc \- show / manipulate traffic control settings
.P
.ti 8
.IR OPTIONS " := {"
-\fB[ -force ] -b\fR[\fIatch\fR] \fB[ filename ] \fR|
+\fB[ -force ] [ -OK ] -b\fR[\fIatch\fR] \fB[ filename ] \fR|
\fB[ \fB-n\fR[\fIetns\fR] name \fB] \fR|
\fB[ \fB-nm \fR| \fB-nam\fR[\fIes\fR] \fB] \fR|
\fB[ \fR{ \fB-cf \fR| \fB-c\fR[\fIonf\fR] \fR} \fB[ filename ] \fB] \fR}
@@ -603,6 +603,12 @@ don't terminate tc on errors in batch mode.
If there were any errors during execution of the commands, the application return code will be non zero.
.TP
+.BR "\-OK"
+in batch mode, print
+.B OK
+and a new line on standard output after each successfully interpreted command.
+
+.TP
.BR "\-n" , " \-net" , " \-netns " <NETNS>
switches
.B tc
diff --git a/tc/tc.c b/tc/tc.c
index 8e64a82..360c9f1 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -42,6 +42,7 @@ int batch_mode;
int resolve_hosts;
int use_iec;
int force;
+int ok;
bool use_names;
static char *conf_file;
@@ -188,7 +189,7 @@ noexist:
static void usage(void)
{
fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n"
- " tc [-force] -batch filename\n"
+ " tc [-force] [-OK] -batch filename\n"
"where OBJECT := { qdisc | class | filter | action | monitor | exec }\n"
" OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -p[retty] | -b[atch] [filename] | -n[etns] name |\n"
" -nm | -nam[es] | { -cf | -conf } path }\n");
@@ -254,6 +255,9 @@ static int batch(const char *name)
ret = 1;
if (!force)
break;
+ } else if (ok) {
+ printf("OK\n");
+ fflush(stdout);
}
}
if (line)
@@ -293,6 +297,8 @@ int main(int argc, char **argv)
return 0;
} else if (matches(argv[1], "-force") == 0) {
++force;
+ } else if (matches(argv[1], "-OK") == 0) {
+ ++ok;
} else if (matches(argv[1], "-batch") == 0) {
argc--; argv++;
if (argc <= 1)
--
1.8.3.1

88
SOURCES/0002-Really-fix-get_addr-and-get_prefix-error-messages.patch

@ -0,0 +1,88 @@ @@ -0,0 +1,88 @@
From aed8229c0bec5c56deaf1ea2047ca0263732477f Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 11 Aug 2017 11:11:32 +0200
Subject: [PATCH] Really fix get_addr() and get_prefix() error messages

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477206
Upstream Status: iproute2.git commit 34705c807a389

commit 34705c807a38909247d1bb29ccdffe42e5c1dab3
Author: Phil Sutter <phil@nwl.cc>
Date: Tue Aug 1 18:36:11 2017 +0200

Really fix get_addr() and get_prefix() error messages

Both functions take the desired address family as a parameter. So using
that to notify the user what address family was expected is correct,
unlike using dst->family which will tell the user only what address
family was specified.

The situation which commit 334af76143368 tried to fix was when 'ip'
would accept addresses from multiple families. In that case, the family
parameter is set to AF_UNSPEC so that get_addr_1() may accept any valid
address.

This patch introduces a wrapper around family_name() which returns the
string "any valid" for AF_UNSPEC instead of the three question marks
unsuitable for use in error messages.

Tests for AF_UNSPEC:

| # ip a a 256.10.166.1/24 dev d0
| Error: any valid prefix is expected rather than "256.10.166.1/24".

| # ip neighbor add proxy 2001:db8::g dev d0
| Error: any valid address is expected rather than "2001:db8::g".

Tests for explicit address family:

| # ip -6 addrlabel add prefix 1.1.1.1/24 label 123
| Error: inet6 prefix is expected rather than "1.1.1.1/24".

| # ip -4 addrlabel add prefix dead:beef::1/24 label 123
| Error: inet prefix is expected rather than "dead:beef::1/24".

Reported-by: Jaroslav Aster <jaster@redhat.com>
Fixes: 334af76143368 ("fix get_addr() and get_prefix() error messages")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
lib/utils.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/utils.c b/lib/utils.c
index 6d5642f..7d6ee53 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -613,12 +613,19 @@ done:
return err;
}
+static const char *family_name_verbose(int family)
+{
+ if (family == AF_UNSPEC)
+ return "any valid";
+ return family_name(family);
+}
+
int get_addr(inet_prefix *dst, const char *arg, int family)
{
if (get_addr_1(dst, arg, family)) {
fprintf(stderr,
"Error: %s address is expected rather than \"%s\".\n",
- family_name(dst->family), arg);
+ family_name_verbose(family), arg);
exit(1);
}
return 0;
@@ -636,7 +643,7 @@ int get_prefix(inet_prefix *dst, char *arg, int family)
if (get_prefix_1(dst, arg, family)) {
fprintf(stderr,
"Error: %s prefix is expected rather than \"%s\".\n",
- family_name(dst->family), arg);
+ family_name_verbose(family), arg);
exit(1);
}
return 0;
--
1.8.3.1

102
SOURCES/0003-tc-simple-Fix-documentation.patch

@ -0,0 +1,102 @@ @@ -0,0 +1,102 @@
From 3d016b2ca5862b3f47da5b28aca43bd96d5c3c49 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 11 Aug 2017 11:13:26 +0200
Subject: [PATCH] tc-simple: Fix documentation

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477523
Upstream Status: iproute2.git commit e2a055dd23f0e

commit e2a055dd23f0e7527a987c24687cb6b0b86f0cde
Author: Phil Sutter <phil@nwl.cc>
Date: Thu Aug 3 17:00:51 2017 +0200

tc-simple: Fix documentation

- CONTROL has to come last, otherwise 'index' applies to gact and not
simple itself.
- Man page wasn't updated to reflect syntax changes.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
man/man8/tc-simple.8 | 29 ++++++++++++++++++++++++++---
tc/m_simple.c | 4 ++--
2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/man/man8/tc-simple.8 b/man/man8/tc-simple.8
index 2206dc3..7363ab5 100644
--- a/man/man8/tc-simple.8
+++ b/man/man8/tc-simple.8
@@ -6,15 +6,37 @@ simple - basic example action
.in +8
.ti -8
.BR tc " ... " "action simple"
-.I STRING
+[
+.BI sdata " STRING"
+] [
+.BI index " INDEX"
+] [
+.I CONTROL
+]
+
+.ti -8
+.IR CONTROL " := {"
+.BR reclassify " | " pipe " | " drop " | " continue " | " ok " }"
+
.SH DESCRIPTION
This is a pedagogical example rather than an actually useful action. Upon every access, it prints the given
.I STRING
which may be of arbitrary length.
.SH OPTIONS
.TP
-.I STRING
+.BI sdata " STRING"
The actual string to print.
+.TP
+.BI index " INDEX"
+Optional action index value.
+.TP
+.I CONTROL
+Indicate how
+.B tc
+should proceed after executing the action. For a description of the possible
+.I CONTROL
+values, see
+.BR tc-actions (8).
.SH EXAMPLES
The following example makes the kernel yell "Incoming ICMP!" every time it sees
an incoming ICMP on eth0. Steps are:
@@ -36,7 +58,7 @@ display stats again and observe increment by 1
.EX
hadi@noma1:$ tc qdisc add dev eth0 ingress
hadi@noma1:$tc filter add dev eth0 parent ffff: protocol ip prio 5 \\
- u32 match ip protocol 1 0xff flowid 1:1 action simple "Incoming ICMP"
+ u32 match ip protocol 1 0xff flowid 1:1 action simple sdata "Incoming ICMP"
hadi@noma1:$ sudo tc -s filter ls dev eth0 parent ffff:
filter protocol ip pref 5 u32
@@ -74,3 +96,4 @@ display stats again and observe increment by 1
.EE
.SH SEE ALSO
.BR tc (8)
+.BR tc-actions (8)
diff --git a/tc/m_simple.c b/tc/m_simple.c
index 3a8bd91..ab63384 100644
--- a/tc/m_simple.c
+++ b/tc/m_simple.c
@@ -81,10 +81,10 @@
#endif
static void explain(void)
{
- fprintf(stderr, "Usage:... simple [sdata STRING] [CONTROL] [index INDEX]\n");
+ fprintf(stderr, "Usage:... simple [sdata STRING] [index INDEX] [CONTROL]\n");
fprintf(stderr, "\tSTRING being an arbitrary string\n"
- "\tCONTROL := reclassify|pipe|drop|continue|ok\n"
"\tINDEX := optional index value used\n");
+ "\tCONTROL := reclassify|pipe|drop|continue|ok\n"
}
static void usage(void)
--
1.8.3.1

37
SOURCES/0004-tc-fix-m_simple-usage.patch

@ -0,0 +1,37 @@ @@ -0,0 +1,37 @@
From d203110b883afafa58b735a3e94c71f255db7608 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 11 Aug 2017 11:13:26 +0200
Subject: [PATCH] tc: fix m_simple usage

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477523
Upstream Status: iproute2.git commit 620fc6696d4f4

commit 620fc6696d4f4e9ad540a45892873b0382907739
Author: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu Aug 3 16:10:18 2017 -0700

tc: fix m_simple usage

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
tc/m_simple.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tc/m_simple.c b/tc/m_simple.c
index ab63384..65e48ad 100644
--- a/tc/m_simple.c
+++ b/tc/m_simple.c
@@ -83,8 +83,8 @@ static void explain(void)
{
fprintf(stderr, "Usage:... simple [sdata STRING] [index INDEX] [CONTROL]\n");
fprintf(stderr, "\tSTRING being an arbitrary string\n"
- "\tINDEX := optional index value used\n");
- "\tCONTROL := reclassify|pipe|drop|continue|ok\n"
+ "\tINDEX := optional index value used\n"
+ "\tCONTROL := reclassify|pipe|drop|continue|ok\n");
}
static void usage(void)
--
1.8.3.1

96
SOURCES/0005-bpf-Make-bytecode-file-reading-a-little-more-robust.patch

@ -0,0 +1,96 @@ @@ -0,0 +1,96 @@
From 91cda136ef27402256dbf85434374b43ab52d932 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 11 Aug 2017 11:15:30 +0200
Subject: [PATCH] bpf: Make bytecode-file reading a little more robust

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477491
Upstream Status: iproute2.git commit 3da3ebfca85b8

commit 3da3ebfca85b8f1e8252b898453d8cb383c5c398
Author: Phil Sutter <phil@nwl.cc>
Date: Wed Aug 2 14:57:56 2017 +0200

bpf: Make bytecode-file reading a little more robust

bpf_parse_string() will now correctly handle:

- Extraneous whitespace,
- OPs on multiple lines and
- overlong file names.

The added feature of allowing to have OPs on multiple lines (like e.g.
tcpdump prints them) is rather a side effect of fixing detection of
malformed bytecode files having random content on a second line, like
e.g.:

| 4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0
| foobar

Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
lib/bpf.c | 32 ++++++++++++++++++++++++--------
1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/lib/bpf.c b/lib/bpf.c
index 04ee1ab..73dac5c 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -160,11 +160,11 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
if (from_file) {
size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
- char *tmp_string, *last;
+ char *tmp_string, *pos, c, c_prev = ' ';
FILE *fp;
tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
- tmp_string = calloc(1, tmp_len);
+ tmp_string = pos = calloc(1, tmp_len);
if (tmp_string == NULL)
return -ENOMEM;
@@ -175,17 +175,33 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
return -ENOENT;
}
- if (!fgets(tmp_string, tmp_len, fp)) {
+ while ((c = fgetc(fp)) != EOF) {
+ switch (c) {
+ case '\n':
+ if (c_prev != ',')
+ *(pos++) = ',';
+ break;
+ case ' ':
+ case '\t':
+ if (c_prev != ' ')
+ *(pos++) = c;
+ break;
+ default:
+ *(pos++) = c;
+ }
+ if (pos - tmp_string == tmp_len)
+ break;
+ c_prev = c;
+ }
+
+ if (!feof(fp)) {
free(tmp_string);
fclose(fp);
- return -EIO;
+ return -E2BIG;
}
fclose(fp);
-
- last = &tmp_string[strlen(tmp_string) - 1];
- if (*last == '\n')
- *last = 0;
+ *pos = 0;
*need_release = true;
*bpf_string = tmp_string;
--
1.8.3.1

93
SOURCES/0006-ss-Fix-for-added-diag-support-check.patch

@ -0,0 +1,93 @@ @@ -0,0 +1,93 @@
From 2dc48cc4101b9788dcafd38b07a82f8c91b4d3f6 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Thu, 31 Aug 2017 14:23:11 +0200
Subject: [PATCH] ss: Fix for added diag support check

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1487152
Upstream Status: iproute2.git commit 6c6bbc30f4e7f

commit 6c6bbc30f4e7fedc74381627f7ec86d26050b404
Author: Phil Sutter <phil@nwl.cc>
Date: Mon Aug 28 19:31:22 2017 +0200

ss: Fix for added diag support check

Commit 9f66764e308e9 ("libnetlink: Add test for error code returned from
netlink reply") changed rtnl_dump_filter_l() to return an error in case
NLMSG_DONE would contain one, even if it was ENOENT.

This in turn breaks ss when it tries to dump DCCP sockets on a system
without support for it: The function tcp_show(), which is shared between
TCP and DCCP, will start parsing /proc since inet_show_netlink() returns
an error - yet it parses /proc/net/tcp which doesn't make sense for DCCP
sockets at all.

On my system, a call to 'ss' without further arguments prints the list
of connected TCP sockets twice.

Fix this by introducing a dedicated function dccp_show() which does not
have a fallback to /proc, just like sctp_show(). And since tcp_show()
is no longer "multi-purpose", drop its socktype parameter.

Fixes: 9f66764e308e9 ("libnetlink: Add test for error code returned from netlink reply")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
misc/ss.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index 12763c9..b84baf3 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -2735,7 +2735,7 @@ static int tcp_show_netlink_file(struct filter *f)
}
}
-static int tcp_show(struct filter *f, int socktype)
+static int tcp_show(struct filter *f)
{
FILE *fp = NULL;
char *buf = NULL;
@@ -2750,7 +2750,7 @@ static int tcp_show(struct filter *f, int socktype)
return tcp_show_netlink_file(f);
if (!getenv("PROC_NET_TCP") && !getenv("PROC_ROOT")
- && inet_show_netlink(f, NULL, socktype) == 0)
+ && inet_show_netlink(f, NULL, IPPROTO_TCP) == 0)
return 0;
/* Sigh... We have to parse /proc/net/tcp... */
@@ -2818,6 +2818,18 @@ outerr:
} while (0);
}
+static int dccp_show(struct filter *f)
+{
+ if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
+ return 0;
+
+ if (!getenv("PROC_NET_DCCP") && !getenv("PROC_ROOT")
+ && inet_show_netlink(f, NULL, IPPROTO_DCCP) == 0)
+ return 0;
+
+ return 0;
+}
+
static int sctp_show(struct filter *f)
{
if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
@@ -4368,9 +4380,9 @@ int main(int argc, char *argv[])
if (current_filter.dbs & (1<<UDP_DB))
udp_show(&current_filter);
if (current_filter.dbs & (1<<TCP_DB))
- tcp_show(&current_filter, IPPROTO_TCP);
+ tcp_show(&current_filter);
if (current_filter.dbs & (1<<DCCP_DB))
- tcp_show(&current_filter, IPPROTO_DCCP);
+ dccp_show(&current_filter);
if (current_filter.dbs & (1<<SCTP_DB))
sctp_show(&current_filter);
--
1.8.3.1

31
SOURCES/0007-tc-simple.8-Fix-reference-to-non-existing-tc-actions.patch

@ -0,0 +1,31 @@ @@ -0,0 +1,31 @@
From 4917021d1ebea2b75cdcf31272452aa5cc3ff7ec Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 1 Sep 2017 13:05:45 +0200
Subject: [PATCH] tc-simple.8: Fix reference to non-existing tc-actions.8

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477523
Upstream Status: RHEL-only

The referenced man page doesn't exist in RHEL iproute package, so better
refer to an existing one which also contains the CONTROL value
description.
---
man/man8/tc-simple.8 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/man8/tc-simple.8 b/man/man8/tc-simple.8
index 7363ab5..a0deb0b 100644
--- a/man/man8/tc-simple.8
+++ b/man/man8/tc-simple.8
@@ -36,7 +36,7 @@ Indicate how
should proceed after executing the action. For a description of the possible
.I CONTROL
values, see
-.BR tc-actions (8).
+.BR tc-pedit (8).
.SH EXAMPLES
The following example makes the kernel yell "Incoming ICMP!" every time it sees
an incoming ICMP on eth0. Steps are:
--
1.8.3.1

74
SOURCES/0008-lib-bpf-Fix-bytecode-file-parsing.patch

@ -0,0 +1,74 @@ @@ -0,0 +1,74 @@
From 584ca9f67952162dfdd02d984aa12640e45a4235 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Wed, 6 Sep 2017 11:53:53 +0200
Subject: [PATCH] lib/bpf: Fix bytecode-file parsing

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477491
Upstream Status: iproute2.git commit 7c87c7fed18d1

commit 7c87c7fed18d1162e045c8331cb68fa440bc5728
Author: Phil Sutter <phil@nwl.cc>
Date: Tue Aug 29 17:09:45 2017 +0200

lib/bpf: Fix bytecode-file parsing

The signedness of char type is implementation dependent, and there are
architectures on which it is unsigned by default. In that case, the
check whether fgetc() returned EOF failed because the return value was
assigned an (unsigned) char variable prior to comparison with EOF (which
is defined to -1). Fix this by using int as type for 'c' variable, which
also matches the declaration of fgetc().

While being at it, fix the parser logic to correctly handle multiple
empty lines and consecutive whitespace and tab characters to further
improve the parser's robustness. Note that this will still detect double
separator characters, so doesn't soften up the parser too much.

Fixes: 3da3ebfca85b8 ("bpf: Make bytecode-file reading a little more robust")
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Phil Sutter <phil@nwl.cc>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
lib/bpf.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/bpf.c b/lib/bpf.c
index 73dac5c..3aabf44 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -160,8 +160,9 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
if (from_file) {
size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
- char *tmp_string, *pos, c, c_prev = ' ';
+ char *tmp_string, *pos, c_prev = ' ';
FILE *fp;
+ int c;
tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
tmp_string = pos = calloc(1, tmp_len);
@@ -180,18 +181,20 @@ static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
case '\n':
if (c_prev != ',')
*(pos++) = ',';
+ c_prev = ',';
break;
case ' ':
case '\t':
if (c_prev != ' ')
*(pos++) = c;
+ c_prev = ' ';
break;
default:
*(pos++) = c;
+ c_prev = c;
}
if (pos - tmp_string == tmp_len)
break;
- c_prev = c;
}
if (!feof(fp)) {
--
1.8.3.1

27
SOURCES/0009-tc-simple.8-Fix-one-more-reference-to-non-existing-t.patch

@ -0,0 +1,27 @@ @@ -0,0 +1,27 @@
From 3905b2d8f676601c022804d197be9165dacff11c Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Wed, 6 Sep 2017 15:44:19 +0200
Subject: [PATCH] tc-simple.8: Fix one more reference to non-existing
tc-actions.8

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477523
Upstream Status: RHEL-only

The previous fix failed to update the SEE ALSO section as well.
---
man/man8/tc-simple.8 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/man8/tc-simple.8 b/man/man8/tc-simple.8
index a0deb0b..beab313 100644
--- a/man/man8/tc-simple.8
+++ b/man/man8/tc-simple.8
@@ -96,4 +96,4 @@ display stats again and observe increment by 1
.EE
.SH SEE ALSO
.BR tc (8)
-.BR tc-actions (8)
+.BR tc-pedit (8)
--
1.8.3.1

49
SOURCES/0010-tc-m_xt-Prevent-a-segfault-in-libipt.patch

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
From 074062808c630f2efb55c7093d510b44a38e74e5 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Thu, 14 Sep 2017 15:27:47 +0200
Subject: [PATCH] tc: m_xt: Prevent a segfault in libipt

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1465599
Upstream Status: iproute2.git commit f6fc1055e41a8

commit f6fc1055e41a8a924313c336b39b9ffe0c86938b
Author: Phil Sutter <psutter@redhat.com>
Date: Tue May 23 15:40:57 2017 +0200

tc: m_xt: Prevent a segfault in libipt

This happens with NAT targets, such as SNAT, DNAT and MASQUERADE. These
are still not usable with this patch, but at least tc doesn't crash
anymore when one tries to use them.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
tc/m_xt.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tc/m_xt.c b/tc/m_xt.c
index e59df8e..ad52d23 100644
--- a/tc/m_xt.c
+++ b/tc/m_xt.c
@@ -146,6 +146,9 @@ static int parse_ipt(struct action_util *a, int *argc_p,
char ***argv_p, int tca_id, struct nlmsghdr *n)
{
struct xtables_target *m = NULL;
+#if XTABLES_VERSION_CODE >= 6
+ struct ipt_entry fw = {};
+#endif
struct rtattr *tail;
int c;
@@ -206,7 +209,7 @@ static int parse_ipt(struct action_util *a, int *argc_p,
default:
#if XTABLES_VERSION_CODE >= 6
if (m != NULL && m->x6_parse != NULL) {
- xtables_option_tpcall(c, argv, 0, m, NULL);
+ xtables_option_tpcall(c, argv, 0, m, &fw);
#else
if (m != NULL && m->parse != NULL) {
m->parse(c - m->option_offset, argv, 0,
--
1.8.3.1

51
SOURCES/0011-link_gre6-really-support-encaplimit-option.patch

@ -0,0 +1,51 @@ @@ -0,0 +1,51 @@
From 2db276543a03633a61ba0815a01c8bb2846830ab Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Thu, 14 Sep 2017 15:30:37 +0200
Subject: [PATCH] link_gre6: really support encaplimit option

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1459600
Upstream Status: iproute2.git commit a11b7b71a6eba
Conflicts: Context change due to missing commit ad4b1425c3182
("iplink: Expose IFLA_*_FWMARK attributes for supported link
types").

commit a11b7b71a6eba4ee80e931e4f75321a0cf0116f1
Author: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed Jun 14 18:45:42 2017 +0200

link_gre6: really support encaplimit option

This option is documented in gre6 help, but was not supported.

Fixes: af89576d7a8c ("iproute2: GRE over IPv6 tunnel support.")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
ip/link_gre6.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index 1b4fb05..76416b2 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -339,6 +339,18 @@ get_failed:
encapflags |= TUNNEL_ENCAP_FLAG_REMCSUM;
} else if (strcmp(*argv, "noencap-remcsum") == 0) {
encapflags &= ~TUNNEL_ENCAP_FLAG_REMCSUM;
+ } else if (strcmp(*argv, "encaplimit") == 0) {
+ NEXT_ARG();
+ if (strcmp(*argv, "none") == 0) {
+ flags |= IP6_TNL_F_IGN_ENCAP_LIMIT;
+ } else {
+ __u8 uval;
+
+ if (get_u8(&uval, *argv, 0) < -1)
+ invarg("invalid ELIM", *argv);
+ encap_limit = uval;
+ flags &= ~IP6_TNL_F_IGN_ENCAP_LIMIT;
+ }
} else
usage();
argc--; argv++;
--
1.8.3.1

40
SOURCES/0012-tc-fix-typo-in-manpage.patch

@ -0,0 +1,40 @@ @@ -0,0 +1,40 @@
From beb8e1aa7ed08f86fb87ff58f7c69aaa2b68b862 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Thu, 14 Sep 2017 15:38:46 +0200
Subject: [PATCH] tc: fix typo in manpage

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1417162
Upstream Status: iproute2.git commit b09515553fded

commit b09515553fded944713955815a3f1cc855384abd
Author: Matteo Croce <mcroce@redhat.com>
Date: Fri Jul 7 15:08:33 2017 +0200

tc: fix typo in manpage

Fix a typo in the 'tc' manpage and reword some sentences.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
man/man8/tc-csum.8 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/man8/tc-csum.8 b/man/man8/tc-csum.8
index 718301d..409ab71 100644
--- a/man/man8/tc-csum.8
+++ b/man/man8/tc-csum.8
@@ -29,9 +29,9 @@ csum - checksum update action
The
.B csum
action triggers checksum recalculation of specified packet headers. It is
-commonly used after packet editing using the
+commonly used to fix incorrect checksums after the
.B pedit
-action to fix for then incorrect checksums.
+action has modified the packet content.
.SH OPTIONS
.TP
.I TARGET
--
1.8.3.1

52
SOURCES/0013-ip-neigh-allow-flush-FAILED-neighbour-entry.patch

@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
From 3b6fd8227cbb03b5b211d2cb53534ad405673668 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Wed, 2 Aug 2017 13:57:17 +0200
Subject: [PATCH] ip neigh: allow flush FAILED neighbour entry

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1469945
Tested: locally using proper reproducer
Upstream Status: merged 37a5f7c5

commit 37a5f7c571623059ae671992f72feaa444a6ffc8
Author: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri Jun 16 11:31:52 2017 +0800

ip neigh: allow flush FAILED neighbour entry

After upstream commit 5071034e4af7 ('neigh: Really delete an arp/neigh entry
on "ip neigh delete" or "arp -d"'), we could delete a single FAILED neighbour
entry now. But `ip neigh flush` still skips FAILED entries.

Move the filter after the first flush round so we can flush FAILED entries on a
fixed kernel and also do not keep retrying on an old kernel.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
ip/ipneigh.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ip/ipneigh.c b/ip/ipneigh.c
index 4d8fc85..9c38a60 100644
--- a/ip/ipneigh.c
+++ b/ip/ipneigh.c
@@ -445,7 +445,6 @@ static int do_show_or_flush(int argc, char **argv, int flush)
filter.flushb = flushb;
filter.flushp = 0;
filter.flushe = sizeof(flushb);
- filter.state &= ~NUD_FAILED;
while (round < MAX_ROUNDS) {
if (rtnl_dump_request_n(&rth, &req.n) < 0) {
@@ -474,6 +473,7 @@ static int do_show_or_flush(int argc, char **argv, int flush)
printf("\n*** Round %d, deleting %d entries ***\n", round, filter.flushed);
fflush(stdout);
}
+ filter.state &= ~NUD_FAILED;
}
printf("*** Flush not complete bailing out after %d rounds\n",
MAX_ROUNDS);
--
1.8.3.1

58
SOURCES/0014-netns-avoid-directory-traversal.patch

@ -0,0 +1,58 @@ @@ -0,0 +1,58 @@
From 74061958f56a4626a3a146c72f16e43012e828f1 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Thu, 14 Sep 2017 15:39:23 +0200
Subject: [PATCH] netns: avoid directory traversal

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1468529
Upstream Status: iproute2.git commit 79928fd0552b5

commit 79928fd0552b520aa36a22e71144d10a32f7e4fe
Author: Matteo Croce <mcroce@redhat.com>
Date: Thu Jul 20 00:36:32 2017 +0200

netns: avoid directory traversal

ip netns keeps track of created namespaces with bind mounts named
/var/run/netns/<namespace>. No input sanitization is done, allowing creation and
deletion of files relative to /var/run/netns or, if the path is nonexistent or
invalid, creation of "untracked" namespaces (invisible to the tool).

This commit denies creation or deletion of namespaces with names containing
"/" or matching exactly "." or "..".

Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
ip/ipnetns.c | 10 ++++++++++
1 file changed, 10 insertions(+)

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 0b0378a..4254994 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -766,6 +766,11 @@ static int netns_monitor(int argc, char **argv)
return 0;
}
+static int invalid_name(const char *name)
+{
+ return strchr(name, '/') || !strcmp(name, ".") || !strcmp(name, "..");
+}
+
int do_netns(int argc, char **argv)
{
netns_nsid_socket_init();
@@ -775,6 +780,11 @@ int do_netns(int argc, char **argv)
return netns_list(0, NULL);
}
+ if (argc > 1 && invalid_name(argv[1])) {
+ fprintf(stderr, "Invalid netns name \"%s\"\n", argv[1]);
+ exit(-1);
+ }
+
if ((matches(*argv, "list") == 0) || (matches(*argv, "show") == 0) ||
(matches(*argv, "lst") == 0)) {
netns_map_init();
--
1.8.3.1

58
SOURCES/0015-utils-return-default-family-when-rtm_family-is-not-R.patch

@ -0,0 +1,58 @@ @@ -0,0 +1,58 @@
From 420957e4c56f65703c6f2f24da0ea35c6b7bbcda Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 27 Jul 2017 21:52:30 +0200
Subject: [PATCH] utils: return default family when rtm_family is not
RTNL_FAMILY_IPMR/IP6MR

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1475762
Upstream Status: iproute2.git commit 5ce897a03bfd

commit 5ce897a03bfda76dc66dc1acfa014fc0e3d3022a
Author: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu Jul 27 17:44:15 2017 +0800

utils: return default family when rtm_family is not RTNL_FAMILY_IPMR/IP6MR

When we get a multicast route, the rtm_type is RTN_MULTICAST, but the
rtm_family may be AF_INET. If we only check the type with RTNL_FAMILY_IPMR,
we will get a malformed address. e.g.

+ ip -4 route add multicast 172.111.1.1 dev em1 table main

Before fix:
+ ip route list type multicast table main
multicast ac6f:101:800:400:400:0:3c00:0 dev em1 scope link

After fix:
+ ip route list type multicast table main
multicast 172.111.1.1 dev em1 scope link

Fixes: 56e3eb4c3400 ("ip: route: fix multicast route dumps")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Phil Sutter <phil@nwl.cc>

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
lib/utils.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lib/utils.c b/lib/utils.c
index 7d6ee53..9f55391 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -1219,5 +1219,11 @@ int get_real_family(int rtm_type, int rtm_family)
if (rtm_type != RTN_MULTICAST)
return rtm_family;
- return rtm_family == RTNL_FAMILY_IPMR ? AF_INET : AF_INET6;
+ if (rtm_family == RTNL_FAMILY_IPMR)
+ return AF_INET;
+
+ if (rtm_family == RTNL_FAMILY_IP6MR)
+ return AF_INET6;
+
+ return rtm_family;
}
--
1.8.3.1

49
SOURCES/0016-link_gre6-Fix-for-changing-tclass-flowlabel.patch

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
From 449517f7769dde4905564ce17e126bfd4e1f7147 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 6 Oct 2017 17:27:09 +0200
Subject: [PATCH] link_gre6: Fix for changing tclass/flowlabel

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1487486
Upstream Status: iproute2.git commit e7fefb3214b5a

commit e7fefb3214b5a1ed030cab9df513560c503a9851
Author: Phil Sutter <phil@nwl.cc>
Date: Fri Sep 1 16:08:08 2017 +0200

link_gre6: Fix for changing tclass/flowlabel

When trying to change tclass or flowlabel of a GREv6 tunnel which has
the respective value set already, the code accidentally bitwise OR'ed
the old and the new value, leading to unexpected results. Fix this by
clearing the relevant bits of flowinfo variable prior to assigning the
new value.

Fixes: af89576d7a8c4 ("iproute2: GRE over IPv6 tunnel support.")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
ip/link_gre6.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index 76416b2..fe3ab64 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -282,6 +282,7 @@ get_failed:
else {
if (get_u8(&uval, *argv, 16))
invarg("invalid TClass", *argv);
+ flowinfo &= ~IP6_FLOWINFO_TCLASS;
flowinfo |= htonl((__u32)uval << 20) & IP6_FLOWINFO_TCLASS;
flags &= ~IP6_TNL_F_USE_ORIG_TCLASS;
}
@@ -297,6 +298,7 @@ get_failed:
invarg("invalid Flowlabel", *argv);
if (uval > 0xFFFFF)
invarg("invalid Flowlabel", *argv);
+ flowinfo &= ~IP6_FLOWINFO_FLOWLABEL;
flowinfo |= htonl(uval) & IP6_FLOWINFO_FLOWLABEL;
flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL;
}
--
1.8.3.1

92
SOURCES/0017-netlink-Change-rtnl_dump_done-to-always-show-error.patch

@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
From 61ccf0f453306e727e254e6de1641bb934a3b7ec Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:07 +0800
Subject: [PATCH] netlink: Change rtnl_dump_done to always show error

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git commit 05a14fc12188

commit 05a14fc1218885ba6236b409fbf6b89976b8636e
Author: David Ahern <dsahern@gmail.com>
Date: Tue May 16 14:22:46 2017 -0700

netlink: Change rtnl_dump_done to always show error

The original code which became rtnl_dump_done only shows netlink errors
if the protocol is NETLINK_SOCK_DIAG, but netlink dumps always append
the length which contains any error encountered during the dump. Update
rtnl_dump_done to always show the error if there is one.

As an *example* without this patch, dumping a route object that exceeds
the internal buffer size terminates with no message to the user -- the
dump just ends because the NLMSG_DONE attribute was received. With this
patch the user at least gets a message that the dump was aborted.

$ ip ro ls
default via 10.0.2.2 dev eth0
10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15
10.10.0.0/16 dev veth1 proto kernel scope link src 10.10.0.1
172.16.1.0/24 dev br0.11 proto kernel scope link src 172.16.1.1
Error: Buffer too small for object
Dump terminated

The point of this patch is to notify the user of a failure versus
silently exiting on a partial dump. Because the NLMSG_DONE attribute
was received, the entire dump needs to be restarted to use a larger
buffer for EMSGSIZE errors. That could be done automatically but it
has other user impacts (e.g., duplicate output if the dump is
restarted) and should be the subject of a different patch.

Signed-off-by: David Ahern <dsahern@gmail.com>

Signed-off-by: Hangbin Liu <haliu@redhat.com>
---
lib/libnetlink.c | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 9303b66..e91bd5a 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -266,21 +266,27 @@ static int rtnl_dump_done(const struct rtnl_handle *rth,
{
int len = *(int *)NLMSG_DATA(h);
- if (rth->proto == NETLINK_SOCK_DIAG) {
- if (h->nlmsg_len < NLMSG_LENGTH(sizeof(int))) {
- fprintf(stderr, "DONE truncated\n");
- return -1;
- }
-
+ if (h->nlmsg_len < NLMSG_LENGTH(sizeof(int))) {
+ fprintf(stderr, "DONE truncated\n");
+ return -1;
+ }
- if (len < 0) {
- errno = -len;
- if (errno == ENOENT || errno == EOPNOTSUPP)
- return -1;
+ if (len < 0) {
+ errno = -len;
+ switch (errno) {
+ case ENOENT:
+ case EOPNOTSUPP:
+ return -1;
+ case EMSGSIZE:
+ fprintf(stderr,
+ "Error: Buffer too small for object.\n");
+ break;
+ default:
perror("RTNETLINK answers");
- return len;
}
+ return len;
}
+
return 0;
}
--
1.8.3.1

47
SOURCES/0018-libnetlink-drop-unused-parameter-to-rtnl_dump_done.patch

@ -0,0 +1,47 @@ @@ -0,0 +1,47 @@
From 9346e08c2f9059decf889fb89f2859e7ed61f573 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:08 +0800
Subject: [PATCH] libnetlink: drop unused parameter to rtnl_dump_done

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git commit 0efa625765b4

commit 0efa625765b4481e1e474526eb0feda747b720e5
Author: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu Aug 24 15:02:32 2017 -0700

libnetlink: drop unused parameter to rtnl_dump_done

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Signed-off-by: Hangbin Liu <haliu@redhat.com>
---
lib/libnetlink.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index e91bd5a..b08518d 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -261,8 +261,7 @@ int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n)
return sendmsg(rth->fd, &msg, 0);
}
-static int rtnl_dump_done(const struct rtnl_handle *rth,
- struct nlmsghdr *h)
+static int rtnl_dump_done(struct nlmsghdr *h)
{
int len = *(int *)NLMSG_DATA(h);
@@ -368,7 +367,7 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
dump_intr = 1;
if (h->nlmsg_type == NLMSG_DONE) {
- err = rtnl_dump_done(rth, h);
+ err = rtnl_dump_done(h);
if (err < 0)
return -1;
--
1.8.3.1

255
SOURCES/0019-iproute-Add-support-for-extended-ack-to-rtnl_talk.patch

@ -0,0 +1,255 @@ @@ -0,0 +1,255 @@
From a9f81b704c4e883a996927e77afdb960a7f47fd9 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:09 +0800
Subject: [PATCH] iproute: Add support for extended ack to rtnl_talk

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git commit b6432e68ac2f
Conflicts: Manually added NETLINK_EXT_ACK define to linux headers.

commit b6432e68ac2f1f6b4ea50aa0d6d47e72c445c71c
Author: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri Aug 4 09:52:15 2017 -0700

iproute: Add support for extended ack to rtnl_talk

Add support for extended ack error reporting via libmnl.
Add a new function rtnl_talk_extack that takes a callback as an input
arg. If a netlink response contains extack attributes, the callback is
invoked with the err string, offset in the message and a pointer
to the message returned by the kernel.

If iproute2 is built without libmnl, it will still work but
extended error reports from kernel will not be available.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Signed-off-by: Hangbin Liu <haliu@redhat.com>

squash! iproute: Add support for extended ack to rtnl_talk
---
include/libnetlink.h | 6 +++
include/linux/netlink.h | 1 +
lib/Makefile | 7 ++++
lib/libnetlink.c | 109 ++++++++++++++++++++++++++++++++++++++++++++----
4 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/include/libnetlink.h b/include/libnetlink.h
index bd0267d..654aebc 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -65,6 +65,9 @@ typedef int (*rtnl_listen_filter_t)(const struct sockaddr_nl *,
struct rtnl_ctrl_data *,
struct nlmsghdr *n, void *);
+typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
+ const struct nlmsghdr *inner_nlh);
+
struct rtnl_dump_filter_arg {
rtnl_filter_t filter;
void *arg1;
@@ -81,6 +84,9 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t len)
__attribute__((warn_unused_result));
+int rtnl_talk_extack(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t len, nl_ext_ack_fn_t errfn)
+ __attribute__((warn_unused_result));
int rtnl_talk_suppress_rtnl_errmsg(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t len)
__attribute__((warn_unused_result));
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index a982b3c..d1e26a2 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -113,6 +113,7 @@ struct nlmsgerr {
#define NETLINK_LISTEN_ALL_NSID 8
#define NETLINK_LIST_MEMBERSHIPS 9
#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
struct nl_pktinfo {
__u32 group;
diff --git a/lib/Makefile b/lib/Makefile
index 1d24ca2..f81888c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,6 +4,13 @@ ifeq ($(IP_CONFIG_SETNS),y)
CFLAGS += -DHAVE_SETNS
endif
+ifeq ($(HAVE_MNL),y)
+ CFLAGS += $(shell $(PKG_CONFIG) libmnl --cflags)
+ LDLIBS += $(shell $(PKG_CONFIG) libmnl --libs)
+else
+@warn "libmnl required for error support"
+endif
+
CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index b08518d..a057831 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -36,6 +36,79 @@
int rcvbuf = 1024 * 1024;
+#ifdef HAVE_LIBMNL
+#include <libmnl/libmnl.h>
+
+static const enum mnl_attr_data_type extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+ [NLMSGERR_ATTR_MSG] = MNL_TYPE_NUL_STRING,
+ [NLMSGERR_ATTR_OFFS] = MNL_TYPE_U32,
+};
+
+static int err_attr_cb(const struct nlattr *attr, void *data)
+{
+ const struct nlattr **tb = data;
+ uint16_t type;
+
+ if (mnl_attr_type_valid(attr, NLMSGERR_ATTR_MAX) < 0)
+ return MNL_CB_ERROR;
+
+ type = mnl_attr_get_type(attr);
+ if (mnl_attr_validate(attr, extack_policy[type]) < 0)
+ return MNL_CB_ERROR;
+
+
+ tb[type] = attr;
+ return MNL_CB_OK;
+}
+
+
+/* dump netlink extended ack error message */
+static int nl_dump_ext_err(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
+{
+ struct nlattr *tb[NLMSGERR_ATTR_MAX + 1];
+ const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh);
+ const struct nlmsghdr *err_nlh = NULL;
+ unsigned int hlen = sizeof(*err);
+ const char *errmsg = NULL;
+ uint32_t off = 0;
+
+ if (!errfn)
+ return 0;
+
+ /* no TLVs, nothing to do here */
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += mnl_nlmsg_get_payload_len(&err->msg);
+
+ mnl_attr_parse(nlh, hlen, err_attr_cb, tb);
+
+ if (tb[NLMSGERR_ATTR_MSG])
+ errmsg = mnl_attr_get_str(tb[NLMSGERR_ATTR_MSG]);
+
+ if (tb[NLMSGERR_ATTR_OFFS]) {
+ off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]);
+
+ if (off > nlh->nlmsg_len) {
+ fprintf(stderr,
+ "Invalid offset for NLMSGERR_ATTR_OFFS\n");
+ off = 0;
+ } else if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ err_nlh = &err->msg;
+ }
+
+ return errfn(errmsg, off, err_nlh);
+}
+#else
+/* No extended error ack without libmnl */
+static int nl_dump_ext_err(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn)
+{
+ return 0;
+}
+#endif
+
void rtnl_close(struct rtnl_handle *rth)
{
if (rth->fd >= 0) {
@@ -49,6 +122,7 @@ int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
{
socklen_t addr_len;
int sndbuf = 32768;
+ int one = 1;
memset(rth, 0, sizeof(*rth));
@@ -71,6 +145,10 @@ int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
return -1;
}
+ /* Older kernels may no support extended ACK reporting */
+ setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+
memset(&rth->local, 0, sizeof(rth->local));
rth->local.nl_family = AF_NETLINK;
rth->local.nl_groups = subscriptions;
@@ -421,9 +499,19 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth,
return rtnl_dump_filter_l(rth, a);
}
+static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
+ nl_ext_ack_fn_t errfn)
+{
+ if (nl_dump_ext_err(h, errfn))
+ return;
+
+ fprintf(stderr, "RTNETLINK answers: %s\n",
+ strerror(-err->error));
+}
+
static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t maxlen,
- bool show_rtnl_err)
+ bool show_rtnl_err, nl_ext_ack_fn_t errfn)
{
int status;
unsigned int seq;
@@ -510,10 +598,10 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
return 0;
}
- if (rtnl->proto != NETLINK_SOCK_DIAG && show_rtnl_err)
- fprintf(stderr,
- "RTNETLINK answers: %s\n",
- strerror(-err->error));
+ if (rtnl->proto != NETLINK_SOCK_DIAG &&
+ show_rtnl_err)
+ rtnl_talk_error(h, err, errfn);
+
errno = -err->error;
return -1;
}
@@ -545,13 +633,20 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t maxlen)
{
- return __rtnl_talk(rtnl, n, answer, maxlen, true);
+ return __rtnl_talk(rtnl, n, answer, maxlen, true, NULL);
+}
+
+int rtnl_talk_extack(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr *answer, size_t maxlen,
+ nl_ext_ack_fn_t errfn)
+{
+ return __rtnl_talk(rtnl, n, answer, maxlen, true, errfn);
}
int rtnl_talk_suppress_rtnl_errmsg(struct rtnl_handle *rtnl, struct nlmsghdr *n,
struct nlmsghdr *answer, size_t maxlen)
{
- return __rtnl_talk(rtnl, n, answer, maxlen, false);
+ return __rtnl_talk(rtnl, n, answer, maxlen, false, NULL);
}
int rtnl_listen_all_nsid(struct rtnl_handle *rth)
--
1.8.3.1

51
SOURCES/0020-iplink-check-for-message-truncation-in-iplink_get.patch

@ -0,0 +1,51 @@ @@ -0,0 +1,51 @@
From 8372b7bb8f7211563d888fdd30e473c161f7d0a0 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:10 +0800
Subject: [PATCH] iplink: check for message truncation in iplink_get()

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git commit 6599162b958e

commit 6599162b958ea5a43d729df4f30aad515db26ff4
Author: Michal Kubecek <mkubecek@suse.cz>
Date: Fri Sep 1 18:39:11 2017 +0200

iplink: check for message truncation in iplink_get()

If message length exceeds maxlen argument of rtnl_talk(), it is truncated
to maxlen but unlike in the case of truncation to the length of local
buffer in rtnl_talk(), the caller doesn't get any indication of a problem.

In particular, iplink_get() passes the truncated message on and parsing it
results in various warnings and sometimes even a segfault (observed with
"ip link show dev ..." for a NIC with 125 VFs).

Handle message truncation in iplink_get() the same way as truncation in
rtnl_talk() would be handled: return an error.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>

Signed-off-by: Hangbin Liu <haliu@redhat.com>
---
ip/iplink.c | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/ip/iplink.c b/ip/iplink.c
index da3f9a7..2b2421f 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -1031,6 +1031,11 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
if (rtnl_talk(&rth, &req.n, &answer.n, sizeof(answer)) < 0)
return -2;
+ if (answer.n.nlmsg_len > sizeof(answer.buf)) {
+ fprintf(stderr, "Message truncated from %u to %lu\n",
+ answer.n.nlmsg_len, sizeof(answer.buf));
+ return -2;
+ }
if (brief)
print_linkinfo_brief(NULL, &answer.n, stdout);
--
1.8.3.1

48
SOURCES/0021-iplink-double-the-buffer-size-also-in-iplink_get.patch

@ -0,0 +1,48 @@ @@ -0,0 +1,48 @@
From c560900fc16eeac064cc7c43a96c5343fe68ae76 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:11 +0800
Subject: [PATCH] iplink: double the buffer size also in iplink_get()

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git commit 460c03f3f3cc

commit 460c03f3f3cc436ff4673d75722ba68a6ec9343d
Author: Michal Kubecek <mkubecek@suse.cz>
Date: Fri Sep 1 18:39:16 2017 +0200

iplink: double the buffer size also in iplink_get()

Commit 72b365e8e0fd ("libnetlink: Double the dump buffer size") increased
the buffer size for "ip link show" command to 32 KB to handle NICs with
large number of VFs. With "dev" filter, a different code path is taken and
iplink_get() still uses only 16 KB buffer.

The size of 32768 is not very future-proof as NICs supporting 120-128 VFs
are already in use so that single RTM_NEWLINK message in the dump can
exceed 30000 bytes. But it's what rtnl_talk() and rtnl_dump_filter_l() use
so let's be consistent. Once this proves insufficient, all three sizes
should be increased.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>

Signed-off-by: Hangbin Liu <haliu@redhat.com>
---
ip/iplink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ip/iplink.c b/ip/iplink.c
index 2b2421f..5afbadf 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -1015,7 +1015,7 @@ int iplink_get(unsigned int flags, char *name, __u32 filt_mask)
};
struct {
struct nlmsghdr n;
- char buf[16384];
+ char buf[32768];
} answer;
if (name) {
--
1.8.3.1

252
SOURCES/0022-lib-libnetlink-re-malloc-buff-if-size-is-not-enough.patch

@ -0,0 +1,252 @@ @@ -0,0 +1,252 @@
From 49e7c0e7c8c9a982fd3aa69bbed4e306a1dcb331 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <haliu@redhat.com>
Date: Wed, 8 Nov 2017 14:39:12 +0800
Subject: [PATCH] lib/libnetlink: re malloc buff if size is not enough

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1380803
Upstream Status: iproute2.git net-next commit 2d34851cd341

commit 2d34851cd341f0e1b3fc17ca3e6e874229f3a1f8
Author: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu Oct 26 09:41:46 2017 +0800

lib/libnetlink: re malloc buff if size is not enough

With commit 72b365e8e0fd ("libnetlink: Double the dump buffer size")
we doubled the buffer size to support more VFs. But the VFs number is
increasing all the time. Some customers even use more than 200 VFs now.

We could not double it every time the buffer is not enough. Let's just
not hard code the buffer size and malloc the correct number when running.

Introduce function rtnl_recvmsg() to always return a newly allocated buffer.
The caller needs to free it after use.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Phil Sutter <phil@nwl.cc>

Signed-off-by: Hangbin Liu <haliu@redhat.com>
---
lib/libnetlink.c | 114 ++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 80 insertions(+), 34 deletions(-)

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index a057831..446c960 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -386,6 +386,64 @@ static void rtnl_dump_error(const struct rtnl_handle *rth,
}
}
+static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(fd, msg, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0) {
+ fprintf(stderr, "netlink receive error %s (%d)\n",
+ strerror(errno), errno);
+ return -errno;
+ }
+
+ if (len == 0) {
+ fprintf(stderr, "EOF on netlink\n");
+ return -ENODATA;
+ }
+
+ return len;
+}
+
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+ struct iovec *iov = msg->msg_iov;
+ char *buf;
+ int len;
+
+ iov->iov_base = NULL;
+ iov->iov_len = 0;
+
+ len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
+ if (len < 0)
+ return len;
+
+ buf = malloc(len);
+ if (!buf) {
+ fprintf(stderr, "malloc error: not enough buffer\n");
+ return -ENOMEM;
+ }
+
+ iov->iov_base = buf;
+ iov->iov_len = len;
+
+ len = __rtnl_recvmsg(fd, msg, 0);
+ if (len < 0) {
+ free(buf);
+ return len;
+ }
+
+ if (answer)
+ *answer = buf;
+ else
+ free(buf);
+
+ return len;
+}
+
int rtnl_dump_filter_l(struct rtnl_handle *rth,
const struct rtnl_dump_filter_arg *arg)
{
@@ -397,31 +455,18 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
.msg_iov = &iov,
.msg_iovlen = 1,
};
- char buf[32768];
+ char *buf;
int dump_intr = 0;
- iov.iov_base = buf;
while (1) {
int status;
const struct rtnl_dump_filter_arg *a;
int found_done = 0;
int msglen = 0;
- iov.iov_len = sizeof(buf);
- status = recvmsg(rth->fd, &msg, 0);
-
- if (status < 0) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- fprintf(stderr, "netlink receive error %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
-
- if (status == 0) {
- fprintf(stderr, "EOF on netlink\n");
- return -1;
- }
+ status = rtnl_recvmsg(rth->fd, &msg, &buf);
+ if (status < 0)
+ return status;
if (rth->dump_fp)
fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);
@@ -446,8 +491,10 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
if (h->nlmsg_type == NLMSG_DONE) {
err = rtnl_dump_done(h);
- if (err < 0)
+ if (err < 0) {
+ free(buf);
return -1;
+ }
found_done = 1;
break; /* process next filter */
@@ -455,19 +502,23 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
if (h->nlmsg_type == NLMSG_ERROR) {
rtnl_dump_error(rth, h);
+ free(buf);
return -1;
}
if (!rth->dump_fp) {
err = a->filter(&nladdr, h, a->arg1);
- if (err < 0)
+ if (err < 0) {
+ free(buf);
return err;
+ }
}
skip_it:
h = NLMSG_NEXT(h, msglen);
}
}
+ free(buf);
if (found_done) {
if (dump_intr)
@@ -527,7 +578,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
.msg_iov = &iov,
.msg_iovlen = 1,
};
- char buf[32768] = {};
+ char *buf;
n->nlmsg_seq = seq = ++rtnl->seq;
@@ -540,22 +591,12 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
return -1;
}
- iov.iov_base = buf;
while (1) {
- iov.iov_len = sizeof(buf);
- status = recvmsg(rtnl->fd, &msg, 0);
+ status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+
+ if (status < 0)
+ return status;
- if (status < 0) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- fprintf(stderr, "netlink receive error %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- if (status == 0) {
- fprintf(stderr, "EOF on netlink\n");
- return -1;
- }
if (msg.msg_namelen != sizeof(nladdr)) {
fprintf(stderr,
"sender address length == %d\n",
@@ -569,6 +610,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
if (l < 0 || len > status) {
if (msg.msg_flags & MSG_TRUNC) {
fprintf(stderr, "Truncated message\n");
+ free(buf);
return -1;
}
fprintf(stderr,
@@ -595,6 +637,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
if (answer)
memcpy(answer, h,
MIN(maxlen, h->nlmsg_len));
+ free(buf);
return 0;
}
@@ -603,12 +646,14 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
rtnl_talk_error(h, err, errfn);
errno = -err->error;
+ free(buf);
return -1;
}
if (answer) {
memcpy(answer, h,
MIN(maxlen, h->nlmsg_len));
+ free(buf);
return 0;
}
@@ -617,6 +662,7 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
status -= NLMSG_ALIGN(len);
h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
}
+ free(buf);
if (msg.msg_flags & MSG_TRUNC) {
fprintf(stderr, "Message truncated\n");
--
1.8.3.1

1604
SOURCES/0023-lib-libnetlink-update-rtnl_talk-to-support-malloc-bu.patch

File diff suppressed because it is too large Load Diff

2048
SOURCES/0024-Update-linux-headers.patch

File diff suppressed because it is too large Load Diff

151
SOURCES/0025-devlink-Change-netlink-attribute-validation.patch

@ -0,0 +1,151 @@ @@ -0,0 +1,151 @@
From 56a3a027d053ab592a3363a92108c93c150301f5 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] devlink: Change netlink attribute validation

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit 4f10cede93b758785f5b201774ed3e02eaf1a7bb
Author: Arkadi Sharshevsky <arkadis@mellanox.com>
Date: Wed May 3 13:25:22 2017 +0200

devlink: Change netlink attribute validation

Currently the netlink attribute resolving is done by a sequence of
if's. Change the attribute resolving to table lookup.

Signed-off-by: Arkadi Sharshevsky <arkadis@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
devlink/devlink.c | 103 ++++++++++++++++--------------------------------------
1 file changed, 30 insertions(+), 73 deletions(-)

diff --git a/devlink/devlink.c b/devlink/devlink.c
index e90226e..35220d8 100644
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
@@ -232,88 +232,45 @@ static bool dl_no_arg(struct dl *dl)
return dl_argc(dl) == 0;
}
+static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = MNL_TYPE_NUL_STRING,
+ [DEVLINK_ATTR_DEV_NAME] = MNL_TYPE_NUL_STRING,
+ [DEVLINK_ATTR_PORT_INDEX] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_PORT_TYPE] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_PORT_DESIRED_TYPE] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_PORT_NETDEV_IFINDEX] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_PORT_NETDEV_NAME] = MNL_TYPE_NUL_STRING,
+ [DEVLINK_ATTR_PORT_IBDEV_NAME] = MNL_TYPE_NUL_STRING,
+ [DEVLINK_ATTR_SB_INDEX] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_SB_SIZE] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_SB_INGRESS_POOL_COUNT] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_EGRESS_POOL_COUNT] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_INGRESS_TC_COUNT] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_EGRESS_TC_COUNT] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_POOL_INDEX] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_POOL_TYPE] = MNL_TYPE_U8,
+ [DEVLINK_ATTR_SB_POOL_SIZE] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = MNL_TYPE_U8,
+ [DEVLINK_ATTR_SB_THRESHOLD] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_SB_TC_INDEX] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_SB_OCC_CUR] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_SB_OCC_MAX] = MNL_TYPE_U32,
+ [DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
+};
+
static int attr_cb(const struct nlattr *attr, void *data)
{
const struct nlattr **tb = data;
int type;
- type = mnl_attr_get_type(attr);
-
if (mnl_attr_type_valid(attr, DEVLINK_ATTR_MAX) < 0)
return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_BUS_NAME &&
- mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_DEV_NAME &&
- mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_INDEX &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_TYPE &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_DESIRED_TYPE &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_NETDEV_IFINDEX &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_NETDEV_NAME &&
- mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_PORT_IBDEV_NAME &&
- mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_INDEX &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_SIZE &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_INGRESS_POOL_COUNT &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_EGRESS_POOL_COUNT &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_INGRESS_TC_COUNT &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_EGRESS_TC_COUNT &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_POOL_INDEX &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_POOL_TYPE &&
- mnl_attr_validate(attr, MNL_TYPE_U8) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_POOL_SIZE &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE &&
- mnl_attr_validate(attr, MNL_TYPE_U8) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_THRESHOLD &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_TC_INDEX &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_OCC_CUR &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_SB_OCC_MAX &&
- mnl_attr_validate(attr, MNL_TYPE_U32) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_ESWITCH_MODE &&
- mnl_attr_validate(attr, MNL_TYPE_U16) < 0)
- return MNL_CB_ERROR;
- if (type == DEVLINK_ATTR_ESWITCH_INLINE_MODE &&
- mnl_attr_validate(attr, MNL_TYPE_U8) < 0)
+ type = mnl_attr_get_type(attr);
+ if (mnl_attr_validate(attr, devlink_policy[type]) < 0)
return MNL_CB_ERROR;
+
tb[type] = attr;
return MNL_CB_OK;
}
--
1.8.3.1

1597
SOURCES/0026-devlink-Add-support-for-pipeline-debug-dpipe.patch

File diff suppressed because it is too large Load Diff

104
SOURCES/0027-tc-Reflect-HW-offload-status.patch

@ -0,0 +1,104 @@ @@ -0,0 +1,104 @@
From 724d67b36f9e6bbbfac88b29fee019c05284a888 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] tc: Reflect HW offload status

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit e57285b81a098ed705d683ce94f9abd1cc53438a
Author: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu May 4 16:15:15 2017 +0300

tc: Reflect HW offload status

Currently there is no way of querying whether a filter is
offloaded to HW or not when using "both" policy (where none
of skip_sw or skip_hw flags are set by user-space).

Add two new flags, "in hw" and "not in hw" such that user
space can determine if a filter is actually offloaded to
hw or not. The "in hw" UAPI semantics was chosen so it's
similar to the "skip hw" flag logic.

If none of these two flags are set, this signals running
over older kernel.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
tc/f_bpf.c | 5 +++++
tc/f_flower.c | 5 +++++
tc/f_matchall.c | 5 +++++
tc/f_u32.c | 5 +++++
4 files changed, 20 insertions(+)

diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index df8a259..75c44c0 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -210,6 +210,11 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
fprintf(f, "skip_hw ");
if (flags & TCA_CLS_FLAGS_SKIP_SW)
fprintf(f, "skip_sw ");
+
+ if (flags & TCA_CLS_FLAGS_IN_HW)
+ fprintf(f, "in_hw ");
+ else if (flags & TCA_CLS_FLAGS_NOT_IN_HW)
+ fprintf(f, "not_in_hw ");
}
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 5aac4a0..ebc63ca 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -1171,6 +1171,11 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
fprintf(f, "\n skip_hw");
if (flags & TCA_CLS_FLAGS_SKIP_SW)
fprintf(f, "\n skip_sw");
+
+ if (flags & TCA_CLS_FLAGS_IN_HW)
+ fprintf(f, "\n in_hw");
+ else if (flags & TCA_CLS_FLAGS_NOT_IN_HW)
+ fprintf(f, "\n not_in_hw");
}
if (tb[TCA_FLOWER_ACT])
diff --git a/tc/f_matchall.c b/tc/f_matchall.c
index ac48630..5a51e75 100644
--- a/tc/f_matchall.c
+++ b/tc/f_matchall.c
@@ -137,6 +137,11 @@ static int matchall_print_opt(struct filter_util *qu, FILE *f,
fprintf(f, "\n skip_hw");
if (flags & TCA_CLS_FLAGS_SKIP_SW)
fprintf(f, "\n skip_sw");
+
+ if (flags & TCA_CLS_FLAGS_IN_HW)
+ fprintf(f, "\n in_hw");
+ else if (flags & TCA_CLS_FLAGS_NOT_IN_HW)
+ fprintf(f, "\n not_in_hw");
}
if (tb[TCA_MATCHALL_ACT])
diff --git a/tc/f_u32.c b/tc/f_u32.c
index 92c1fcd..ff700e9 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -1264,6 +1264,11 @@ static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
fprintf(f, "skip_hw ");
if (flags & TCA_CLS_FLAGS_SKIP_SW)
fprintf(f, "skip_sw ");
+
+ if (flags & TCA_CLS_FLAGS_IN_HW)
+ fprintf(f, "in_hw ");
+ else if (flags & TCA_CLS_FLAGS_NOT_IN_HW)
+ fprintf(f, "not_in_hw ");
}
if (tb[TCA_U32_PCNT]) {
--
1.8.3.1

39
SOURCES/0028-pedit-Fix-a-typo-in-warning.patch

@@ -0,0 +1,39 @@
From b2e49d92325d876d29e2d4f1a83bd86adfc4bc73 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] pedit: Fix a typo in warning

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit 290cdc058d8bbcae3cfefafe83d8263e02ac5a6f
Author: Amir Vadai <amir@vadai.me>
Date: Sun May 14 11:17:43 2017 +0300

pedit: Fix a typo in warning

'ex' attribute should be placed after 'action pedit' and not after
'munge'.

Signed-off-by: Amir Vadai <amir@vadai.me>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
tc/m_pedit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tc/m_pedit.c b/tc/m_pedit.c
index 6498dd9..7ef2acc 100644
--- a/tc/m_pedit.c
+++ b/tc/m_pedit.c
@@ -146,7 +146,7 @@ int pack_key(struct m_pedit_sel *_sel, struct m_pedit_key *tkey)
if (tkey->htype != TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK ||
tkey->cmd != TCA_PEDIT_KEY_EX_CMD_SET) {
fprintf(stderr,
- "Munge parameters not supported. Use 'munge ex'.\n");
+ "Munge parameters not supported. Use 'pedit ex munge ...'.\n");
return -1;
}
}
--
1.8.3.1

58
SOURCES/0029-pedit-Do-not-allow-using-retain-for-too-big-fields.patch

@@ -0,0 +1,58 @@
From 2bf855b076bbe5aa4665f7efd8bcaf882821cab5 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] pedit: Do not allow using retain for too big fields

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit cdca191862775c47533908301760edd55763e861
Author: Amir Vadai <amir@vadai.me>
Date: Sun May 14 11:17:44 2017 +0300

pedit: Do not allow using retain for too big fields

Using retain for fields longer than 32 bits is not supported.
Do not allow user to do it.

Signed-off-by: Amir Vadai <amir@vadai.me>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
man/man8/tc-pedit.8 | 3 ++-
tc/m_pedit.c | 6 ++++++
2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8
index 7f482ea..9c4d57b 100644
--- a/man/man8/tc-pedit.8
+++ b/man/man8/tc-pedit.8
@@ -266,7 +266,8 @@ Keep the addressed data as is.
.BI retain " RVAL"
This optional extra part of
.I CMD_SPEC
-allows to exclude bits from being changed.
+allows to exclude bits from being changed. Supported only for fields of 32 bits
+or smaller.
.TP
.I CONTROL
The following keywords allow to control how the tree of qdisc, classes,
diff --git a/tc/m_pedit.c b/tc/m_pedit.c
index 7ef2acc..9b74c96 100644
--- a/tc/m_pedit.c
+++ b/tc/m_pedit.c
@@ -353,6 +353,12 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain,
argv++;
}
+ if (len > 4 && retain != ~0) {
+ fprintf(stderr,
+ "retain is not supported for fields longer the 32 bits\n");
+ return -1;
+ }
+
if (type == TMAC) {
res = pack_mac(sel, tkey, (__u8 *)val);
goto done;
--
1.8.3.1

55
SOURCES/0030-pedit-Check-for-extended-capability-in-protocol-pars.patch

@@ -0,0 +1,55 @@
From 0bc6d74ce3291b669bc05524b404bc6914dab5ba Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] pedit: Check for extended capability in protocol parser

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit a13426fe1a2b0fdebacc33820105523934eb355f
Author: Amir Vadai <amir@vadai.me>
Date: Sun May 14 11:17:45 2017 +0300

pedit: Check for extended capability in protocol parser

Do not allow using eth and udp header types if non-extended pedit kABI
is being used. Other protocol parsers already have this check.

Signed-off-by: Amir Vadai <amir@vadai.me>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
tc/p_eth.c | 3 +++
tc/p_udp.c | 3 +++
2 files changed, 6 insertions(+)

diff --git a/tc/p_eth.c b/tc/p_eth.c
index ad3e28f..2d2f96c 100644
--- a/tc/p_eth.c
+++ b/tc/p_eth.c
@@ -34,6 +34,9 @@ parse_eth(int *argc_p, char ***argv_p,
if (argc < 2)
return -1;
+ if (!sel->extended)
+ return -1;
+
tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
if (strcmp(*argv, "type") == 0) {
diff --git a/tc/p_udp.c b/tc/p_udp.c
index a56a1b5..3916d95 100644
--- a/tc/p_udp.c
+++ b/tc/p_udp.c
@@ -34,6 +34,9 @@ parse_udp(int *argc_p, char ***argv_p,
if (argc < 2)
return -1;
+ if (!sel->extended)
+ return -1;
+
tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
if (strcmp(*argv, "sport") == 0) {
--
1.8.3.1

304
SOURCES/0031-pedit-Introduce-ipv6-support.patch

@@ -0,0 +1,304 @@
From 26ab66d7c43c3ef60ab058d4c3da8989a5c1dd46 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] pedit: Introduce ipv6 support

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit f3e1b2448a95baef587965b08f48d49b6e1ec2cb
Author: Amir Vadai <amir@vadai.me>
Date: Sun May 14 11:17:46 2017 +0300

pedit: Introduce ipv6 support

Add support for modifying IPv6 headers using pedit.

Signed-off-by: Amir Vadai <amir@vadai.me>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
man/man8/tc-pedit.8 | 30 ++++++++++++++++++
tc/Makefile | 1 +
tc/m_pedit.c | 43 +++++++++++++++++++++++--
tc/p_ip.c | 17 +---------
tc/p_ip6.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 164 insertions(+), 18 deletions(-)
create mode 100644 tc/p_ip6.c

diff --git a/man/man8/tc-pedit.8 b/man/man8/tc-pedit.8
index 9c4d57b..82d4217 100644
--- a/man/man8/tc-pedit.8
+++ b/man/man8/tc-pedit.8
@@ -33,6 +33,8 @@ pedit - generic packet editor action
|
.BI ip " EX_IPHDR_FIELD"
|
+.BI ip6 " IP6HDR_FIELD"
+|
.BI tcp " TCPHDR_FIELD"
|
.BI udp " UDPHDR_FIELD"
@@ -55,6 +57,12 @@ pedit - generic packet editor action
.IR EX_IPHDR_FIELD " := { "
.BR ttl " }"
+
+.ti -8
+.IR IP6HDR_FIELD " := { "
+.BR src " | " dst " | " flow_lbl " | " payload_len " | " nexthdr " |"
+.BR hoplimit " }"
+
.ti -8
.IR TCPHDR_FIELD " := { "
.BR sport " | " dport " | " flags " }"
@@ -211,6 +219,25 @@ are:
.B ttl
.RE
.TP
+.BI ip6 " IP6HDR_FIELD"
+The supported keywords for
+.I IP6HDR_FIELD
+are:
+.RS
+.TP
+.B src
+.TQ
+.B dst
+.TQ
+.B flow_lbl
+.TQ
+.B payload_len
+.TQ
+.B nexthdr
+.TQ
+.B hoplimit
+.RE
+.TP
.BI tcp " TCPHDR_FIELD"
The supported keywords for
.I TCPHDR_FIELD
@@ -331,6 +358,9 @@ tc filter add dev eth0 parent ffff: u32 \\
action pedit ex munge ip dst set 192.168.1.199
tc filter add dev eth0 parent ffff: u32 \\
match ip sport 22 0xffff \\
+ action pedit ex munge ip6 dst set fe80::dacb:8aff:fec7:320e
+tc filter add dev eth0 parent ffff: u32 \\
+ match ip sport 22 0xffff \\
action pedit ex munge eth dst set 11:22:33:44:55:66
tc filter add dev eth0 parent ffff: u32 \\
match ip dport 23 0xffff \\
diff --git a/tc/Makefile b/tc/Makefile
index 446a113..9a6bb1d 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -53,6 +53,7 @@ TCMODULES += m_bpf.o
TCMODULES += m_tunnel_key.o
TCMODULES += m_sample.o
TCMODULES += p_ip.o
+TCMODULES += p_ip6.o
TCMODULES += p_icmp.o
TCMODULES += p_eth.o
TCMODULES += p_tcp.o
diff --git a/tc/m_pedit.c b/tc/m_pedit.c
index 9b74c96..dfa6b2c 100644
--- a/tc/m_pedit.c
+++ b/tc/m_pedit.c
@@ -257,6 +257,32 @@ static int pack_mac(struct m_pedit_sel *sel, struct m_pedit_key *tkey,
return ret;
}
+static int pack_ipv6(struct m_pedit_sel *sel, struct m_pedit_key *tkey,
+ __u32 *ipv6)
+{
+ int ret = 0;
+ int i;
+
+ if (tkey->off & 0x3) {
+ fprintf(stderr,
+ "pack_ipv6: IPv6 offsets must begin in 32bit boundaries\n");
+ return -1;
+ }
+
+ for (i = 0; i < 4; i++) {
+ tkey->mask = 0;
+ tkey->val = ntohl(ipv6[i]);
+
+ ret = pack_key32(~0, sel, tkey);
+ if (ret)
+ return ret;
+
+ tkey->off += 4;
+ }
+
+ return 0;
+}
+
int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type)
{
int argc = *argc_p;
@@ -281,8 +307,16 @@ int parse_val(int *argc_p, char ***argv_p, __u32 *val, int type)
return 0;
}
- if (type == TIPV6)
- return -1; /* not implemented yet */
+ if (type == TIPV6) {
+ inet_prefix addr;
+
+ if (get_prefix_1(&addr, *argv, AF_INET6))
+ return -1;
+
+ memcpy(val, addr.data, addr.bytelen);
+
+ return 0;
+ }
if (type == TMAC) {
#define MAC_ALEN 6
@@ -364,6 +398,11 @@ int parse_cmd(int *argc_p, char ***argv_p, __u32 len, int type, __u32 retain,
goto done;
}
+ if (type == TIPV6) {
+ res = pack_ipv6(sel, tkey, val);
+ goto done;
+ }
+
tkey->val = *v;
tkey->mask = *m;
diff --git a/tc/p_ip.c b/tc/p_ip.c
index 22fe650..0272a6e 100644
--- a/tc/p_ip.c
+++ b/tc/p_ip.c
@@ -1,5 +1,5 @@
/*
- * m_pedit.c packet editor: IPV4/6 header
+ * p_ip.c packet editor: IPV4 header
*
* This program is free software; you can distribute it and/or
* modify it under the terms of the GNU General Public License
@@ -156,23 +156,8 @@ done:
return res;
}
-static int
-parse_ip6(int *argc_p, char ***argv_p,
- struct m_pedit_sel *sel, struct m_pedit_key *tkey)
-{
- int res = -1;
- return res;
-}
-
struct m_pedit_util p_pedit_ip = {
NULL,
"ip",
parse_ip,
};
-
-
-struct m_pedit_util p_pedit_ip6 = {
- NULL,
- "ip6",
- parse_ip6,
-};
diff --git a/tc/p_ip6.c b/tc/p_ip6.c
new file mode 100644
index 0000000..a4824bd
--- /dev/null
+++ b/tc/p_ip6.c
@@ -0,0 +1,91 @@
+/*
+ * p_ip6.c packet editor: IPV6 header
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Amir Vadai <amir@vadai.me>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include "m_pedit.h"
+
+static int
+parse_ip6(int *argc_p, char ***argv_p,
+ struct m_pedit_sel *sel, struct m_pedit_key *tkey)
+{
+ int res = -1;
+ int argc = *argc_p;
+ char **argv = *argv_p;
+
+ if (argc < 2)
+ return -1;
+
+ if (!sel->extended)
+ return -1;
+
+ tkey->htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+
+ if (strcmp(*argv, "src") == 0) {
+ NEXT_ARG();
+ tkey->off = 8;
+ res = parse_cmd(&argc, &argv, 16, TIPV6, RU32, sel, tkey);
+ goto done;
+ }
+ if (strcmp(*argv, "dst") == 0) {
+ NEXT_ARG();
+ tkey->off = 24;
+ res = parse_cmd(&argc, &argv, 16, TIPV6, RU32, sel, tkey);
+ goto done;
+ }
+ if (strcmp(*argv, "flow_lbl") == 0) {
+ NEXT_ARG();
+ tkey->off = 0;
+ res = parse_cmd(&argc, &argv, 4, TU32, 0x0007ffff, sel, tkey);
+ goto done;
+ }
+ if (strcmp(*argv, "payload_len") == 0) {
+ NEXT_ARG();
+ tkey->off = 4;
+ res = parse_cmd(&argc, &argv, 2, TU32, RU16, sel, tkey);
+ goto done;
+ }
+ if (strcmp(*argv, "nexthdr") == 0) {
+ NEXT_ARG();
+ tkey->off = 6;
+ res = parse_cmd(&argc, &argv, 1, TU32, RU8, sel, tkey);
+ goto done;
+ }
+ if (strcmp(*argv, "hoplimit") == 0) {
+ NEXT_ARG();
+ tkey->off = 7;
+ res = parse_cmd(&argc, &argv, 1, TU32, RU8, sel, tkey);
+ goto done;
+ }
+
+ return -1;
+
+done:
+ *argc_p = argc;
+ *argv_p = argv;
+ return res;
+}
+
+struct m_pedit_util p_pedit_ip6 = {
+ NULL,
+ "ipv6",
+ parse_ip6,
+};
--
1.8.3.1

189
SOURCES/0032-devlink-Add-option-to-set-and-show-eswitch-encapsula.patch

@@ -0,0 +1,189 @@
From d9857ffec0266aea1c56ee26369972ade68f501a Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] devlink: Add option to set and show eswitch encapsulation
support

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit d315b706e9d4a550096140aa298d46b2aa7733e9
Author: Roi Dayan <roid@mellanox.com>
Date: Sun May 21 08:37:27 2017 +0300

devlink: Add option to set and show eswitch encapsulation support

This is an e-switch global knob to enable HW support for applying
encapsulation/decapsulation to VF traffic as part of SRIOV e-switch offloading.

The actual encap/decap is carried out (along with the matching and other
actions) per offloaded e-switch rules, e.g. as done when offloading the TC tunnel
key action.

Possible values are enable/disable.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
devlink/devlink.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
man/man8/devlink-dev.8 | 13 +++++++++++++
2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/devlink/devlink.c b/devlink/devlink.c
index e22ee0a..f9bc16c 100644
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
@@ -176,6 +176,7 @@ static void ifname_map_free(struct ifname_map *ifname_map)
#define DL_OPT_ESWITCH_INLINE_MODE BIT(12)
#define DL_OPT_DPIPE_TABLE_NAME BIT(13)
#define DL_OPT_DPIPE_TABLE_COUNTERS BIT(14)
+#define DL_OPT_ESWITCH_ENCAP_MODE BIT(15)
struct dl_opts {
uint32_t present; /* flags of present items */
@@ -195,6 +196,7 @@ struct dl_opts {
enum devlink_eswitch_inline_mode eswitch_inline_mode;
const char *dpipe_table_name;
bool dpipe_counters_enable;
+ bool eswitch_encap_mode;
};
struct dl {
@@ -299,6 +301,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SB_OCC_MAX] = MNL_TYPE_U32,
[DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = MNL_TYPE_U8,
[DEVLINK_ATTR_DPIPE_TABLES] = MNL_TYPE_NESTED,
[DEVLINK_ATTR_DPIPE_TABLE] = MNL_TYPE_NESTED,
[DEVLINK_ATTR_DPIPE_TABLE_NAME] = MNL_TYPE_STRING,
@@ -754,6 +757,19 @@ static int dpipe_counters_enable_get(const char *typestr,
return 0;
}
+static int eswitch_encap_mode_get(const char *typestr, bool *p_mode)
+{
+ if (strcmp(typestr, "enable") == 0) {
+ *p_mode = true;
+ } else if (strcmp(typestr, "disable") == 0) {
+ *p_mode = false;
+ } else {
+ pr_err("Unknown eswitch encap mode \"%s\"\n", typestr);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int dl_argv_parse(struct dl *dl, uint32_t o_required,
uint32_t o_optional)
{
@@ -908,7 +924,19 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required,
if (err)
return err;
o_found |= DL_OPT_DPIPE_TABLE_COUNTERS;
+ } else if (dl_argv_match(dl, "encap") &&
+ (o_all & DL_OPT_ESWITCH_ENCAP_MODE)) {
+ const char *typestr;
+ dl_arg_inc(dl);
+ err = dl_argv_str(dl, &typestr);
+ if (err)
+ return err;
+ err = eswitch_encap_mode_get(typestr,
+ &opts->eswitch_encap_mode);
+ if (err)
+ return err;
+ o_found |= DL_OPT_ESWITCH_ENCAP_MODE;
} else {
pr_err("Unknown option \"%s\"\n", dl_argv(dl));
return -EINVAL;
@@ -986,6 +1014,13 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required,
pr_err("Dpipe table counter state expected\n");
return -EINVAL;
}
+
+ if ((o_required & DL_OPT_ESWITCH_ENCAP_MODE) &&
+ !(o_found & DL_OPT_ESWITCH_ENCAP_MODE)) {
+ pr_err("E-Switch encapsulation option expected.\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1041,6 +1076,9 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
if (opts->present & DL_OPT_DPIPE_TABLE_COUNTERS)
mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED,
opts->dpipe_counters_enable);
+ if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE)
+ mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
+ opts->eswitch_encap_mode);
}
static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl,
@@ -1097,6 +1135,7 @@ static void cmd_dev_help(void)
pr_err("Usage: devlink dev show [ DEV ]\n");
pr_err(" devlink dev eswitch set DEV [ mode { legacy | switchdev } ]\n");
pr_err(" [ inline-mode { none | link | network | transport } ]\n");
+ pr_err(" [ encap { disable | enable } ]\n");
pr_err(" devlink dev eswitch show DEV\n");
}
@@ -1421,6 +1460,12 @@ static void pr_out_eswitch(struct dl *dl, struct nlattr **tb)
eswitch_inline_mode_name(mnl_attr_get_u8(
tb[DEVLINK_ATTR_ESWITCH_INLINE_MODE])));
+ if (tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ bool encap_mode = !!mnl_attr_get_u8(tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+
+ pr_out_str(dl, "encap", encap_mode ? "enable" : "disable");
+ }
+
pr_out_handle_end(dl);
}
@@ -1465,7 +1510,8 @@ static int cmd_dev_eswitch_set(struct dl *dl)
err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE,
DL_OPT_ESWITCH_MODE |
- DL_OPT_ESWITCH_INLINE_MODE);
+ DL_OPT_ESWITCH_INLINE_MODE |
+ DL_OPT_ESWITCH_ENCAP_MODE);
if (err)
return err;
diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8
index 6bfe66f..b074d57 100644
--- a/man/man8/devlink-dev.8
+++ b/man/man8/devlink-dev.8
@@ -34,6 +34,9 @@ devlink-dev \- devlink device configuration
.RI "[ "
.BR inline-mode " { " none " | " link " | " network " | " transport " } "
.RI "]"
+.RI "[ "
+.BR encap " { " disable " | " enable " } "
+.RI "]"
.ti -8
.BR "devlink dev eswitch show"
@@ -81,6 +84,16 @@ Some HWs need the VF driver to put part of the packet headers on the TX descript
.I transport
- L4 mode
+.TP
+.BR encap " { " disable " | " enable " } "
+Set eswitch encapsulation support
+
+.I disable
+- Disable encapsulation support
+
+.I enable
+- Enable encapsulation support
+
.SH "EXAMPLES"
.PP
devlink dev show
--
1.8.3.1

154
SOURCES/0033-tc-flower-add-support-for-tcp-flags.patch

@@ -0,0 +1,154 @@
From 7cbf364a5f68ba008c5e0702266fe3dc606b1d6f Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] tc: flower: add support for tcp flags

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit 0c30d14d0a2fc2fb6b7fef62bea05f2e5c3eb26a
Author: Jiri Pirko <jiri@mellanox.com>
Date: Tue May 23 23:51:39 2017 +0200

tc: flower: add support for tcp flags

Allow user to insert a flower classifier filter rule which includes
match for tcp flags.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
man/man8/tc-flower.8 | 8 +++++++
tc/f_flower.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)

diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8
index ba29065..7648079 100644
--- a/man/man8/tc-flower.8
+++ b/man/man8/tc-flower.8
@@ -35,6 +35,8 @@ flower \- flow based traffic control filter
.IR PREFIX " | { "
.BR dst_port " | " src_port " } "
.IR port_number " } | "
+.B tcp_flags
+.IR MASKED_TCP_FLAGS " | "
.B type
.IR MASKED_TYPE " | "
.B code
@@ -136,6 +138,12 @@ Match on layer 4 protocol source or destination port number. Only available for
.BR ip_proto " values " udp ", " tcp " and " sctp
which have to be specified in beforehand.
.TP
+.BI tcp_flags " MASKED_TCP_FLAGS"
+Match on TCP flags represented as a 12bit bitfield in hexadecimal format.
+A mask may be optionally provided to limit the bits which are matched. A mask
+is provided by following the value with a slash and then the mask. If the mask
+is missing then a match on all bits is assumed.
+.TP
.BI type " MASKED_TYPE"
.TQ
.BI code " MASKED_CODE"
diff --git a/tc/f_flower.c b/tc/f_flower.c
index ebc63ca..1b6b46e 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -57,6 +57,7 @@ static void explain(void)
" src_ip PREFIX |\n"
" dst_port PORT-NUMBER |\n"
" src_port PORT-NUMBER |\n"
+ " tcp_flags MASKED-TCP_FLAGS |\n"
" type MASKED-ICMP-TYPE |\n"
" code MASKED-ICMP-CODE |\n"
" arp_tip IPV4-PREFIX |\n"
@@ -474,6 +475,41 @@ static int flower_parse_port(char *str, __u8 ip_proto,
return 0;
}
+#define TCP_FLAGS_MAX_MASK 0xfff
+
+static int flower_parse_tcp_flags(char *str, int flags_type, int mask_type,
+ struct nlmsghdr *n)
+{
+ char *slash;
+ int ret, err = -1;
+ __u16 flags;
+
+ slash = strchr(str, '/');
+ if (slash)
+ *slash = '\0';
+
+ ret = get_u16(&flags, str, 16);
+ if (ret < 0 || flags & ~TCP_FLAGS_MAX_MASK)
+ goto err;
+
+ addattr16(n, MAX_MSG, flags_type, htons(flags));
+
+ if (slash) {
+ ret = get_u16(&flags, slash + 1, 16);
+ if (ret < 0 || flags & ~TCP_FLAGS_MAX_MASK)
+ goto err;
+ } else {
+ flags = TCP_FLAGS_MAX_MASK;
+ }
+ addattr16(n, MAX_MSG, mask_type, htons(flags));
+
+ err = 0;
+err:
+ if (slash)
+ *slash = '/';
+ return err;
+}
+
static int flower_parse_key_id(const char *str, int type, struct nlmsghdr *n)
{
int ret;
@@ -671,6 +707,16 @@ static int flower_parse_opt(struct filter_util *qu, char *handle,
fprintf(stderr, "Illegal \"src_port\"\n");
return -1;
}
+ } else if (matches(*argv, "tcp_flags") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_tcp_flags(*argv,
+ TCA_FLOWER_KEY_TCP_FLAGS,
+ TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"tcp_flags\"\n");
+ return -1;
+ }
} else if (matches(*argv, "type") == 0) {
NEXT_ARG();
ret = flower_parse_icmp(*argv, eth_type, ip_proto,
@@ -1000,6 +1046,19 @@ static void flower_print_port(FILE *f, char *name, struct rtattr *attr)
fprintf(f, "\n %s %d", name, rta_getattr_be16(attr));
}
+static void flower_print_tcp_flags(FILE *f, char *name,
+ struct rtattr *flags_attr,
+ struct rtattr *mask_attr)
+{
+ if (!flags_attr)
+ return;
+ fprintf(f, "\n %s %x", name, rta_getattr_be16(flags_attr));
+ if (!mask_attr)
+ return;
+ fprintf(f, "/%x", rta_getattr_be16(mask_attr));
+}
+
+
static void flower_print_key_id(FILE *f, const char *name,
struct rtattr *attr)
{
@@ -1110,6 +1169,9 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
if (nl_type >= 0)
flower_print_port(f, "src_port", tb[nl_type]);
+ flower_print_tcp_flags(f, "tcp_flags", tb[TCA_FLOWER_KEY_TCP_FLAGS],
+ tb[TCA_FLOWER_KEY_TCP_FLAGS_MASK]);
+
nl_type = flower_icmp_attr_type(eth_type, ip_proto,
FLOWER_ICMP_FIELD_TYPE);
nl_mask_type = flower_icmp_attr_mask_type(eth_type, ip_proto,
--
1.8.3.1

60
SOURCES/0034-iplink-Update-usage-in-help-message.patch

@@ -0,0 +1,60 @@
From 41b38afb79a82eec66fea08fc021a35cf1d550fc Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] iplink: Update usage in help message

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit 5a3ec4ba64783a640e7716a37faae4be49489e51
Author: Eli Cohen <eli@mellanox.com>
Date: Sun Jun 4 15:36:48 2017 +0300

iplink: Update usage in help message

Add to usage message a description of how to configure Infiniband node
and port GUIDs. Also modify the man page to emphasize the GUIDs are
configured for Infiniband VFs.

Fixes: d91fb3f4c7e4 ("Add support for configuring Infiniband GUIDs")
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
ip/iplink.c | 2 ++
man/man8/ip-link.8.in | 4 ++--
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/ip/iplink.c b/ip/iplink.c
index b08d227..193997c 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -91,6 +91,8 @@ void iplink_usage(void)
" [ query_rss { on | off} ]\n"
" [ state { auto | enable | disable} ] ]\n"
" [ trust { on | off} ] ]\n"
+ " [ node_guid { eui64 } ]\n"
+ " [ port_guid { eui64 } ]\n"
" [ xdp { off |\n"
" object FILE [ section NAME ] [ verbose ] |\n"
" pinned FILE } ]\n"
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index a5ddfe7..48417db 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -1564,10 +1564,10 @@ sent by the VF.
which may impact security and/or performance. (e.g. VF multicast promiscuous mode)
.sp
.BI node_guid " eui64"
-- configure node GUID for the VF.
+- configure node GUID for Infiniband VFs.
.sp
.BI port_guid " eui64"
-- configure port GUID for the VF.
+- configure port GUID for Infiniband VFs.
.in -8
.TP
--
1.8.3.1

177
SOURCES/0035-tc-flower-add-support-for-matching-on-ip-tos-and-ttl.patch

@@ -0,0 +1,177 @@
From f8e5b20689cdc1f488140d9da4adf6f3ca421d3f Mon Sep 17 00:00:00 2001
From: Kamal Heib <kheib@redhat.com>
Date: Thu, 9 Nov 2017 04:44:32 -0500
Subject: [PATCH] tc: flower: add support for matching on ip tos and ttl

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1456539

commit 6ea2c2b1cff676be2d01029a01cbd84d0675213c
Author: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed Jun 7 15:17:54 2017 +0300

tc: flower: add support for matching on ip tos and ttl

Allow users to set flower classifier filter rules which
include matches for ip tos and ttl.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
man/man8/tc-flower.8 | 17 +++++++++++-
tc/f_flower.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/man/man8/tc-flower.8 b/man/man8/tc-flower.8
index 7648079..be46f02 100644
--- a/man/man8/tc-flower.8
+++ b/man/man8/tc-flower.8
@@ -30,7 +30,11 @@ flower \- flow based traffic control filter
.BR vlan_ethtype " { " ipv4 " | " ipv6 " | "
.IR ETH_TYPE " } | "
.BR ip_proto " { " tcp " | " udp " | " sctp " | " icmp " | " icmpv6 " | "
-.IR IP_PROTO " } | { "
+.IR IP_PROTO " } | "
+.B ip_tos
+.IR MASKED_IP_TOS " | "
+.B ip_ttl
+.IR MASKED_IP_TTL " | { "
.BR dst_ip " | " src_ip " } "
.IR PREFIX " | { "
.BR dst_port " | " src_port " } "
@@ -122,6 +126,17 @@ may be
.BR tcp ", " udp ", " sctp ", " icmp ", " icmpv6
or an unsigned 8bit value in hexadecimal format.
.TP
+.BI ip_tos " MASKED_IP_TOS"
+Match on ipv4 TOS or ipv6 traffic-class - eight bits in hexadecimal format.
+A mask may be optionally provided to limit the bits which are matched. A mask
+is provided by following the value with a slash and then the mask. If the mask
+is missing then a match on all bits is assumed.
+.TP
+.BI ip_ttl " MASKED_IP_TTL"
+Match on ipv4 TTL or ipv6 hop-limit - eight bits value in decimal or hexadecimal format.
+A mask may be optionally provided to limit the bits which are matched. Same
+logic is used for the mask as with matching on ip_tos.
+.TP
.BI dst_ip " PREFIX"
.TQ
.BI src_ip " PREFIX"
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 1b6b46e..5be693a 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -53,6 +53,8 @@ static void explain(void)
" dst_mac MASKED-LLADDR |\n"
" src_mac MASKED-LLADDR |\n"
" ip_proto [tcp | udp | sctp | icmp | icmpv6 | IP-PROTO ] |\n"
+ " ip_tos MASKED-IP_TOS |\n"
+ " ip_ttl MASKED-IP_TTL |\n"
" dst_ip PREFIX |\n"
" src_ip PREFIX |\n"
" dst_port PORT-NUMBER |\n"
@@ -510,6 +512,41 @@ err:
return err;
}
+static int flower_parse_ip_tos_ttl(char *str, int key_type, int mask_type,
+ struct nlmsghdr *n)
+{
+ char *slash;
+ int ret, err = -1;
+ __u8 tos_ttl;
+
+ slash = strchr(str, '/');
+ if (slash)
+ *slash = '\0';
+
+ ret = get_u8(&tos_ttl, str, 10);
+ if (ret < 0)
+ ret = get_u8(&tos_ttl, str, 16);
+ if (ret < 0)
+ goto err;
+
+ addattr8(n, MAX_MSG, key_type, tos_ttl);
+
+ if (slash) {
+ ret = get_u8(&tos_ttl, slash + 1, 16);
+ if (ret < 0)
+ goto err;
+ } else {
+ tos_ttl = 0xff;
+ }
+ addattr8(n, MAX_MSG, mask_type, tos_ttl);
+
+ err = 0;
+err:
+ if (slash)
+ *slash = '/';
+ return err;
+}
+
static int flower_parse_key_id(const char *str, int type, struct nlmsghdr *n)
{
int ret;
@@ -665,6 +702,26 @@ static int flower_parse_opt(struct filter_util *qu, char *handle,
fprintf(stderr, "Illegal \"ip_proto\"\n");
return -1;
}
+ } else if (matches(*argv, "ip_tos") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_ip_tos_ttl(*argv,
+ TCA_FLOWER_KEY_IP_TOS,
+ TCA_FLOWER_KEY_IP_TOS_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"ip_tos\"\n");
+ return -1;
+ }
+ } else if (matches(*argv, "ip_ttl") == 0) {
+ NEXT_ARG();
+ ret = flower_parse_ip_tos_ttl(*argv,
+ TCA_FLOWER_KEY_IP_TTL,
+ TCA_FLOWER_KEY_IP_TTL_MASK,
+ n);
+ if (ret < 0) {
+ fprintf(stderr, "Illegal \"ip_ttl\"\n");
+ return -1;
+ }
} else if (matches(*argv, "dst_ip") == 0) {
NEXT_ARG();
ret = flower_parse_ip_addr(*argv, vlan_ethtype ?
@@ -963,6 +1020,19 @@ static void flower_print_ip_proto(FILE *f, __u8 *p_ip_proto,
*p_ip_proto = ip_proto;
}
+static void flower_print_ip_attr(FILE *f, char *name,
+ struct rtattr *key_attr,
+ struct rtattr *mask_attr)
+{
+ if (!key_attr)
+ return;
+
+ fprintf(f, "\n %s %x", name, rta_getattr_u8(key_attr));
+ if (!mask_attr)
+ return;
+ fprintf(f, "/%x", rta_getattr_u8(mask_attr));
+}
+
static void flower_print_matching_flags(FILE *f, char *name,
enum flower_matching_flags type,
struct rtattr *attr,
@@ -1150,6 +1220,11 @@ static int flower_print_opt(struct filter_util *qu, FILE *f,
flower_print_eth_type(f, &eth_type, tb[TCA_FLOWER_KEY_ETH_TYPE]);
flower_print_ip_proto(f, &ip_proto, tb[TCA_FLOWER_KEY_IP_PROTO]);
+ flower_print_ip_attr(f, "ip_tos", tb[TCA_FLOWER_KEY_IP_TOS],
+ tb[TCA_FLOWER_KEY_IP_TOS_MASK]);
+ flower_print_ip_attr(f, "ip_ttl", tb[TCA_FLOWER_KEY_IP_TTL],
+ tb[TCA_FLOWER_KEY_IP_TTL_MASK]);
+
flower_print_ip_addr(f, "dst_ip", eth_type,
tb[TCA_FLOWER_KEY_IPV4_DST],
tb[TCA_FLOWER_KEY_IPV4_DST_MASK],
--
1.8.3.1

33934
SOURCES/0036-iproute-build-more-easily-on-Android.patch

File diff suppressed because it is too large Load Diff

64
SOURCES/0037-uapi-add-include-linux-vm_sockets_diag.h.patch

@@ -0,0 +1,64 @@
From 74e00895532b878a902f9b0477e1b00d1be9df59 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sun, 22 Oct 2017 21:44:25 +0200
Subject: [PATCH] uapi: add include linux/vm_sockets_diag.h

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1472759
Upstream Status: iproute2.git commit e9b0d82dfac2

commit e9b0d82dfac25912cf757945d9caf6fe2371f526
Author: Stephen Hemminger <stephen@networkplumber.org>
Date: Wed Oct 11 10:49:25 2017 -0700

uapi: add include linux/vm_sockets_diag.h

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
include/uapi/linux/vm_sockets_diag.h | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
create mode 100644 include/uapi/linux/vm_sockets_diag.h

diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h
new file mode 100644
index 0000000..a732a6f
--- /dev/null
+++ b/include/uapi/linux/vm_sockets_diag.h
@@ -0,0 +1,33 @@
+/* AF_VSOCK sock_diag(7) interface for querying open sockets */
+
+#ifndef __VM_SOCKETS_DIAG_H__
+#define __VM_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+
+/* Request */
+struct vsock_diag_req {
+ __u8 sdiag_family; /* must be AF_VSOCK */
+ __u8 sdiag_protocol; /* must be 0 */
+ __u16 pad; /* must be 0 */
+ __u32 vdiag_states; /* query bitmap (e.g. 1 << TCP_LISTEN) */
+ __u32 vdiag_ino; /* must be 0 (reserved) */
+ __u32 vdiag_show; /* must be 0 (reserved) */
+ __u32 vdiag_cookie[2];
+};
+
+/* Response */
+struct vsock_diag_msg {
+ __u8 vdiag_family; /* AF_VSOCK */
+ __u8 vdiag_type; /* SOCK_STREAM or SOCK_DGRAM */
+ __u8 vdiag_state; /* sk_state (e.g. TCP_LISTEN) */
+ __u8 vdiag_shutdown; /* local RCV_SHUTDOWN | SEND_SHUTDOWN */
+ __u32 vdiag_src_cid;
+ __u32 vdiag_src_port;
+ __u32 vdiag_dst_cid;
+ __u32 vdiag_dst_port;
+ __u32 vdiag_ino;
+ __u32 vdiag_cookie[2];
+};
+
+#endif /* __VM_SOCKETS_DIAG_H__ */
--
1.8.3.1

213
SOURCES/0038-ss-allow-AF_FAMILY-constants-32.patch

@ -0,0 +1,213 @@ @@ -0,0 +1,213 @@
From f59533eb3cb188a23456444aeb19ac3634eddd8c Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sun, 22 Oct 2017 21:44:26 +0200
Subject: [PATCH] ss: allow AF_FAMILY constants >32

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1472759
Upstream Status: iproute2.git commit b338a3e7e7d9

commit b338a3e7e7d95c9d46de9748604da06287664033
Author: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri Oct 6 11:48:39 2017 -0400

ss: allow AF_FAMILY constants >32

Linux has more than 32 address families defined in <bits/socket.h>. Use
a 64-bit type so all of them can be represented in the filter->families
bitmask.

It's easy to introduce bugs when using (1 << AF_FAMILY) because the
value is 32-bit. This can produce incorrect results from bitmask
operations so introduce the FAMILY_MASK() macro to eliminate these bugs.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
misc/ss.c | 54 ++++++++++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index d3fb9a7..0d64527 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -170,55 +170,57 @@ enum {
struct filter {
int dbs;
int states;
- int families;
+ uint64_t families;
struct ssfilter *f;
bool kill;
};
+#define FAMILY_MASK(family) ((uint64_t)1 << (family))
+
static const struct filter default_dbs[MAX_DB] = {
[TCP_DB] = {
.states = SS_CONN,
- .families = (1 << AF_INET) | (1 << AF_INET6),
+ .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[DCCP_DB] = {
.states = SS_CONN,
- .families = (1 << AF_INET) | (1 << AF_INET6),
+ .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[UDP_DB] = {
.states = (1 << SS_ESTABLISHED),
- .families = (1 << AF_INET) | (1 << AF_INET6),
+ .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[RAW_DB] = {
.states = (1 << SS_ESTABLISHED),
- .families = (1 << AF_INET) | (1 << AF_INET6),
+ .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
[UNIX_DG_DB] = {
.states = (1 << SS_CLOSE),
- .families = (1 << AF_UNIX),
+ .families = FAMILY_MASK(AF_UNIX),
},
[UNIX_ST_DB] = {
.states = SS_CONN,
- .families = (1 << AF_UNIX),
+ .families = FAMILY_MASK(AF_UNIX),
},
[UNIX_SQ_DB] = {
.states = SS_CONN,
- .families = (1 << AF_UNIX),
+ .families = FAMILY_MASK(AF_UNIX),
},
[PACKET_DG_DB] = {
.states = (1 << SS_CLOSE),
- .families = (1 << AF_PACKET),
+ .families = FAMILY_MASK(AF_PACKET),
},
[PACKET_R_DB] = {
.states = (1 << SS_CLOSE),
- .families = (1 << AF_PACKET),
+ .families = FAMILY_MASK(AF_PACKET),
},
[NETLINK_DB] = {
.states = (1 << SS_CLOSE),
- .families = (1 << AF_NETLINK),
+ .families = FAMILY_MASK(AF_NETLINK),
},
[SCTP_DB] = {
.states = SS_CONN,
- .families = (1 << AF_INET) | (1 << AF_INET6),
+ .families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
};
@@ -258,14 +260,14 @@ static void filter_db_set(struct filter *f, int db)
static void filter_af_set(struct filter *f, int af)
{
f->states |= default_afs[af].states;
- f->families |= 1 << af;
+ f->families |= FAMILY_MASK(af);
do_default = 0;
preferred_family = af;
}
static int filter_af_get(struct filter *f, int af)
{
- return f->families & (1 << af);
+ return !!(f->families & FAMILY_MASK(af));
}
static void filter_default_dbs(struct filter *f)
@@ -302,7 +304,7 @@ static void filter_merge_defaults(struct filter *f)
f->families |= default_dbs[db].families;
}
for (af = 0; af < AF_MAX; af++) {
- if (!(f->families & (1 << af)))
+ if (!(f->families & FAMILY_MASK(af)))
continue;
if (!(default_afs[af].dbs & f->dbs))
@@ -2599,7 +2601,7 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
struct inet_diag_msg *r = NLMSG_DATA(h);
struct sockstat s = {};
- if (!(diag_arg->f->families & (1 << r->idiag_family)))
+ if (!(diag_arg->f->families & FAMILY_MASK(r->idiag_family)))
return 0;
parse_diag_msg(h, &s);
@@ -2785,7 +2787,7 @@ static int tcp_show(struct filter *f)
return -1;
}
- if (f->families & (1<<AF_INET)) {
+ if (f->families & FAMILY_MASK(AF_INET)) {
if ((fp = net_tcp_open()) == NULL)
goto outerr;
@@ -2795,7 +2797,7 @@ static int tcp_show(struct filter *f)
fclose(fp);
}
- if ((f->families & (1<<AF_INET6)) &&
+ if ((f->families & FAMILY_MASK(AF_INET6)) &&
(fp = net_tcp6_open()) != NULL) {
setbuffer(fp, buf, bufsize);
if (generic_record_read(fp, tcp_show_line, f, AF_INET6))
@@ -2894,7 +2896,7 @@ static int udp_show(struct filter *f)
&& inet_show_netlink(f, NULL, IPPROTO_UDP) == 0)
return 0;
- if (f->families&(1<<AF_INET)) {
+ if (f->families&FAMILY_MASK(AF_INET)) {
if ((fp = net_udp_open()) == NULL)
goto outerr;
if (generic_record_read(fp, dgram_show_line, f, AF_INET))
@@ -2902,7 +2904,7 @@ static int udp_show(struct filter *f)
fclose(fp);
}
- if ((f->families&(1<<AF_INET6)) &&
+ if ((f->families&FAMILY_MASK(AF_INET6)) &&
(fp = net_udp6_open()) != NULL) {
if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
goto outerr;
@@ -2934,7 +2936,7 @@ static int raw_show(struct filter *f)
inet_show_netlink(f, NULL, IPPROTO_RAW) == 0)
return 0;
- if (f->families&(1<<AF_INET)) {
+ if (f->families&FAMILY_MASK(AF_INET)) {
if ((fp = net_raw_open()) == NULL)
goto outerr;
if (generic_record_read(fp, dgram_show_line, f, AF_INET))
@@ -2942,7 +2944,7 @@ static int raw_show(struct filter *f)
fclose(fp);
}
- if ((f->families&(1<<AF_INET6)) &&
+ if ((f->families&FAMILY_MASK(AF_INET6)) &&
(fp = net_raw6_open()) != NULL) {
if (generic_record_read(fp, dgram_show_line, f, AF_INET6))
goto outerr;
@@ -3682,13 +3684,13 @@ static int handle_follow_request(struct filter *f)
int groups = 0;
struct rtnl_handle rth;
- if (f->families & (1 << AF_INET) && f->dbs & (1 << TCP_DB))
+ if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << TCP_DB))
groups |= 1 << (SKNLGRP_INET_TCP_DESTROY - 1);
- if (f->families & (1 << AF_INET) && f->dbs & (1 << UDP_DB))
+ if (f->families & FAMILY_MASK(AF_INET) && f->dbs & (1 << UDP_DB))
groups |= 1 << (SKNLGRP_INET_UDP_DESTROY - 1);
- if (f->families & (1 << AF_INET6) && f->dbs & (1 << TCP_DB))
+ if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << TCP_DB))
groups |= 1 << (SKNLGRP_INET6_TCP_DESTROY - 1);
- if (f->families & (1 << AF_INET6) && f->dbs & (1 << UDP_DB))
+ if (f->families & FAMILY_MASK(AF_INET6) && f->dbs & (1 << UDP_DB))
groups |= 1 << (SKNLGRP_INET6_UDP_DESTROY - 1);
if (groups == 0)
--
1.8.3.1

398
SOURCES/0039-ss-add-AF_VSOCK-support.patch

@ -0,0 +1,398 @@ @@ -0,0 +1,398 @@
From fe898bd10be2bc527f81421f06afff77e8ba42eb Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sun, 22 Oct 2017 21:44:27 +0200
Subject: [PATCH] ss: add AF_VSOCK support

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1472759
Upstream Status: iproute2.git commit c759116a0b2b

commit c759116a0b2b6da8df9687b0a40ac69050132c77
Author: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri Oct 6 11:48:41 2017 -0400

ss: add AF_VSOCK support

The AF_VSOCK address family is a host<->guest communications channel
supported by VMware, KVM, and Hyper-V. Initial VMware support was
released in Linux 3.9 in 2013 and transports for other hypervisors were
added later.

AF_VSOCK addresses are <u32 cid, u32 port> tuples. The 32-bit cid
integer is comparable to an IP address. AF_VSOCK ports work like
TCP/UDP ports.

Both SOCK_STREAM and SOCK_DGRAM socket types are available.

This patch adds AF_VSOCK support to ss(8) so that sockets can be
observed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
man/man8/ss.8 | 8 ++-
misc/ss.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 188 insertions(+), 4 deletions(-)

diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 81de69d..4323eee 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -125,14 +125,18 @@ Display Unix domain sockets (alias for -f unix).
.B \-S, \-\-sctp
Display SCTP sockets.
.TP
+.B \-\-vsock
+Display vsock sockets (alias for -f vsock).
+.TP
.B \-f FAMILY, \-\-family=FAMILY
Display sockets of type FAMILY.
-Currently the following families are supported: unix, inet, inet6, link, netlink.
+Currently the following families are supported: unix, inet, inet6, link, netlink, vsock.
.TP
.B \-A QUERY, \-\-query=QUERY, \-\-socket=QUERY
List of socket tables to dump, separated by commas. The following identifiers
are understood: all, inet, tcp, udp, raw, unix, packet, netlink, unix_dgram,
-unix_stream, unix_seqpacket, packet_raw, packet_dgram, dccp, sctp.
+unix_stream, unix_seqpacket, packet_raw, packet_dgram, dccp, sctp,
+vsock_stream, vsock_dgram.
.TP
.B \-D FILE, \-\-diag=FILE
Do not display anything, just dump raw information about TCP sockets to FILE after applying filters. If FILE is - stdout is used.
diff --git a/misc/ss.c b/misc/ss.c
index 0d64527..e922665 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -44,6 +44,7 @@
#include <linux/packet_diag.h>
#include <linux/netlink_diag.h>
#include <linux/sctp.h>
+#include <linux/vm_sockets_diag.h>
#define MAGIC_SEQ 123456
@@ -126,6 +127,8 @@ enum {
PACKET_R_DB,
NETLINK_DB,
SCTP_DB,
+ VSOCK_ST_DB,
+ VSOCK_DG_DB,
MAX_DB
};
@@ -134,6 +137,7 @@ enum {
#define ALL_DB ((1<<MAX_DB)-1)
#define INET_L4_DBM ((1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB))
#define INET_DBM (INET_L4_DBM | (1<<RAW_DB))
+#define VSOCK_DBM ((1<<VSOCK_ST_DB)|(1<<VSOCK_DG_DB))
enum {
SS_UNKNOWN,
@@ -222,6 +226,14 @@ static const struct filter default_dbs[MAX_DB] = {
.states = SS_CONN,
.families = FAMILY_MASK(AF_INET) | FAMILY_MASK(AF_INET6),
},
+ [VSOCK_ST_DB] = {
+ .states = SS_CONN,
+ .families = FAMILY_MASK(AF_VSOCK),
+ },
+ [VSOCK_DG_DB] = {
+ .states = SS_CONN,
+ .families = FAMILY_MASK(AF_VSOCK),
+ },
};
static const struct filter default_afs[AF_MAX] = {
@@ -245,6 +257,10 @@ static const struct filter default_afs[AF_MAX] = {
.dbs = (1 << NETLINK_DB),
.states = (1 << SS_CLOSE),
},
+ [AF_VSOCK] = {
+ .dbs = VSOCK_DBM,
+ .states = SS_CONN,
+ },
};
static int do_default = 1;
@@ -283,6 +299,8 @@ static void filter_default_dbs(struct filter *f)
filter_db_set(f, PACKET_DG_DB);
filter_db_set(f, NETLINK_DB);
filter_db_set(f, SCTP_DB);
+ filter_db_set(f, VSOCK_ST_DB);
+ filter_db_set(f, VSOCK_DG_DB);
}
static void filter_states_set(struct filter *f, int states)
@@ -792,6 +810,18 @@ static const char *proto_name(int protocol)
return "???";
}
+static const char *vsock_netid_name(int type)
+{
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ default:
+ return "???";
+ }
+}
+
static void sock_state_print(struct sockstat *s)
{
const char *sock_name;
@@ -824,6 +854,9 @@ static void sock_state_print(struct sockstat *s)
case AF_NETLINK:
sock_name = "nl";
break;
+ case AF_VSOCK:
+ sock_name = vsock_netid_name(s->type);
+ break;
default:
sock_name = "unknown";
}
@@ -1139,6 +1172,8 @@ static int run_ssfilter(struct ssfilter *f, struct sockstat *s)
return s->lport == 0 && s->local.data[0] == 0;
if (s->local.family == AF_NETLINK)
return s->lport < 0;
+ if (s->local.family == AF_VSOCK)
+ return s->lport > 1023;
return is_ephemeral(s->lport);
}
@@ -1515,6 +1550,15 @@ void *parse_devcond(char *name)
return res;
}
+static void vsock_set_inet_prefix(inet_prefix *a, __u32 cid)
+{
+ *a = (inet_prefix){
+ .bytelen = sizeof(cid),
+ .family = AF_VSOCK,
+ };
+ memcpy(a->data, &cid, sizeof(cid));
+}
+
void *parse_hostcond(char *addr, bool is_port)
{
char *port = NULL;
@@ -1589,6 +1633,37 @@ void *parse_hostcond(char *addr, bool is_port)
goto out;
}
+ if (fam == AF_VSOCK || strncmp(addr, "vsock:", 6) == 0) {
+ __u32 cid = ~(__u32)0;
+
+ a.addr.family = AF_VSOCK;
+ if (strncmp(addr, "vsock:", 6) == 0)
+ addr += 6;
+
+ if (is_port)
+ port = addr;
+ else {
+ port = strchr(addr, ':');
+ if (port) {
+ *port = '\0';
+ port++;
+ }
+ }
+
+ if (port && strcmp(port, "*") &&
+ get_u32((__u32 *)&a.port, port, 0))
+ return NULL;
+
+ if (addr[0] && strcmp(addr, "*")) {
+ a.addr.bitlen = 32;
+ if (get_u32(&cid, addr, 0))
+ return NULL;
+ }
+ vsock_set_inet_prefix(&a.addr, cid);
+ fam = AF_VSOCK;
+ goto out;
+ }
+
if (fam == AF_INET || !strncmp(addr, "inet:", 5)) {
fam = AF_INET;
if (!strncmp(addr, "inet:", 5))
@@ -3653,6 +3728,88 @@ static int netlink_show(struct filter *f)
return 0;
}
+static bool vsock_type_skip(struct sockstat *s, struct filter *f)
+{
+ if (s->type == SOCK_STREAM && !(f->dbs & (1 << VSOCK_ST_DB)))
+ return true;
+ if (s->type == SOCK_DGRAM && !(f->dbs & (1 << VSOCK_DG_DB)))
+ return true;
+ return false;
+}
+
+static void vsock_addr_print(inet_prefix *a, __u32 port)
+{
+ char cid_str[sizeof("4294967295")];
+ char port_str[sizeof("4294967295")];
+ __u32 cid;
+
+ memcpy(&cid, a->data, sizeof(cid));
+
+ if (cid == ~(__u32)0)
+ snprintf(cid_str, sizeof(cid_str), "*");
+ else
+ snprintf(cid_str, sizeof(cid_str), "%u", cid);
+
+ if (port == ~(__u32)0)
+ snprintf(port_str, sizeof(port_str), "*");
+ else
+ snprintf(port_str, sizeof(port_str), "%u", port);
+
+ sock_addr_print(cid_str, ":", port_str, NULL);
+}
+
+static void vsock_stats_print(struct sockstat *s, struct filter *f)
+{
+ sock_state_print(s);
+
+ vsock_addr_print(&s->local, s->lport);
+ vsock_addr_print(&s->remote, s->rport);
+
+ proc_ctx_print(s);
+
+ printf("\n");
+}
+
+static int vsock_show_sock(const struct sockaddr_nl *addr,
+ struct nlmsghdr *nlh, void *arg)
+{
+ struct filter *f = (struct filter *)arg;
+ struct vsock_diag_msg *r = NLMSG_DATA(nlh);
+ struct sockstat stat = {
+ .type = r->vdiag_type,
+ .lport = r->vdiag_src_port,
+ .rport = r->vdiag_dst_port,
+ .state = r->vdiag_state,
+ .ino = r->vdiag_ino,
+ };
+
+ vsock_set_inet_prefix(&stat.local, r->vdiag_src_cid);
+ vsock_set_inet_prefix(&stat.remote, r->vdiag_dst_cid);
+
+ if (vsock_type_skip(&stat, f))
+ return 0;
+
+ if (f->f && run_ssfilter(f->f, &stat) == 0)
+ return 0;
+
+ vsock_stats_print(&stat, f);
+
+ return 0;
+}
+
+static int vsock_show(struct filter *f)
+{
+ DIAG_REQUEST(req, struct vsock_diag_req r);
+
+ if (!filter_af_get(f, AF_VSOCK))
+ return 0;
+
+ req.r.sdiag_family = AF_VSOCK;
+ req.r.vdiag_states = f->states;
+
+ return handle_netlink_request(f, &req.nlh, sizeof(req), vsock_show_sock);
+}
+
struct sock_diag_msg {
__u8 sdiag_family;
};
@@ -3673,6 +3830,8 @@ static int generic_show_sock(const struct sockaddr_nl *addr,
return packet_show_sock(addr, nlh, arg);
case AF_NETLINK:
return netlink_show_sock(addr, nlh, arg);
+ case AF_VSOCK:
+ return vsock_show_sock(addr, nlh, arg);
default:
return -1;
}
@@ -3900,14 +4059,15 @@ static void _usage(FILE *dest)
" -d, --dccp display only DCCP sockets\n"
" -w, --raw display only RAW sockets\n"
" -x, --unix display only Unix domain sockets\n"
+" --vsock display only vsock sockets\n"
" -f, --family=FAMILY display sockets of type FAMILY\n"
-" FAMILY := {inet|inet6|link|unix|netlink|help}\n"
+" FAMILY := {inet|inet6|link|unix|netlink|vsock|help}\n"
"\n"
" -K, --kill forcibly close sockets, display what was closed\n"
" -H, --no-header Suppress header line\n"
"\n"
" -A, --query=QUERY, --socket=QUERY\n"
-" QUERY := {all|inet|tcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink}[,QUERY]\n"
+" QUERY := {all|inet|tcp|udp|raw|unix|unix_dgram|unix_stream|unix_seqpacket|packet|netlink|vsock_stream|vsock_dgram}[,QUERY]\n"
"\n"
" -D, --diag=FILE Dump raw information about TCP sockets to FILE\n"
" -F, --filter=FILE read filter information from FILE\n"
@@ -3980,6 +4140,9 @@ static int scan_state(const char *state)
exit(-1);
}
+/* Values 'v' and 'V' are already used so a non-character is used */
+#define OPT_VSOCK 256
+
static const struct option long_opts[] = {
{ "numeric", 0, 0, 'n' },
{ "resolve", 0, 0, 'r' },
@@ -3996,6 +4159,7 @@ static const struct option long_opts[] = {
{ "udp", 0, 0, 'u' },
{ "raw", 0, 0, 'w' },
{ "unix", 0, 0, 'x' },
+ { "vsock", 0, 0, OPT_VSOCK },
{ "all", 0, 0, 'a' },
{ "listening", 0, 0, 'l' },
{ "ipv4", 0, 0, '4' },
@@ -4081,6 +4245,9 @@ int main(int argc, char *argv[])
case 'x':
filter_af_set(&current_filter, AF_UNIX);
break;
+ case OPT_VSOCK:
+ filter_af_set(&current_filter, AF_VSOCK);
+ break;
case 'a':
state_filter = SS_ALL;
break;
@@ -4107,6 +4274,8 @@ int main(int argc, char *argv[])
filter_af_set(&current_filter, AF_UNIX);
else if (strcmp(optarg, "netlink") == 0)
filter_af_set(&current_filter, AF_NETLINK);
+ else if (strcmp(optarg, "vsock") == 0)
+ filter_af_set(&current_filter, AF_VSOCK);
else if (strcmp(optarg, "help") == 0)
help();
else {
@@ -4172,6 +4341,15 @@ int main(int argc, char *argv[])
filter_db_set(&current_filter, PACKET_DG_DB);
} else if (strcmp(p, "netlink") == 0) {
filter_db_set(&current_filter, NETLINK_DB);
+ } else if (strcmp(p, "vsock") == 0) {
+ filter_db_set(&current_filter, VSOCK_ST_DB);
+ filter_db_set(&current_filter, VSOCK_DG_DB);
+ } else if (strcmp(p, "vsock_stream") == 0 ||
+ strcmp(p, "v_str") == 0) {
+ filter_db_set(&current_filter, VSOCK_ST_DB);
+ } else if (strcmp(p, "vsock_dgram") == 0 ||
+ strcmp(p, "v_dgr") == 0) {
+ filter_db_set(&current_filter, VSOCK_DG_DB);
} else {
fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p);
usage();
@@ -4387,6 +4565,8 @@ int main(int argc, char *argv[])
dccp_show(&current_filter);
if (current_filter.dbs & (1<<SCTP_DB))
sctp_show(&current_filter);
+ if (current_filter.dbs & VSOCK_DBM)
+ vsock_show(&current_filter);
if (show_users || show_proc_ctx || show_sock_ctx)
user_ent_destroy();
--
1.8.3.1

39
SOURCES/0040-link_gre6-Detect-invalid-encaplimit-values.patch

@ -0,0 +1,39 @@ @@ -0,0 +1,39 @@
From 90a1430fca8d5165e1909de9f009aa9f4d6430ef Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Wed, 29 Nov 2017 18:36:17 +0100
Subject: [PATCH] link_gre6: Detect invalid encaplimit values

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1459600
Upstream Status: iproute2.git commit 56708ae7c9535

commit 56708ae7c9535859223c5b68097b35bf0fae677c
Author: Phil Sutter <phil@nwl.cc>
Date: Tue Nov 28 16:49:58 2017 +0100

link_gre6: Detect invalid encaplimit values

Looks like a typo: get_u8() returns 0 on success and -1 on error, so the
error checking here was ineffective.

Fixes: a11b7b71a6eba ("link_gre6: really support encaplimit option")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
ip/link_gre6.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index 932f9ee..a9d18ee 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -351,7 +351,7 @@ get_failed:
} else {
__u8 uval;
- if (get_u8(&uval, *argv, 0) < -1)
+ if (get_u8(&uval, *argv, 0))
invarg("invalid ELIM", *argv);
encap_limit = uval;
flags &= ~IP6_TNL_F_IGN_ENCAP_LIMIT;
--
1.8.3.1

42
SOURCES/0041-man-tc-csum.8-Fix-inconsistency-in-example-descripti.patch

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
From f08752c12351c79145e3a6caf346e3d971370a9c Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Wed, 6 Dec 2017 13:21:16 +0100
Subject: [PATCH] man: tc-csum.8: Fix inconsistency in example description

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1417162
Upstream Status: iproute2.git commit 6bf156415a588

commit 6bf156415a588fa1c975be9a18a1579f63a936a2
Author: Phil Sutter <phil@nwl.cc>
Date: Wed Nov 29 18:34:09 2017 +0100

man: tc-csum.8: Fix inconsistency in example description

Commit 6bbe5e6290db5 ("man: tc-csum.8: Fix example") changed both source
and destination IP addresses in example code but missed to update the
example's description accordingly.

Fixes: 6bbe5e6290db5 ("man: tc-csum.8: Fix example")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
man/man8/tc-csum.8 | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/man8/tc-csum.8 b/man/man8/tc-csum.8
index 409ab71..65724b8 100644
--- a/man/man8/tc-csum.8
+++ b/man/man8/tc-csum.8
@@ -53,8 +53,8 @@ SCTP header
.B SWEETS
These are merely syntactic sugar and ignored internally.
.SH EXAMPLES
-The following performs stateless NAT for incoming packets from 192.168.1.100 to
-new destination 18.52.86.120 (0x12345678 in hex). Assuming these are UDP
+The following performs stateless NAT for incoming packets from 192.0.2.100 to
+new destination 198.51.100.1. Assuming these are UDP
packets, both IP and UDP checksums have to be recalculated:
.RS
--
1.8.3.1

42
SOURCES/0042-tc-fix-command-tc-actions-del-hang-issue.patch

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
From dbc597c9d1e0e65cc9d989d8057f9a083c2f5779 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Fri, 15 Dec 2017 16:13:46 +0100
Subject: [PATCH] tc: fix command "tc actions del" hang issue

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1526394
Upstream Status: iproute2.git commit 83cf5bc73b858

commit 83cf5bc73b858608d59c3c6126a9f37e793e15dd
Author: Chris Mi <chrism@mellanox.com>
Date: Thu Dec 14 18:09:00 2017 +0900

tc: fix command "tc actions del" hang issue

If command is RTM_DELACTION, a non-NULL pointer is passed to rtnl_talk().
Then flag NLM_F_ACK is not set on n->nlmsg_flags and netlink_ack() will
not be called. Command tc will wait for the reply for ever.

Fixes: 86bf43c7c2fd ("lib/libnetlink: update rtnl_talk to support malloc buff at run time")
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Chris Mi <chrism@mellanox.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
tc/m_action.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tc/m_action.c b/tc/m_action.c
index 90b2a11..7cfd9e0 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -507,7 +507,7 @@ static int tc_action_gd(int cmd, unsigned int flags, int *argc_p, char ***argv_p
req.n.nlmsg_seq = rth.dump = ++rth.seq;
- if (rtnl_talk(&rth, &req.n, &ans) < 0) {
+ if (rtnl_talk(&rth, &req.n, cmd == RTM_DELACTION ? NULL : &ans) < 0) {
fprintf(stderr, "We have an error talking to the kernel\n");
return 1;
}
--
1.8.3.1

43
SOURCES/0043-ip-link-Fix-use-after-free-in-nl_get_ll_addr_len.patch

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
From dcafeb49b2538cc7118cb64f62c685980c106b48 Mon Sep 17 00:00:00 2001
From: Phil Sutter <psutter@redhat.com>
Date: Tue, 6 Mar 2018 11:35:28 +0100
Subject: [PATCH] ip-link: Fix use after free in nl_get_ll_addr_len()

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1550097
Upstream Status: iproute2.git commit 06867c3719587

commit 06867c371958773e39b4ccac07cfe3e2fff2ea55
Author: Phil Sutter <phil@nwl.cc>
Date: Thu Mar 1 10:35:12 2018 +0100

ip-link: Fix use after free in nl_get_ll_addr_len()

Immediately after freeing the buffer returned from rtnl_talk(), it is
accessed again via pointer in struct rtattr array. This leads to some
builds not allowing to set an interface's MAC address because the
expected length value is garbage.

Fixes: 86bf43c7c2fdc ("lib/libnetlink: update rtnl_talk to support malloc buff at run time")
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
ip/iplink.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ip/iplink.c b/ip/iplink.c
index 193997c..db5b2c9 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -268,8 +268,9 @@ static int nl_get_ll_addr_len(unsigned int dev_index)
return -1;
}
+ len = RTA_PAYLOAD(tb[IFLA_ADDRESS]);
free(answer);
- return RTA_PAYLOAD(tb[IFLA_ADDRESS]);
+ return len;
}
static void iplink_parse_vf_vlan_info(int vf, int *argcp, char ***argvp,
--
1.8.3.1

1
SOURCES/avpkt

@ -0,0 +1 @@ @@ -0,0 +1 @@
AVPKT=3000

5
SOURCES/cbq-0000.example

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
DEVICE=eth0,10Mbit,1Mbit
RATE=128Kbit
WEIGHT=10Kbit
PRIO=5
RULE=192.168.1.0/24

17
SOURCES/rt_dsfield.deprecated

@ -0,0 +1,17 @@ @@ -0,0 +1,17 @@

# Deprecated values dropped upstream
# Kept in RHEL for backwards-compatibility
0x00 default
0x10 lowdelay
0x08 throughput
0x04 reliability
# This value overlaps with ECT, do not use it!
0x02 mincost
# These values do not seem to want to die; Cisco likes them for some strange reason.
0x20 priority
0x40 immediate
0x60 flash
0x80 flash-override
0xa0 critical
0xc0 internet
0xe0 network

1484
SPECS/iproute.spec

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save