diff options
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/00-INDEX | 8 | ||||
-rw-r--r-- | Documentation/networking/can.txt | 2 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 22 | ||||
-rw-r--r-- | Documentation/networking/netlink_mmap.txt | 13 | ||||
-rw-r--r-- | Documentation/networking/pktgen.txt | 62 | ||||
-rw-r--r-- | Documentation/networking/timestamping.txt | 21 | ||||
-rw-r--r-- | Documentation/networking/timestamping/txtimestamp.c | 28 |
7 files changed, 109 insertions, 47 deletions
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 557b6ef70c26..df27a1a50776 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -1,7 +1,5 @@ 00-INDEX - this file -3c505.txt - - information on the 3Com EtherLink Plus (3c505) driver. 3c509.txt - information on the 3Com Etherlink III Series Ethernet cards. 6pack.txt @@ -24,6 +22,8 @@ README.sb1000 - info on General Instrument/NextLevel SURFboard1000 cable modem. alias.txt - info on using alias network devices. +altera_tse.txt + - Altera Triple-Speed Ethernet controller. arcnet-hardware.txt - tons of info on ARCnet, hubs, jumper settings for ARCnet cards, etc. arcnet.txt @@ -42,6 +42,8 @@ bridge.txt - where to get user space programs for ethernet bridging with Linux. can.txt - documentation on CAN protocol family. +cdc_mbim.txt + - 3G/LTE USB modem (Mobile Broadband Interface Model) cops.txt - info on the COPS LocalTalk Linux driver cs89x0.txt @@ -54,6 +56,8 @@ cxgb.txt - Release Notes for the Chelsio N210 Linux device driver. dccp.txt - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42). +dctcp.txt + - DataCenter TCP congestion control de4x5.txt - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver decnet.txt diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 2236d6dcb7da..0a2859a8ee7e 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -234,7 +234,7 @@ solution for a couple of reasons: mechanisms. Inside this filter definition the (interested) type of errors may be selected. The reception of error messages is disabled by default. The format of the CAN error message frame is briefly - described in the Linux header file "include/linux/can/error.h". + described in the Linux header file "include/uapi/linux/can/error.h". 4. How to use SocketCAN ------------------------ diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a5e4c813f17f..1b8c964b0d17 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -290,6 +290,28 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. +tcp_invalid_ratelimit - INTEGER + Limit the maximal rate for sending duplicate acknowledgments + in response to incoming TCP packets that are for an existing + connection but that are invalid due to any of these reasons: + + (a) out-of-window sequence number, + (b) out-of-window acknowledgment number, or + (c) PAWS (Protection Against Wrapped Sequence numbers) check failure + + This can help mitigate simple "ack loop" DoS attacks, wherein + a buggy or malicious middlebox or man-in-the-middle can + rewrite TCP header fields in manner that causes each endpoint + to think that the other is sending invalid TCP segments, thus + causing each side to send an unterminating stream of duplicate + acknowledgments for invalid segments. + + Using 0 disables rate-limiting of dupacks in response to + invalid segments; otherwise this value specifies the minimal + space between sending such dupacks, in milliseconds. + + Default: 500 (milliseconds). + tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index c6af4bac5aa8..54f10478e8e3 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -199,16 +199,9 @@ frame header. TX limitations -------------- -Kernel processing usually involves validation of the message received by -user-space, then processing its contents. The kernel must assure that -userspace is not able to modify the message contents after they have been -validated. In order to do so, the message is copied from the ring frame -to an allocated buffer if either of these conditions is false: - -- only a single mapping of the ring exists -- the file descriptor is not shared between processes - -This means that for threaded programs, the kernel will fall back to copying. +As of Jan 2015 the message is always copied from the ring frame to an +allocated buffer due to unresolved security concerns. +See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). Example ------- diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 6915c6b27869..0344f1d45b37 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -3,13 +3,11 @@ HOWTO for the linux packet generator ------------------------------------ -Date: 041221 - -Enable CONFIG_NET_PKTGEN to compile and build pktgen.o either in kernel -or as module. Module is preferred. insmod pktgen if needed. Once running -pktgen creates a thread on each CPU where each thread has affinity to its CPU. -Monitoring and controlling is done via /proc. Easiest to select a suitable -a sample script and configure. +Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel +or as a module. A module is preferred; modprobe pktgen if needed. Once +running, pktgen creates a thread for each CPU with affinity to that CPU. +Monitoring and controlling is done via /proc. It is easiest to select a +suitable sample script and configure that. On a dual CPU: @@ -27,7 +25,7 @@ For monitoring and control pktgen creates: Tuning NIC for max performance ============================== -The default NIC setting are (likely) not tuned for pktgen's artificial +The default NIC settings are (likely) not tuned for pktgen's artificial overload type of benchmarking, as this could hurt the normal use-case. Specifically increasing the TX ring buffer in the NIC: @@ -35,20 +33,20 @@ Specifically increasing the TX ring buffer in the NIC: A larger TX ring can improve pktgen's performance, while it can hurt in the general case, 1) because the TX ring buffer might get larger -than the CPUs L1/L2 cache, 2) because it allow more queueing in the +than the CPU's L1/L2 cache, 2) because it allows more queueing in the NIC HW layer (which is bad for bufferbloat). -One should be careful to conclude, that packets/descriptors in the HW +One should hesitate to conclude that packets/descriptors in the HW TX ring cause delay. Drivers usually delay cleaning up the -ring-buffers (for various performance reasons), thus packets stalling -the TX ring, might just be waiting for cleanup. +ring-buffers for various performance reasons, and packets stalling +the TX ring might just be waiting for cleanup. -This cleanup issues is specifically the case, for the driver ixgbe -(Intel 82599 chip). This driver (ixgbe) combine TX+RX ring cleanups, +This cleanup issue is specifically the case for the driver ixgbe +(Intel 82599 chip). This driver (ixgbe) combines TX+RX ring cleanups, and the cleanup interval is affected by the ethtool --coalesce setting of parameter "rx-usecs". -For ixgbe use e.g "30" resulting in approx 33K interrupts/sec (1/30*10^6): +For ixgbe use e.g. "30" resulting in approx 33K interrupts/sec (1/30*10^6): # ethtool -C ethX rx-usecs 30 @@ -60,15 +58,16 @@ Running: Stopped: eth1 Result: OK: max_before_softirq=10000 -Most important the devices assigned to thread. Note! A device can only belong -to one thread. +Most important are the devices assigned to the thread. Note that a +device can only belong to one thread. Viewing devices =============== -Parm section holds configured info. Current hold running stats. -Result is printed after run or after interruption. Example: +The Params section holds configured information. The Current section +holds running statistics. The Result is printed after a run or after +interruption. Example: /proc/net/pktgen/eth1 @@ -93,7 +92,8 @@ Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags) Configuring threads and devices ================================ -This is done via the /proc interface easiest done via pgset in the scripts +This is done via the /proc interface, and most easily done via pgset +as defined in the sample scripts. Examples: @@ -192,10 +192,11 @@ Examples: pgset "rate 300M" set rate to 300 Mb/s pgset "ratep 1000000" set rate to 1Mpps -Example scripts -=============== +Sample scripts +============== -A collection of small tutorial scripts for pktgen is in examples dir. +A collection of small tutorial scripts for pktgen is in the +samples/pktgen directory: pktgen.conf-1-1 # 1 CPU 1 dev pktgen.conf-1-2 # 1 CPU 2 dev @@ -206,25 +207,26 @@ pktgen.conf-1-1-ip6 # 1 CPU 1 dev ipv6 pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS pktgen.conf-1-1-flows # 1 CPU 1 dev multiple flows. -Run in shell: ./pktgen.conf-X-Y It does all the setup including sending. +Run in shell: ./pktgen.conf-X-Y +This does all the setup including sending. Interrupt affinity =================== -Note when adding devices to a specific CPU there good idea to also assign -/proc/irq/XX/smp_affinity so the TX-interrupts gets bound to the same CPU. -as this reduces cache bouncing when freeing skb's. +Note that when adding devices to a specific CPU it is a good idea to +also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound +to the same CPU. This reduces cache bouncing when freeing skbs. Enable IPsec ============ -Default IPsec transformation with ESP encapsulation plus Transport mode -could be enabled by simply setting: +Default IPsec transformation with ESP encapsulation plus transport mode +can be enabled by simply setting: pgset "flag IPSEC" pgset "flows 1" To avoid breaking existing testbed scripts for using AH type and tunnel mode, -user could use "pgset spi SPI_VALUE" to specify which formal of transformation +you can use "pgset spi SPI_VALUE" to specify which transformation mode to employ. diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a5c784c89312..5f0922613f1a 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG: option IP_PKTINFO simultaneously. +SOF_TIMESTAMPING_OPT_TSONLY: + + Applies to transmit timestamps only. Makes the kernel return the + timestamp as a cmsg alongside an empty packet, as opposed to + alongside the original packet. This reduces the amount of memory + charged to the socket's receive budget (SO_RCVBUF) and delivers + the timestamp even if sysctl net.core.tstamp_allow_data is 0. + This option disables SOF_TIMESTAMPING_OPT_CMSG. + + +New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to +disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate +regardless of the setting of sysctl net.core.tstamp_allow_data. + +An exception is when a process needs additional cmsg data, for +instance SOL_IP/IP_PKTINFO to detect the egress network interface. +Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on +having access to the contents of the original packet, so cannot be +combined with SOF_TIMESTAMPING_OPT_TSONLY. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index 05694febc238..8217510d3842 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -70,6 +70,7 @@ static int do_ipv6 = 1; static int cfg_payload_len = 10; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_loop_nodata; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -141,6 +142,9 @@ static void print_payload(char *data, int len) { int i; + if (!len) + return; + if (len > 70) len = 70; @@ -177,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) struct sock_extended_err *serr = NULL; struct scm_timestamping *tss = NULL; struct cmsghdr *cm; + int batch = 0; for (cm = CMSG_FIRSTHDR(msg); cm && cm->cmsg_len; @@ -209,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } } - if (serr && tss) - print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len); + if (batch > 1) + fprintf(stderr, "batched %d timestamps\n", batch); } static int recv_errmsg(int fd) @@ -244,7 +257,7 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - if (ret > 0) { + if (ret >= 0) { __recv_errmsg_cmsg(&msg, ret); if (cfg_show_payload) print_payload(data, cfg_payload_len); @@ -309,6 +322,9 @@ static void do_test(int family, unsigned int opt) opt |= SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; + if (cfg_loop_nodata) + opt |= SOF_TIMESTAMPING_OPT_TSONLY; + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) error(1, 0, "setsockopt timestamping"); @@ -378,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -h: show this message\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -n: set no-payload option\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" @@ -392,7 +409,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { + while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -403,6 +420,9 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'n': + cfg_loop_nodata = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; |