Diffstat (limited to 'target/linux/patches/2.6.30.4/natt.patch')
-rw-r--r--   target/linux/patches/2.6.30.4/natt.patch   2668
1 files changed, 2668 insertions, 0 deletions
diff --git a/target/linux/patches/2.6.30.4/natt.patch b/target/linux/patches/2.6.30.4/natt.patch
new file mode 100644
index 000000000..83103a369
--- /dev/null
+++ b/target/linux/patches/2.6.30.4/natt.patch
@@ -0,0 +1,2668 @@
+diff -Nur linux-2.6.30.1.orig/include/net/xfrmudp.h linux-2.6.30.1/include/net/xfrmudp.h
+--- linux-2.6.30.1.orig/include/net/xfrmudp.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30.1/include/net/xfrmudp.h 2009-07-24 22:00:56.771280384 +0200
+@@ -0,0 +1,10 @@
++/*
++ * pointer to function for type that xfrm4_input wants, to permit
++ * decoupling of XFRM from udp.c
++ */
++#define HAVE_XFRM4_UDP_REGISTER
++
++typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type);
++extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func
++ , xfrm4_rcv_encap_t *oldfunc);
++extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func);
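For illustration only, not part of the patch: the header above is the entire contract between udp.c and an out-of-tree IPsec stack. A module such as KLIPS claims the ESP-in-UDP receive path by registering a handler of type xfrm4_rcv_encap_t at load time and unregistering it on unload. A minimal sketch follows; the module name, init/exit functions and the trivial handler body are assumptions, only the two udp4_*_esp_rcvencap calls and the handler signature come from the patch.

/* Hypothetical out-of-tree consumer of the API declared above. */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/xfrmudp.h>

static xfrm4_rcv_encap_t old_handler;

/* Called by the udp.c wrapper with a de-encapsulated ESP packet.
 * Returning 0 tells udp_queue_rcv_skb() the skb was consumed. */
static int my_esp4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
{
	kfree_skb(skb);		/* a real stack would feed its ESP input here */
	return 0;
}

static int __init my_natt_init(void)
{
	/* remember any previously registered handler so it can be restored */
	return udp4_register_esp_rcvencap(my_esp4_rcv_encap, &old_handler);
}

static void __exit my_natt_exit(void)
{
	udp4_unregister_esp_rcvencap(my_esp4_rcv_encap);
	if (old_handler)
		udp4_register_esp_rcvencap(old_handler, NULL);
}

module_init(my_natt_init);
module_exit(my_natt_exit);
MODULE_LICENSE("GPL");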
+diff -Nur linux-2.6.30.1.orig/net/ipv4/Kconfig linux-2.6.30.1/net/ipv4/Kconfig
+--- linux-2.6.30.1.orig/net/ipv4/Kconfig 2009-07-03 01:52:38.000000000 +0200
++++ linux-2.6.30.1/net/ipv4/Kconfig 2009-07-24 22:00:56.751278392 +0200
+@@ -379,6 +379,12 @@
+ tristate
+ default n
+
++config IPSEC_NAT_TRAVERSAL
++ bool "IPSEC NAT-Traversal (KLIPS compatible)"
++ depends on INET
++ ---help---
++ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP.
++
+ config INET_XFRM_MODE_TRANSPORT
+ tristate "IP: IPsec transport mode"
+ default y
+diff -Nur linux-2.6.30.1.orig/net/ipv4/Kconfig.orig linux-2.6.30.1/net/ipv4/Kconfig.orig
+--- linux-2.6.30.1.orig/net/ipv4/Kconfig.orig 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30.1/net/ipv4/Kconfig.orig 2009-07-03 01:52:38.000000000 +0200
+@@ -0,0 +1,638 @@
++#
++# IP configuration
++#
++config IP_MULTICAST
++ bool "IP: multicasting"
++ help
++ This is code for addressing several networked computers at once,
++ enlarging your kernel by about 2 KB. You need multicasting if you
++ intend to participate in the MBONE, a high bandwidth network on top
++ of the Internet which carries audio and video broadcasts. More
++ information about the MBONE is on the WWW at
++ <http://www.savetz.com/mbone/>. Information about the multicast
++ capabilities of the various network cards is contained in
++ <file:Documentation/networking/multicast.txt>. For most people, it's
++ safe to say N.
++
++config IP_ADVANCED_ROUTER
++ bool "IP: advanced router"
++ ---help---
++ If you intend to run your Linux box mostly as a router, i.e. as a
++ computer that forwards and redistributes network packets, say Y; you
++ will then be presented with several options that allow more precise
++ control about the routing process.
++
++ The answer to this question won't directly affect the kernel:
++ answering N will just cause the configurator to skip all the
++ questions about advanced routing.
++
++ Note that your box can only act as a router if you enable IP
++ forwarding in your kernel; you can do that by saying Y to "/proc
++ file system support" and "Sysctl support" below and executing the
++ line
++
++ echo "1" > /proc/sys/net/ipv4/ip_forward
++
++ at boot time after the /proc file system has been mounted.
++
++ If you turn on IP forwarding, you should consider the rp_filter, which
++ automatically rejects incoming packets if the routing table entry
++ for their source address doesn't match the network interface they're
++ arriving on. This has security advantages because it prevents the
++ so-called IP spoofing, however it can pose problems if you use
++ asymmetric routing (packets from you to a host take a different path
++ than packets from that host to you) or if you operate a non-routing
++ host which has several IP addresses on different interfaces. To turn
++ rp_filter on use:
++
++ echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
++ and
++ echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
++
++ Note that some distributions enable it in startup scripts.
++ For details about rp_filter strict and loose mode read
++ <file:Documentation/networking/ip-sysctl.txt>.
++
++ If unsure, say N here.
++
++choice
++ prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
++ depends on IP_ADVANCED_ROUTER
++ default ASK_IP_FIB_HASH
++
++config ASK_IP_FIB_HASH
++ bool "FIB_HASH"
++ ---help---
++ Current FIB is very proven and good enough for most users.
++
++config IP_FIB_TRIE
++ bool "FIB_TRIE"
++ ---help---
++ Use new experimental LC-trie as FIB lookup algorithm.
++ This improves lookup performance if you have a large
++ number of routes.
++
++ LC-trie is a longest matching prefix lookup algorithm which
++ performs better than FIB_HASH for large routing tables.
++ But, it consumes more memory and is more complex.
++
++ LC-trie is described in:
++
++ IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
++ IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
++ June 1999
++
++ An experimental study of compression methods for dynamic tries
++ Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
++ http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
++
++endchoice
++
++config IP_FIB_HASH
++ def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER
++
++config IP_FIB_TRIE_STATS
++ bool "FIB TRIE statistics"
++ depends on IP_FIB_TRIE
++ ---help---
++ Keep track of statistics on structure of FIB TRIE table.
++ Useful for testing and measuring TRIE performance.
++
++config IP_MULTIPLE_TABLES
++ bool "IP: policy routing"
++ depends on IP_ADVANCED_ROUTER
++ select FIB_RULES
++ ---help---
++ Normally, a router decides what to do with a received packet based
++ solely on the packet's final destination address. If you say Y here,
++ the Linux router will also be able to take the packet's source
++ address into account. Furthermore, the TOS (Type-Of-Service) field
++ of the packet can be used for routing decisions as well.
++
++ If you are interested in this, please see the preliminary
++ documentation at <http://www.compendium.com.ar/policy-routing.txt>
++ and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
++ You will need supporting software from
++ <ftp://ftp.tux.org/pub/net/ip-routing/>.
++
++ If unsure, say N.
++
++config IP_ROUTE_MULTIPATH
++ bool "IP: equal cost multipath"
++ depends on IP_ADVANCED_ROUTER
++ help
++ Normally, the routing tables specify a single action to be taken in
++ a deterministic manner for a given packet. If you say Y here
++ however, it becomes possible to attach several actions to a packet
++ pattern, in effect specifying several alternative paths to travel
++ for those packets. The router considers all these paths to be of
++ equal "cost" and chooses one of them in a non-deterministic fashion
++ if a matching packet arrives.
++
++config IP_ROUTE_VERBOSE
++ bool "IP: verbose route monitoring"
++ depends on IP_ADVANCED_ROUTER
++ help
++ If you say Y here, which is recommended, then the kernel will print
++ verbose messages regarding the routing, for example warnings about
++ received packets which look strange and could be evidence of an
++ attack or a misconfigured system somewhere. The information is
++ handled by the klogd daemon which is responsible for kernel messages
++ ("man klogd").
++
++config IP_PNP
++ bool "IP: kernel level autoconfiguration"
++ help
++ This enables automatic configuration of IP addresses of devices and
++ of the routing table during kernel boot, based on either information
++ supplied on the kernel command line or by BOOTP or RARP protocols.
++ You need to say Y only for diskless machines requiring network
++ access to boot (in which case you want to say Y to "Root file system
++ on NFS" as well), because all other machines configure the network
++ in their startup scripts.
++
++config IP_PNP_DHCP
++ bool "IP: DHCP support"
++ depends on IP_PNP
++ ---help---
++ If you want your Linux box to mount its whole root file system (the
++ one containing the directory /) from some other computer over the
++ net via NFS and you want the IP address of your computer to be
++ discovered automatically at boot time using the DHCP protocol (a
++ special protocol designed for doing this job), say Y here. In case
++ the boot ROM of your network card was designed for booting Linux and
++ does DHCP itself, providing all necessary information on the kernel
++ command line, you can say N here.
++
++ If unsure, say Y. Note that if you want to use DHCP, a DHCP server
++ must be operating on your network. Read
++ <file:Documentation/filesystems/nfsroot.txt> for details.
++
++config IP_PNP_BOOTP
++ bool "IP: BOOTP support"
++ depends on IP_PNP
++ ---help---
++ If you want your Linux box to mount its whole root file system (the
++ one containing the directory /) from some other computer over the
++ net via NFS and you want the IP address of your computer to be
++ discovered automatically at boot time using the BOOTP protocol (a
++ special protocol designed for doing this job), say Y here. In case
++ the boot ROM of your network card was designed for booting Linux and
++ does BOOTP itself, providing all necessary information on the kernel
++ command line, you can say N here. If unsure, say Y. Note that if you
++ want to use BOOTP, a BOOTP server must be operating on your network.
++ Read <file:Documentation/filesystems/nfsroot.txt> for details.
++
++config IP_PNP_RARP
++ bool "IP: RARP support"
++ depends on IP_PNP
++ help
++ If you want your Linux box to mount its whole root file system (the
++ one containing the directory /) from some other computer over the
++ net via NFS and you want the IP address of your computer to be
++ discovered automatically at boot time using the RARP protocol (an
++ older protocol which is being obsoleted by BOOTP and DHCP), say Y
++ here. Note that if you want to use RARP, a RARP server must be
++ operating on your network. Read
++ <file:Documentation/filesystems/nfsroot.txt> for details.
++
++# not yet ready..
++# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
++config NET_IPIP
++ tristate "IP: tunneling"
++ select INET_TUNNEL
++ ---help---
++ Tunneling means encapsulating data of one protocol type within
++ another protocol and sending it over a channel that understands the
++ encapsulating protocol. This particular tunneling driver implements
++ encapsulation of IP within IP, which sounds kind of pointless, but
++ can be useful if you want to make your (or some other) machine
++ appear on a different network than it physically is, or to use
++ mobile-IP facilities (allowing laptops to seamlessly move between
++ networks without changing their IP addresses).
++
++ Saying Y to this option will produce two modules ( = code which can
++ be inserted in and removed from the running kernel whenever you
++ want). Most people won't need this and can say N.
++
++config NET_IPGRE
++ tristate "IP: GRE tunnels over IP"
++ help
++ Tunneling means encapsulating data of one protocol type within
++ another protocol and sending it over a channel that understands the
++ encapsulating protocol. This particular tunneling driver implements
++ GRE (Generic Routing Encapsulation) and at this time allows
++ encapsulating of IPv4 or IPv6 over existing IPv4 infrastructure.
++ This driver is useful if the other endpoint is a Cisco router: Cisco
++ likes GRE much better than the other Linux tunneling driver ("IP
++ tunneling" above). In addition, GRE allows multicast redistribution
++ through the tunnel.
++
++config NET_IPGRE_BROADCAST
++ bool "IP: broadcast GRE over IP"
++ depends on IP_MULTICAST && NET_IPGRE
++ help
++ One application of GRE/IP is to construct a broadcast WAN (Wide Area
++ Network), which looks like a normal Ethernet LAN (Local Area
++ Network), but can be distributed all over the Internet. If you want
++ to do that, say Y here and to "IP multicast routing" below.
++
++config IP_MROUTE
++ bool "IP: multicast routing"
++ depends on IP_MULTICAST
++ help
++ This is used if you want your machine to act as a router for IP
++ packets that have several destination addresses. It is needed on the
++ MBONE, a high bandwidth network on top of the Internet which carries
++ audio and video broadcasts. In order to do that, you would most
++ likely run the program mrouted. Information about the multicast
++ capabilities of the various network cards is contained in
++ <file:Documentation/networking/multicast.txt>. If you haven't heard
++ about it, you don't need it.
++
++config IP_PIMSM_V1
++ bool "IP: PIM-SM version 1 support"
++ depends on IP_MROUTE
++ help
++ Kernel side support for Sparse Mode PIM (Protocol Independent
++ Multicast) version 1. This multicast routing protocol is used widely
++ because Cisco supports it. You need special software to use it
++ (pimd-v1). Please see <http://netweb.usc.edu/pim/> for more
++ information about PIM.
++
++ Say Y if you want to use PIM-SM v1. Note that you can say N here if
++ you just want to use Dense Mode PIM.
++
++config IP_PIMSM_V2
++ bool "IP: PIM-SM version 2 support"
++ depends on IP_MROUTE
++ help
++ Kernel side support for Sparse Mode PIM version 2. In order to use
++ this, you need an experimental routing daemon supporting it (pimd or
++ gated-5). This routing protocol is not used widely, so say N unless
++ you want to play with it.
++
++config ARPD
++ bool "IP: ARP daemon support (EXPERIMENTAL)"
++ depends on EXPERIMENTAL
++ ---help---
++ Normally, the kernel maintains an internal cache which maps IP
++ addresses to hardware addresses on the local network, so that
++ Ethernet/Token Ring/ etc. frames are sent to the proper address on
++ the physical networking layer. For small networks having a few
++ hundred directly connected hosts or less, keeping this address
++ resolution (ARP) cache inside the kernel works well. However,
++ maintaining an internal ARP cache does not work well for very large
++ switched networks, and will use a lot of kernel memory if TCP/IP
++ connections are made to many machines on the network.
++
++ If you say Y here, the kernel's internal ARP cache will never grow
++ to more than 256 entries (the oldest entries are expired in a LIFO
++ manner) and communication will be attempted with the user space ARP
++ daemon arpd. Arpd then answers the address resolution request either
++ from its own cache or by asking the net.
++
++ This code is experimental and also obsolete. If you want to use it,
++ you need to find a version of the daemon arpd on the net somewhere,
++ and you should also say Y to "Kernel/User network link driver",
++ below. If unsure, say N.
++
++config SYN_COOKIES
++ bool "IP: TCP syncookie support (disabled per default)"
++ ---help---
++ Normal TCP/IP networking is open to an attack known as "SYN
++ flooding". This denial-of-service attack prevents legitimate remote
++ users from being able to connect to your computer during an ongoing
++ attack and requires very little work from the attacker, who can
++ operate from anywhere on the Internet.
++
++ SYN cookies provide protection against this type of attack. If you
++ say Y here, the TCP/IP stack will use a cryptographic challenge
++ protocol known as "SYN cookies" to enable legitimate users to
++ continue to connect, even when your machine is under attack. There
++ is no need for the legitimate users to change their TCP/IP software;
++ SYN cookies work transparently to them. For technical information
++ about SYN cookies, check out <http://cr.yp.to/syncookies.html>.
++
++ If you are SYN flooded, the source address reported by the kernel is
++ likely to have been forged by the attacker; it is only reported as
++ an aid in tracing the packets to their actual source and should not
++ be taken as absolute truth.
++
++ SYN cookies may prevent correct error reporting on clients when the
++ server is really overloaded. If this happens frequently better turn
++ them off.
++
++ If you say Y here, note that SYN cookies aren't enabled by default;
++ you can enable them by saying Y to "/proc file system support" and
++ "Sysctl support" below and executing the command
++
++ echo 1 >/proc/sys/net/ipv4/tcp_syncookies
++
++ at boot time after the /proc file system has been mounted.
++
++ If unsure, say N.
++
++config INET_AH
++ tristate "IP: AH transformation"
++ select XFRM
++ select CRYPTO
++ select CRYPTO_HMAC
++ select CRYPTO_MD5
++ select CRYPTO_SHA1
++ ---help---
++ Support for IPsec AH.
++
++ If unsure, say Y.
++
++config INET_ESP
++ tristate "IP: ESP transformation"
++ select XFRM
++ select CRYPTO
++ select CRYPTO_AUTHENC
++ select CRYPTO_HMAC
++ select CRYPTO_MD5
++ select CRYPTO_CBC
++ select CRYPTO_SHA1
++ select CRYPTO_DES
++ ---help---
++ Support for IPsec ESP.
++
++ If unsure, say Y.
++
++config INET_IPCOMP
++ tristate "IP: IPComp transformation"
++ select INET_XFRM_TUNNEL
++ select XFRM_IPCOMP
++ ---help---
++ Support for IP Payload Compression Protocol (IPComp) (RFC3173),
++ typically needed for IPsec.
++
++ If unsure, say Y.
++
++config INET_XFRM_TUNNEL
++ tristate
++ select INET_TUNNEL
++ default n
++
++config INET_TUNNEL
++ tristate
++ default n
++
++config INET_XFRM_MODE_TRANSPORT
++ tristate "IP: IPsec transport mode"
++ default y
++ select XFRM
++ ---help---
++ Support for IPsec transport mode.
++
++ If unsure, say Y.
++
++config INET_XFRM_MODE_TUNNEL
++ tristate "IP: IPsec tunnel mode"
++ default y
++ select XFRM
++ ---help---
++ Support for IPsec tunnel mode.
++
++ If unsure, say Y.
++
++config INET_XFRM_MODE_BEET
++ tristate "IP: IPsec BEET mode"
++ default y
++ select XFRM
++ ---help---
++ Support for IPsec BEET mode.
++
++ If unsure, say Y.
++
++config INET_LRO
++ bool "Large Receive Offload (ipv4/tcp)"
++ default y
++ ---help---
++ Support for Large Receive Offload (ipv4/tcp).
++
++ If unsure, say Y.
++
++config INET_DIAG
++ tristate "INET: socket monitoring interface"
++ default y
++ ---help---
++ Support for INET (TCP, DCCP, etc) socket monitoring interface used by
++ native Linux tools such as ss. ss is included in iproute2, currently
++ downloadable at <http://linux-net.osdl.org/index.php/Iproute2>.
++
++ If unsure, say Y.
++
++config INET_TCP_DIAG
++ depends on INET_DIAG
++ def_tristate INET_DIAG
++
++menuconfig TCP_CONG_ADVANCED
++ bool "TCP: advanced congestion control"
++ ---help---
++ Support for selection of various TCP congestion control
++ modules.
++
++ Nearly all users can safely say no here, and a safe default
++ selection will be made (CUBIC with new Reno as a fallback).
++
++ If unsure, say N.
++
++if TCP_CONG_ADVANCED
++
++config TCP_CONG_BIC
++ tristate "Binary Increase Congestion (BIC) control"
++ default m
++ ---help---
++ BIC-TCP is a sender-side only change that ensures a linear RTT
++ fairness under large windows while offering both scalability and
++ bounded TCP-friendliness. The protocol combines two schemes
++ called additive increase and binary search increase. When the
++ congestion window is large, additive increase with a large
++ increment ensures linear RTT fairness as well as good
++ scalability. Under small congestion windows, binary search
++ increase provides TCP friendliness.
++ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
++
++config TCP_CONG_CUBIC
++ tristate "CUBIC TCP"
++ default y
++ ---help---
++ This is version 2.0 of BIC-TCP which uses a cubic growth function
++ among other techniques.
++ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
++
++config TCP_CONG_WESTWOOD
++ tristate "TCP Westwood+"
++ default m
++ ---help---
++ TCP Westwood+ is a sender-side only modification of the TCP Reno
++ protocol stack that optimizes the performance of TCP congestion
++ control. It is based on end-to-end bandwidth estimation to set
++ congestion window and slow start threshold after a congestion
++ episode. Using this estimation, TCP Westwood+ adaptively sets a
++ slow start threshold and a congestion window which takes into
++ account the bandwidth used at the time congestion is experienced.
++ TCP Westwood+ significantly increases fairness wrt TCP Reno in
++ wired networks and throughput over wireless links.
++
++config TCP_CONG_HTCP
++ tristate "H-TCP"
++ default m
++ ---help---
++ H-TCP is a send-side only modifications of the TCP Reno
++ protocol stack that optimizes the performance of TCP
++ congestion control for high speed network links. It uses a
++ modeswitch to change the alpha and beta parameters of TCP Reno
++ based on network conditions and in a way so as to be fair with
++ other Reno and H-TCP flows.
++
++config TCP_CONG_HSTCP
++ tristate "High Speed TCP"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ Sally Floyd's High Speed TCP (RFC 3649) congestion control.
++ A modification to TCP's congestion control mechanism for use
++ with large congestion windows. A table indicates how much to
++ increase the congestion window by when an ACK is received.
++ For more detail see http://www.icir.org/floyd/hstcp.html
++
++config TCP_CONG_HYBLA
++ tristate "TCP-Hybla congestion control algorithm"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ TCP-Hybla is a sender-side only change that eliminates penalization of
++ long-RTT, large-bandwidth connections, like when satellite legs are
++ involved, especially when sharing a common bottleneck with normal
++ terrestrial connections.
++
++config TCP_CONG_VEGAS
++ tristate "TCP Vegas"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ TCP Vegas is a sender-side only change to TCP that anticipates
++ the onset of congestion by estimating the bandwidth. TCP Vegas
++ adjusts the sending rate by modifying the congestion
++ window. TCP Vegas should provide less packet loss, but it is
++ not as aggressive as TCP Reno.
++
++config TCP_CONG_SCALABLE
++ tristate "Scalable TCP"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ Scalable TCP is a sender-side only change to TCP which uses a
++ MIMD congestion control algorithm which has some nice scaling
++ properties, though is known to have fairness issues.
++ See http://www.deneholme.net/tom/scalable/
++
++config TCP_CONG_LP
++ tristate "TCP Low Priority"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
++ to utilize only the excess network bandwidth as compared to the
++ ``fair share`` of bandwidth as targeted by TCP.
++ See http://www-ece.rice.edu/networks/TCP-LP/
++
++config TCP_CONG_VENO
++ tristate "TCP Veno"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ TCP Veno is a sender-side only enhancement of TCP to obtain better
++ throughput over wireless networks. TCP Veno makes use of state
++ distinguishing to circumvent the difficult judgment of the packet loss
++ type. TCP Veno cuts down less congestion window in response to random
++ loss packets.
++ See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
++
++config TCP_CONG_YEAH
++ tristate "YeAH TCP"
++ depends on EXPERIMENTAL
++ select TCP_CONG_VEGAS
++ default n
++ ---help---
++ YeAH-TCP is a sender-side high-speed enabled TCP congestion control
++ algorithm, which uses a mixed loss/delay approach to compute the
++ congestion window. It's design goals target high efficiency,
++ internal, RTT and Reno fairness, resilience to link loss while
++ keeping network elements load as low as possible.
++
++ For further details look here:
++ http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
++
++config TCP_CONG_ILLINOIS
++ tristate "TCP Illinois"
++ depends on EXPERIMENTAL
++ default n
++ ---help---
++ TCP-Illinois is a sender-side modification of TCP Reno for
++ high speed long delay links. It uses round-trip-time to
++ adjust the alpha and beta parameters to achieve a higher average
++ throughput and maintain fairness.
++
++ For further details see:
++ http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
++
++choice
++ prompt "Default TCP congestion control"
++ default DEFAULT_CUBIC
++ help
++ Select the TCP congestion control that will be used by default
++ for all connections.
++
++ config DEFAULT_BIC
++ bool "Bic" if TCP_CONG_BIC=y
++
++ config DEFAULT_CUBIC
++ bool "Cubic" if TCP_CONG_CUBIC=y
++
++ config DEFAULT_HTCP
++ bool "Htcp" if TCP_CONG_HTCP=y
++
++ config DEFAULT_VEGAS
++ bool "Vegas" if TCP_CONG_VEGAS=y
++
++ config DEFAULT_WESTWOOD
++ bool "Westwood" if TCP_CONG_WESTWOOD=y
++
++ config DEFAULT_RENO
++ bool "Reno"
++
++endchoice
++
++endif
++
++config TCP_CONG_CUBIC
++ tristate
++ depends on !TCP_CONG_ADVANCED
++ default y
++
++config DEFAULT_TCP_CONG
++ string
++ default "bic" if DEFAULT_BIC
++ default "cubic" if DEFAULT_CUBIC
++ default "htcp" if DEFAULT_HTCP
++ default "vegas" if DEFAULT_VEGAS
++ default "westwood" if DEFAULT_WESTWOOD
++ default "reno" if DEFAULT_RENO
++ default "cubic"
++
++config TCP_MD5SIG
++ bool "TCP: MD5 Signature Option support (RFC2385) (EXPERIMENTAL)"
++ depends on EXPERIMENTAL
++ select CRYPTO
++ select CRYPTO_MD5
++ ---help---
++ RFC2385 specifies a method of giving MD5 protection to TCP sessions.
++ Its main (only?) use is to protect BGP sessions between core routers
++ on the Internet.
++
++ If unsure, say N.
++
+diff -Nur linux-2.6.30.1.orig/net/ipv4/udp.c linux-2.6.30.1/net/ipv4/udp.c
+--- linux-2.6.30.1.orig/net/ipv4/udp.c 2009-07-03 01:52:38.000000000 +0200
++++ linux-2.6.30.1/net/ipv4/udp.c 2009-07-24 22:00:56.755270521 +0200
+@@ -104,6 +104,7 @@
+ #include <net/route.h>
+ #include <net/checksum.h>
+ #include <net/xfrm.h>
++#include <net/xfrmudp.h>
+ #include "udp_impl.h"
+
+ struct udp_table udp_table;
+@@ -1035,6 +1036,128 @@
+ return -1;
+ }
+
++#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
++
++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL;
++
++/*
++ * de-encapsulate and pass to the registered xfrm4_rcv_encap_func function.
++ * Most of this code stolen from net/ipv4/xfrm4_input.c
++ * which is attributed to YOSHIFUJI Hideaki @USAGI, and
++ * Derek Atkins <derek@ihtfp.com>
++ */
++
++static int xfrm4_udp_encap_rcv_wrapper(struct sock *sk, struct sk_buff *skb)
++{
++ struct udp_sock *up = udp_sk(sk);
++ struct udphdr *uh;
++ struct iphdr *iph;
++ int iphlen, len;
++ int ret;
++
++ __u8 *udpdata;
++ __be32 *udpdata32;
++ __u16 encap_type = up->encap_type;
++
++ /* if this is not encapsulated socket, then just return now */
++ if (!encap_type && !xfrm4_rcv_encap_func)
++ return 1;
++
++ /* If this is a paged skb, make sure we pull up
++ * whatever data we need to look at. */
++ len = skb->len - sizeof(struct udphdr);
++ if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
++ return 1;
++
++ /* Now we can get the pointers */
++ uh = udp_hdr(skb);
++ udpdata = (__u8 *)uh + sizeof(struct udphdr);
++ udpdata32 = (__be32 *)udpdata;
++
++ switch (encap_type) {
++ default:
++ case UDP_ENCAP_ESPINUDP:
++ /* Check if this is a keepalive packet. If so, eat it. */
++ if (len == 1 && udpdata[0] == 0xff) {
++ goto drop;
++ } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
++ /* ESP Packet without Non-ESP header */
++ len = sizeof(struct udphdr);
++ } else
++ /* Must be an IKE packet.. pass it through */
++ return 1;
++ break;
++ case UDP_ENCAP_ESPINUDP_NON_IKE:
++ /* Check if this is a keepalive packet. If so, eat it. */
++ if (len == 1 && udpdata[0] == 0xff) {
++ goto drop;
++ } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
++ udpdata32[0] == 0 && udpdata32[1] == 0) {
++
++ /* ESP Packet with Non-IKE marker */
++ len = sizeof(struct udphdr) + 2 * sizeof(u32);
++ } else
++ /* Must be an IKE packet.. pass it through */
++ return 1;
++ break;
++ }
++
++ /* At this point we are sure that this is an ESPinUDP packet,
++ * so we need to remove 'len' bytes from the packet (the UDP
++ * header and optional ESP marker bytes) and then modify the
++ * protocol to ESP, and then call into the transform receiver.
++ */
++ if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++ goto drop;
++
++ /* Now we can update and verify the packet length... */
++ iph = ip_hdr(skb);
++ iphlen = iph->ihl << 2;
++ iph->tot_len = htons(ntohs(iph->tot_len) - len);
++ if (skb->len < iphlen + len) {
++ /* packet is too small!?! */
++ goto drop;
++ }
++
++ /* pull the data buffer up to the ESP header and set the
++ * transport header to point to ESP. Keep UDP on the stack
++ * for later.
++ */
++ __skb_pull(skb, len);
++ skb_reset_transport_header(skb);
++
++ /* modify the protocol (it's ESP!) */
++ iph->protocol = IPPROTO_ESP;
++
++ /* process ESP */
++ ret = (*xfrm4_rcv_encap_func)(skb, encap_type);
++ return ret;
++
++drop:
++ kfree_skb(skb);
++ return 0;
++}
++
++int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func,
++ xfrm4_rcv_encap_t *oldfunc)
++{
++ if (oldfunc != NULL)
++ *oldfunc = xfrm4_rcv_encap_func;
++ xfrm4_rcv_encap_func = func;
++ return 0;
++}
++
++int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func)
++{
++ if (xfrm4_rcv_encap_func != func)
++ return -1;
++
++ xfrm4_rcv_encap_func = NULL;
++ return 0;
++}
++
++#endif /* CONFIG_XFRM_MODULE || CONFIG_IPSEC_NAT_TRAVERSAL */
++
+ /* returns:
+ * -1: error
+ * 0: success
+@@ -1377,6 +1500,11 @@
+ case 0:
+ case UDP_ENCAP_ESPINUDP:
+ case UDP_ENCAP_ESPINUDP_NON_IKE:
++#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL)
++ if (xfrm4_rcv_encap_func)
++ up->encap_rcv = xfrm4_udp_encap_rcv_wrapper;
++ else
++#endif
+ up->encap_rcv = xfrm4_udp_encap_rcv;
+ /* FALLTHROUGH */
+ case UDP_ENCAP_L2TPINUDP:
+@@ -1828,3 +1956,9 @@
+ EXPORT_SYMBOL(udp_proc_register);
+ EXPORT_SYMBOL(udp_proc_unregister);
+ #endif
++
++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL)
++EXPORT_SYMBOL(udp4_register_esp_rcvencap);
++EXPORT_SYMBOL(udp4_unregister_esp_rcvencap);
++#endif
++
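For context, also not part of the patch: the setsockopt hunk above only installs xfrm4_udp_encap_rcv_wrapper when a keying daemon has marked its IKE socket (UDP port 4500) for ESP-in-UDP encapsulation. A userspace sketch of that step, with the constant values mirroring include/linux/udp.h of this era and all error handling omitted:

/* Hypothetical IKE daemon snippet: ask the kernel to decapsulate
 * ESP-in-UDP (RFC 3948) on this socket. */
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef UDP_ENCAP
#define UDP_ENCAP		100	/* value from linux/udp.h */
#define UDP_ENCAP_ESPINUDP	2	/* RFC 3948 encapsulation */
#endif

static int enable_espinudp(int fd)
{
	int type = UDP_ENCAP_ESPINUDP;

	/* after this, udp_queue_rcv_skb() routes ESP payloads on fd to the
	 * encap_rcv hook chosen by the patched setsockopt handler above */
	return setsockopt(fd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type));
}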
+diff -Nur linux-2.6.30.1.orig/net/ipv4/udp.c.orig linux-2.6.30.1/net/ipv4/udp.c.orig
+--- linux-2.6.30.1.orig/net/ipv4/udp.c.orig 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30.1/net/ipv4/udp.c.orig 2009-07-03 01:52:38.000000000 +0200
+@@ -0,0 +1,1830 @@
++/*
++ * INET An implementation of the TCP/IP protocol suite for the LINUX
++ * operating system. INET is implemented using the BSD Socket
++ * interface as the means of communication with the user level.
++ *
++ * The User Datagram Protocol (UDP).
++ *
++ * Authors: Ross Biro
++ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
++ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
++ * Alan Cox, <alan@lxorguk.ukuu.org.uk>
++ * Hirokazu Takahashi, <taka@valinux.co.jp>
++ *
++ * Fixes:
++ * Alan Cox : verify_area() calls
++ * Alan Cox : stopped close while in use off icmp
++ * messages. Not a fix but a botch that
++ * for udp at least is 'valid'.
++ * Alan Cox : Fixed icmp handling properly
++ * Alan Cox : Correct error for oversized datagrams
++ * Alan Cox : Tidied select() semantics.
++ * Alan Cox : udp_err() fixed properly, also now
++ * select and read wake correctly on errors
++ * Alan Cox : udp_send verify_area moved to avoid mem leak
++ * Alan Cox : UDP can count its memory
++ * Alan Cox : send to an unknown connection causes
++ * an ECONNREFUSED off the icmp, but
++ * does NOT close.
++ * Alan Cox : Switched to new sk_buff handlers. No more backlog!
++ * Alan Cox : Using generic datagram code. Even smaller and the PEEK
++ * bug no longer crashes it.
++ * Fred Van Kempen : Net2e support for sk->broadcast.
++ * Alan Cox : Uses skb_free_datagram
++ * Alan Cox : Added get/set sockopt support.
++ * Alan Cox : Broadcasting without option set returns EACCES.
++ * Alan Cox : No wakeup calls. Instead we now use the callbacks.
++ * Alan Cox : Use ip_tos and ip_ttl
++ * Alan Cox : SNMP Mibs
++ * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
++ * Matt Dillon : UDP length checks.
++ * Alan Cox : Smarter af_inet used properly.
++ * Alan Cox : Use new kernel side addressing.
++ * Alan Cox : Incorrect return on truncated datagram receive.
++ * Arnt Gulbrandsen : New udp_send and stuff
++ * Alan Cox : Cache last socket
++ * Alan Cox : Route cache
++ * Jon Peatfield : Minor efficiency fix to sendto().
++ * Mike Shaver : RFC1122 checks.
++ * Alan Cox : Nonblocking error fix.
++ * Willy Konynenberg : Transparent proxying support.
++ * Mike McLagan : Routing by source
++ * David S. Miller : New socket lookup architecture.
++ * Last socket cache retained as it
++ * does have a high hit rate.
++ * Olaf Kirch : Don't linearise iovec on sendmsg.
++ * Andi Kleen : Some cleanups, cache destination entry
++ * for connect.
++ * Vitaly E. Lavrov : Transparent proxy revived after year coma.
++ * Melvin Smith : Check msg_name not msg_namelen in sendto(),
++ * return ENOTCONN for unconnected sockets (POSIX)
++ * Janos Farkas : don't deliver multi/broadcasts to a different
++ * bound-to-device socket
++ * Hirokazu Takahashi : HW checksumming for outgoing UDP
++ * datagrams.
++ * Hirokazu Takahashi : sendfile() on UDP works now.
++ * Arnaldo C. Melo : convert /proc/net/udp to seq_file
++ * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
++ * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
++ * a single port at the same time.
++ * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support
++ * James Chapman : Add L2TP encapsulation type.
++ *
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <asm/system.h>
++#include <asm/uaccess.h>
++#include <asm/ioctls.h>
++#include <linux/bootmem.h>
++#include <linux/highmem.h>
++#include <linux/swap.h>
++#include <linux/types.h>
++#include <linux/fcntl.h>
++#include <linux/module.h>
++#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/igmp.h>
++#include <linux/in.h>
++#include <linux/errno.h>
++#include <linux/timer.h>
++#include <linux/mm.h>
++#include <linux/inet.h>
++#include <linux/netdevice.h>
++#include <net/tcp_states.h>
++#include <linux/skbuff.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <net/net_namespace.h>
++#include <net/icmp.h>
++#include <net/route.h>
++#include <net/checksum.h>
++#include <net/xfrm.h>
++#include "udp_impl.h"
++
++struct udp_table udp_table;
++EXPORT_SYMBOL(udp_table);
++
++int sysctl_udp_mem[3] __read_mostly;
++int sysctl_udp_rmem_min __read_mostly;
++int sysctl_udp_wmem_min __read_mostly;
++
++EXPORT_SYMBOL(sysctl_udp_mem);
++EXPORT_SYMBOL(sysctl_udp_rmem_min);
++EXPORT_SYMBOL(sysctl_udp_wmem_min);
++
++atomic_t udp_memory_allocated;
++EXPORT_SYMBOL(udp_memory_allocated);
++
++#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
++
++static int udp_lib_lport_inuse(struct net *net, __u16 num,
++ const struct udp_hslot *hslot,
++ unsigned long *bitmap,
++ struct sock *sk,
++ int (*saddr_comp)(const struct sock *sk1,
++ const struct sock *sk2))
++{
++ struct sock *sk2;
++ struct hlist_nulls_node *node;
++
++ sk_nulls_for_each(sk2, node, &hslot->head)
++ if (net_eq(sock_net(sk2), net) &&
++ sk2 != sk &&
++ (bitmap || sk2->sk_hash == num) &&
++ (!sk2->sk_reuse || !sk->sk_reuse) &&
++ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
++ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
++ (*saddr_comp)(sk, sk2)) {
++ if (bitmap)
++ __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
++ bitmap);
++ else
++ return 1;
++ }
++ return 0;
++}
++
++/**
++ * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
++ *
++ * @sk: socket struct in question
++ * @snum: port number to look up
++ * @saddr_comp: AF-dependent comparison of bound local IP addresses
++ */
++int udp_lib_get_port(struct sock *sk, unsigned short snum,
++ int (*saddr_comp)(const struct sock *sk1,
++ const struct sock *sk2 ) )
++{
++ struct udp_hslot *hslot;
++ struct udp_table *udptable = sk->sk_prot->h.udp_table;
++ int error = 1;
++ struct net *net = sock_net(sk);
++
++ if (!snum) {
++ int low, high, remaining;
++ unsigned rand;
++ unsigned short first, last;
++ DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
++
++ inet_get_local_port_range(&low, &high);
++ remaining = (high - low) + 1;
++
++ rand = net_random();
++ first = (((u64)rand * remaining) >> 32) + low;
++ /*
++ * force rand to be an odd multiple of UDP_HTABLE_SIZE
++ */
++ rand = (rand | 1) * UDP_HTABLE_SIZE;
++ for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
++ hslot = &udptable->hash[udp_hashfn(net, first)];
++ bitmap_zero(bitmap, PORTS_PER_CHAIN);
++ spin_lock_bh(&hslot->lock);
++ udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
++ saddr_comp);
++
++ snum = first;
++ /*
++ * Iterate on all possible values of snum for this hash.
++ * Using steps of an odd multiple of UDP_HTABLE_SIZE
++ * give us randomization and full range coverage.
++ */
++ do {
++ if (low <= snum && snum <= high &&
++ !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
++ goto found;
++ snum += rand;
++ } while (snum != first);
++ spin_unlock_bh(&hslot->lock);
++ }
++ goto fail;
++ } else {
++ hslot = &udptable->hash[udp_hashfn(net, snum)];
++ spin_lock_bh(&hslot->lock);
++ if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
++ goto fail_unlock;
++ }
++found:
++ inet_sk(sk)->num = snum;
++ sk->sk_hash = snum;
++ if (sk_unhashed(sk)) {
++ sk_nulls_add_node_rcu(sk, &hslot->head);
++ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
++ }
++ error = 0;
++fail_unlock:
++ spin_unlock_bh(&hslot->lock);
++fail:
++ return error;
++}
++
++static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
++{
++ struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
++
++ return ( !ipv6_only_sock(sk2) &&
++ (!inet1->rcv_saddr || !inet2->rcv_saddr ||
++ inet1->rcv_saddr == inet2->rcv_saddr ));
++}
++
++int udp_v4_get_port(struct sock *sk, unsigned short snum)
++{
++ return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
++}
++
++static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
++ unsigned short hnum,
++ __be16 sport, __be32 daddr, __be16 dport, int dif)
++{
++ int score = -1;
++
++ if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
++ !ipv6_only_sock(sk)) {
++ struct inet_sock *inet = inet_sk(sk);
++
++ score = (sk->sk_family == PF_INET ? 1 : 0);
++ if (inet->rcv_saddr) {
++ if (inet->rcv_saddr != daddr)
++ return -1;
++ score += 2;
++ }
++ if (inet->daddr) {
++ if (inet->daddr != saddr)
++ return -1;
++ score += 2;
++ }
++ if (inet->dport) {
++ if (inet->dport != sport)
++ return -1;
++ score += 2;
++ }
++ if (sk->sk_bound_dev_if) {
++ if (sk->sk_bound_dev_if != dif)
++ return -1;
++ score += 2;
++ }
++ }
++ return score;
++}
++
++/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
++ * harder than this. -DaveM
++ */
++static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
++ __be16 sport, __be32 daddr, __be16 dport,
++ int dif, struct udp_table *udptable)
++{
++ struct sock *sk, *result;
++ struct hlist_nulls_node *node;
++ unsigned short hnum = ntohs(dport);
++ unsigned int hash = udp_hashfn(net, hnum);
++ struct udp_hslot *hslot = &udptable->hash[hash];
++ int score, badness;
++
++ rcu_read_lock();
++begin:
++ result = NULL;
++ badness = -1;
++ sk_nulls_for_each_rcu(sk, node, &hslot->head) {
++ score = compute_score(sk, net, saddr, hnum, sport,
++ daddr, dport, dif);
++ if (score > badness) {
++ result = sk;
++ badness = score;
++ }
++ }
++ /*
++ * if the nulls value we got at the end of this lookup is
++ * not the expected one, we must restart lookup.
++ * We probably met an item that was moved to another chain.
++ */
++ if (get_nulls_value(node) != hash)
++ goto begin;
++
++ if (result) {
++ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
++ result = NULL;
++ else if (unlikely(compute_score(result, net, saddr, hnum, sport,
++ daddr, dport, dif) < badness)) {
++ sock_put(result);
++ goto begin;
++ }
++ }
++ rcu_read_unlock();
++ return result;
++}
++
++static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
++ __be16 sport, __be16 dport,
++ struct udp_table *udptable)
++{
++ struct sock *sk;
++ const struct iphdr *iph = ip_hdr(skb);
++
++ if (unlikely(sk = skb_steal_sock(skb)))
++ return sk;
++ else
++ return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
++ iph->daddr, dport, inet_iif(skb),
++ udptable);
++}
++
++struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
++ __be32 daddr, __be16 dport, int dif)
++{
++ return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
++}
++EXPORT_SYMBOL_GPL(udp4_lib_lookup);
++
++static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
++ __be16 loc_port, __be32 loc_addr,
++ __be16 rmt_port, __be32 rmt_addr,
++ int dif)
++{
++ struct hlist_nulls_node *node;
++ struct sock *s = sk;
++ unsigned short hnum = ntohs(loc_port);
++
++ sk_nulls_for_each_from(s, node) {
++ struct inet_sock *inet = inet_sk(s);
++
++ if (!net_eq(sock_net(s), net) ||
++ s->sk_hash != hnum ||
++ (inet->daddr && inet->daddr != rmt_addr) ||
++ (inet->dport != rmt_port && inet->dport) ||
++ (inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
++ ipv6_only_sock(s) ||
++ (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
++ continue;
++ if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
++ continue;
++ goto found;
++ }
++ s = NULL;
++found:
++ return s;
++}
++
++/*
++ * This routine is called by the ICMP module when it gets some
++ * sort of error condition. If err < 0 then the socket should
++ * be closed and the error returned to the user. If err > 0
++ * it's just the icmp type << 8 | icmp code.
++ * Header points to the ip header of the error packet. We move
++ * on past this. Then (as it used to claim before adjustment)
++ * header points to the first 8 bytes of the udp header. We need
++ * to find the appropriate port.
++ */
++
++void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
++{
++ struct inet_sock *inet;
++ struct iphdr *iph = (struct iphdr*)skb->data;
++ struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
++ const int type = icmp_hdr(skb)->type;
++ const int code = icmp_hdr(skb)->code;
++ struct sock *sk;
++ int harderr;
++ int err;
++ struct net *net = dev_net(skb->dev);
++
++ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
++ iph->saddr, uh->source, skb->dev->ifindex, udptable);
++ if (sk == NULL) {
++ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
++ return; /* No socket for error */
++ }
++
++ err = 0;
++ harderr = 0;
++ inet = inet_sk(sk);
++
++ switch (type) {
++ default:
++ case ICMP_TIME_EXCEEDED:
++ err = EHOSTUNREACH;
++ break;
++ case ICMP_SOURCE_QUENCH:
++ goto out;
++ case ICMP_PARAMETERPROB:
++ err = EPROTO;
++ harderr = 1;
++ break;
++ case ICMP_DEST_UNREACH:
++ if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
++ if (inet->pmtudisc != IP_PMTUDISC_DONT) {
++ err = EMSGSIZE;
++ harderr = 1;
++ break;
++ }
++ goto out;
++ }
++ err = EHOSTUNREACH;
++ if (code <= NR_ICMP_UNREACH) {
++ harderr = icmp_err_convert[code].fatal;
++ err = icmp_err_convert[code].errno;
++ }
++ break;
++ }
++
++ /*
++ * RFC1122: OK. Passes ICMP errors back to application, as per
++ * 4.1.3.3.
++ */
++ if (!inet->recverr) {
++ if (!harderr || sk->sk_state != TCP_ESTABLISHED)
++ goto out;
++ } else {
++ ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
++ }
++ sk->sk_err = err;
++ sk->sk_error_report(sk);
++out:
++ sock_put(sk);
++}
++
++void udp_err(struct sk_buff *skb, u32 info)
++{
++ __udp4_lib_err(skb, info, &udp_table);
++}
++
++/*
++ * Throw away all pending data and cancel the corking. Socket is locked.
++ */
++void udp_flush_pending_frames(struct sock *sk)
++{
++ struct udp_sock *up = udp_sk(sk);
++
++ if (up->pending) {
++ up->len = 0;
++ up->pending = 0;
++ ip_flush_pending_frames(sk);
++ }
++}
++EXPORT_SYMBOL(udp_flush_pending_frames);
++
++/**
++ * udp4_hwcsum_outgoing - handle outgoing HW checksumming
++ * @sk: socket we are sending on
++ * @skb: sk_buff containing the filled-in UDP header
++ * (checksum field must be zeroed out)
++ */
++static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
++ __be32 src, __be32 dst, int len )
++{
++ unsigned int offset;
++ struct udphdr *uh = udp_hdr(skb);
++ __wsum csum = 0;
++
++ if (skb_queue_len(&sk->sk_write_queue) == 1) {
++ /*
++ * Only one fragment on the socket.
++ */
++ skb->csum_start = skb_transport_header(skb) - skb->head;
++ skb->csum_offset = offsetof(struct udphdr, check);
++ uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
++ } else {
++ /*
++ * HW-checksum won't work as there are two or more
++ * fragments on the socket so that all csums of sk_buffs
++ * should be together
++ */
++ offset = skb_transport_offset(skb);
++ skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
++
++ skb->ip_summed = CHECKSUM_NONE;
++
++ skb_queue_walk(&sk->sk_write_queue, skb) {
++ csum = csum_add(csum, skb->csum);
++ }
++
++ uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
++ if (uh->check == 0)
++ uh->check = CSUM_MANGLED_0;
++ }
++}
++
++/*
++ * Push out all pending data as one UDP datagram. Socket is locked.
++ */
++static int udp_push_pending_frames(struct sock *sk)
++{
++ struct udp_sock *up = udp_sk(sk);
++ struct inet_sock *inet = inet_sk(sk);
++ struct flowi *fl = &inet->cork.fl;
++ struct sk_buff *skb;
++ struct udphdr *uh;
++ int err = 0;
++ int is_udplite = IS_UDPLITE(sk);
++ __wsum csum = 0;
++
++ /* Grab the skbuff where UDP header space exists. */
++ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
++ goto out;
++
++ /*
++ * Create a UDP header
++ */
++ uh = udp_hdr(skb);
++ uh->source = fl->fl_ip_sport;
++ uh->dest = fl->fl_ip_dport;
++ uh->len = htons(up->len);
++ uh->check = 0;
++
++ if (is_udplite) /* UDP-Lite */
++ csum = udplite_csum_outgoing(sk, skb);
++
++ else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
++
++ skb->ip_summed = CHECKSUM_NONE;
++ goto send;
++
++ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
++
++ udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
++ goto send;
++
++ } else /* `normal' UDP */
++ csum = udp_csum_outgoing(sk, skb);
++
++ /* add protocol-dependent pseudo-header */
++ uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
++ sk->sk_protocol, csum );
++ if (uh->check == 0)
++ uh->check = CSUM_MANGLED_0;
++
++send:
++ err = ip_push_pending_frames(sk);
++out:
++ up->len = 0;
++ up->pending = 0;
++ if (!err)
++ UDP_INC_STATS_USER(sock_net(sk),
++ UDP_MIB_OUTDATAGRAMS, is_udplite);
++ return err;
++}
++
++int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
++ size_t len)
++{
++ struct inet_sock *inet = inet_sk(sk);
++ struct udp_sock *up = udp_sk(sk);
++ int ulen = len;
++ struct ipcm_cookie ipc;
++ struct rtable *rt = NULL;
++ int free = 0;
++ int connected = 0;
++ __be32 daddr, faddr, saddr;
++ __be16 dport;
++ u8 tos;
++ int err, is_udplite = IS_UDPLITE(sk);
++ int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
++ int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
++
++ if (len > 0xFFFF)
++ return -EMSGSIZE;
++
++ /*
++ * Check the flags.
++ */
++
++ if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
++ return -EOPNOTSUPP;
++
++ ipc.opt = NULL;
++ ipc.shtx.flags = 0;
++
++ if (up->pending) {
++ /*
++ * There are pending frames.
++ * The socket lock must be held while it's corked.
++ */
++ lock_sock(sk);
++ if (likely(up->pending)) {
++ if (unlikely(up->pending != AF_INET)) {
++ release_sock(sk);
++ return -EINVAL;
++ }
++ goto do_append_data;
++ }
++ release_sock(sk);
++ }
++ ulen += sizeof(struct udphdr);
++
++ /*
++ * Get and verify the address.
++ */
++ if (msg->msg_name) {
++ struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
++ if (msg->msg_namelen < sizeof(*usin))
++ return -EINVAL;
++ if (usin->sin_family != AF_INET) {
++ if (usin->sin_family != AF_UNSPEC)
++ return -EAFNOSUPPORT;
++ }
++
++ daddr = usin->sin_addr.s_addr;
++ dport = usin->sin_port;
++ if (dport == 0)
++ return -EINVAL;
++ } else {
++ if (sk->sk_state != TCP_ESTABLISHED)
++ return -EDESTADDRREQ;
++ daddr = inet->daddr;
++ dport = inet->dport;
++ /* Open fast path for connected socket.
++ Route will not be used, if at least one option is set.
++ */
++ connected = 1;
++ }
++ ipc.addr = inet->saddr;
++
++ ipc.oif = sk->sk_bound_dev_if;
++ err = sock_tx_timestamp(msg, sk, &ipc.shtx);
++ if (err)
++ return err;
++ if (msg->msg_controllen) {
++ err = ip_cmsg_send(sock_net(sk), msg, &ipc);
++ if (err)
++ return err;
++ if (ipc.opt)
++ free = 1;
++ connected = 0;
++ }
++ if (!ipc.opt)
++ ipc.opt = inet->opt;
++
++ saddr = ipc.addr;
++ ipc.addr = faddr = daddr;
++
++ if (ipc.opt && ipc.opt->srr) {
++ if (!daddr)
++ return -EINVAL;
++ faddr = ipc.opt->faddr;
++ connected = 0;
++ }
++ tos = RT_TOS(inet->tos);
++ if (sock_flag(sk, SOCK_LOCALROUTE) ||
++ (msg->msg_flags & MSG_DONTROUTE) ||
++ (ipc.opt && ipc.opt->is_strictroute)) {
++ tos |= RTO_ONLINK;
++ connected = 0;
++ }
++
++ if (ipv4_is_multicast(daddr)) {
++ if (!ipc.oif)
++ ipc.oif = inet->mc_index;
++ if (!saddr)
++ saddr = inet->mc_addr;
++ connected = 0;
++ }
++
++ if (connected)
++ rt = (struct rtable*)sk_dst_check(sk, 0);
++
++ if (rt == NULL) {
++ struct flowi fl = { .oif = ipc.oif,
++ .nl_u = { .ip4_u =
++ { .daddr = faddr,
++ .saddr = saddr,
++ .tos = tos } },
++ .proto = sk->sk_protocol,
++ .flags = inet_sk_flowi_flags(sk),
++ .uli_u = { .ports =
++ { .sport = inet->sport,
++ .dport = dport } } };
++ struct net *net = sock_net(sk);
++
++ security_sk_classify_flow(sk, &fl);
++ err = ip_route_output_flow(net, &rt, &fl, sk, 1);
++ if (err) {
++ if (err == -ENETUNREACH)
++ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
++ goto out;
++ }
++
++ err = -EACCES;
++ if ((rt->rt_flags & RTCF_BROADCAST) &&
++ !sock_flag(sk, SOCK_BROADCAST))
++ goto out;
++ if (connected)
++ sk_dst_set(sk, dst_clone(&rt->u.dst));
++ }
++
++ if (msg->msg_flags&MSG_CONFIRM)
++ goto do_confirm;
++back_from_confirm:
++
++ saddr = rt->rt_src;
++ if (!ipc.addr)
++ daddr = ipc.addr = rt->rt_dst;
++
++ lock_sock(sk);
++ if (unlikely(up->pending)) {
++ /* The socket is already corked while preparing it. */
++ /* ... which is an evident application bug. --ANK */
++ release_sock(sk);
++
++ LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
++ err = -EINVAL;
++ goto out;
++ }
++ /*
++ * Now cork the socket to pend data.
++ */
++ inet->cork.fl.fl4_dst = daddr;
++ inet->cork.fl.fl_ip_dport = dport;
++ inet->cork.fl.fl4_src = saddr;
++ inet->cork.fl.fl_ip_sport = inet->sport;
++ up->pending = AF_INET;
++
++do_append_data:
++ up->len += ulen;
++ getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
++ err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
++ sizeof(struct udphdr), &ipc, &rt,
++ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
++ if (err)
++ udp_flush_pending_frames(sk);
++ else if (!corkreq)
++ err = udp_push_pending_frames(sk);
++ else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
++ up->pending = 0;
++ release_sock(sk);
++
++out:
++ ip_rt_put(rt);
++ if (free)
++ kfree(ipc.opt);
++ if (!err)
++ return len;
++ /*
++ * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
++ * ENOBUFS might not be good (it's not tunable per se), but otherwise
++ * we don't have a good statistic (IpOutDiscards but it can be too many
++ * things). We could add another new stat but at least for now that
++ * seems like overkill.
++ */
++ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
++ UDP_INC_STATS_USER(sock_net(sk),
++ UDP_MIB_SNDBUFERRORS, is_udplite);
++ }
++ return err;
++
++do_confirm:
++ dst_confirm(&rt->u.dst);
++ if (!(msg->msg_flags&MSG_PROBE) || len)
++ goto back_from_confirm;
++ err = 0;
++ goto out;
++}
++
++int udp_sendpage(struct sock *sk, struct page *page, int offset,
++ size_t size, int flags)
++{
++ struct udp_sock *up = udp_sk(sk);
++ int ret;
++
++ if (!up->pending) {
++ struct msghdr msg = { .msg_flags = flags|MSG_MORE };
++
++ /* Call udp_sendmsg to specify destination address which
++ * sendpage interface can't pass.
++ * This will succeed only when the socket is connected.
++ */
++ ret = udp_sendmsg(NULL, sk, &msg, 0);
++ if (ret < 0)
++ return ret;
++ }
++
++ lock_sock(sk);
++
++ if (unlikely(!up->pending)) {
++ release_sock(sk);
++
++ LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
++ return -EINVAL;
++ }
++
++ ret = ip_append_page(sk, page, offset, size, flags);
++ if (ret == -EOPNOTSUPP) {
++ release_sock(sk);
++ return sock_no_sendpage(sk->sk_socket, page, offset,
++ size, flags);
++ }
++ if (ret < 0) {
++ udp_flush_pending_frames(sk);
++ goto out;
++ }
++
++ up->len += size;
++ if (!(up->corkflag || (flags&MSG_MORE)))
++ ret = udp_push_pending_frames(sk);
++ if (!ret)
++ ret = size;
++out:
++ release_sock(sk);
++ return ret;
++}
++
++/*
++ * IOCTL requests applicable to the UDP protocol
++ */
++
++int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
++{
++ switch (cmd) {
++ case SIOCOUTQ:
++ {
++ int amount = atomic_read(&sk->sk_wmem_alloc);
++ return put_user(amount, (int __user *)arg);
++ }
++
++ case SIOCINQ:
++ {
++ struct sk_buff *skb;
++ unsigned long amount;
++
++ amount = 0;
++ spin_lock_bh(&sk->sk_receive_queue.lock);
++ skb = skb_peek(&sk->sk_receive_queue);
++ if (skb != NULL) {
++ /*
++ * We will only return the amount
++ * of this packet since that is all
++ * that will be read.
++ */
++ amount = skb->len - sizeof(struct udphdr);
++ }
++ spin_unlock_bh(&sk->sk_receive_queue.lock);
++ return put_user(amount, (int __user *)arg);
++ }
++
++ default:
++ return -ENOIOCTLCMD;
++ }
++
++ return 0;
++}
++
++/*
++ * This should be easy, if there is something there we
++ * return it, otherwise we block.
++ */
++
++int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
++ size_t len, int noblock, int flags, int *addr_len)
++{
++ struct inet_sock *inet = inet_sk(sk);
++ struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
++ struct sk_buff *skb;
++ unsigned int ulen, copied;
++ int peeked;
++ int err;
++ int is_udplite = IS_UDPLITE(sk);
++
++ /*
++ * Check any passed addresses
++ */
++ if (addr_len)
++ *addr_len=sizeof(*sin);
++
++ if (flags & MSG_ERRQUEUE)
++ return ip_recv_error(sk, msg, len);
++
++try_again:
++ skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
++ &peeked, &err);
++ if (!skb)
++ goto out;
++
++ ulen = skb->len - sizeof(struct udphdr);
++ copied = len;
++ if (copied > ulen)
++ copied = ulen;
++ else if (copied < ulen)
++ msg->msg_flags |= MSG_TRUNC;
++
++ /*
++ * If checksum is needed at all, try to do it while copying the
++ * data. If the data is truncated, or if we only want a partial
++ * coverage checksum (UDP-Lite), do it before the copy.
++ */
++
++ if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
++ if (udp_lib_checksum_complete(skb))
++ goto csum_copy_err;
++ }
++
++ if (skb_csum_unnecessary(skb))
++ err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
++ msg->msg_iov, copied );
++ else {
++ err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
++
++ if (err == -EINVAL)
++ goto csum_copy_err;
++ }
++
++ if (err)
++ goto out_free;
++
++ if (!peeked)
++ UDP_INC_STATS_USER(sock_net(sk),
++ UDP_MIB_INDATAGRAMS, is_udplite);
++
++ sock_recv_timestamp(msg, sk, skb);
++
++ /* Copy the address. */
++ if (sin)
++ {
++ sin->sin_family = AF_INET;
++ sin->sin_port = udp_hdr(skb)->source;
++ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
++ memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++ }
++ if (inet->cmsg_flags)
++ ip_cmsg_recv(msg, skb);
++
++ err = copied;
++ if (flags & MSG_TRUNC)
++ err = ulen;
++
++out_free:
++ lock_sock(sk);
++ skb_free_datagram(sk, skb);
++ release_sock(sk);
++out:
++ return err;
++
++csum_copy_err:
++ lock_sock(sk);
++ if (!skb_kill_datagram(sk, skb, flags))
++ UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
++ release_sock(sk);
++
++ if (noblock)
++ return -EAGAIN;
++ goto try_again;
++}
++
++
++int udp_disconnect(struct sock *sk, int flags)
++{
++ struct inet_sock *inet = inet_sk(sk);
++ /*
++ * 1003.1g - break association.
++ */
++
++ sk->sk_state = TCP_CLOSE;
++ inet->daddr = 0;
++ inet->dport = 0;
++ sk->sk_bound_dev_if = 0;
++ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
++ inet_reset_saddr(sk);
++
++ if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
++ sk->sk_prot->unhash(sk);
++ inet->sport = 0;
++ }
++ sk_dst_reset(sk);
++ return 0;
++}
++
++void udp_lib_unhash(struct sock *sk)
++{
++ if (sk_hashed(sk)) {
++ struct udp_table *udptable = sk->sk_prot->h.udp_table;
++ unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash);
++ struct udp_hslot *hslot = &udptable->hash[hash];
++
++ spin_lock_bh(&hslot->lock);
++ if (sk_nulls_del_node_init_rcu(sk)) {
++ inet_sk(sk)->num = 0;
++ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
++ }
++ spin_unlock_bh(&hslot->lock);
++ }
++}
++EXPORT_SYMBOL(udp_lib_unhash);
++
++static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
++{
++ int is_udplite = IS_UDPLITE(sk);
++ int rc;
++
++ if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
++ /* Note that an ENOMEM error is charged twice */
++ if (rc == -ENOMEM) {
++ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
++ is_udplite);
++ atomic_inc(&sk->sk_drops);
++ }
++ goto drop;
++ }
++
++ return 0;
++
++drop:
++ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
++ kfree_skb(skb);
++ return -1;
++}
++
++/* returns:
++ * -1: error
++ * 0: success
++ * >0: "udp encap" protocol resubmission
++ *
++ * Note that in the success and error cases, the skb is assumed to
++ * have either been requeued or freed.
++ */
++int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
++{
++ struct udp_sock *up = udp_sk(sk);
++ int rc;
++ int is_udplite = IS_UDPLITE(sk);
++
++ /*
++ * Charge it to the socket, dropping if the queue is full.
++ */
++ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
++ goto drop;
++ nf_reset(skb);
++
++ if (up->encap_type) {
++ /*
++ * This is an encapsulation socket so pass the skb to
++ * the socket's udp_encap_rcv() hook. Otherwise, just
++ * fall through and pass this up the UDP socket.
++ * up->encap_rcv() returns the following value:
++ * =0 if skb was successfully passed to the encap
++ * handler or was discarded by it.
++ * >0 if skb should be passed on to UDP.
++ * <0 if skb should be resubmitted as proto -N
++ */
++
++ /* if we're overly short, let UDP handle it */
++ if (skb->len > sizeof(struct udphdr) &&
++ up->encap_rcv != NULL) {
++ int ret;
++
++ ret = (*up->encap_rcv)(sk, skb);
++ if (ret <= 0) {
++ UDP_INC_STATS_BH(sock_net(sk),
++ UDP_MIB_INDATAGRAMS,
++ is_udplite);
++ return -ret;
++ }
++ }
++
++ /* FALLTHROUGH -- it's a UDP packet */
++ }
++
++ /*
++ * UDP-Lite specific tests, ignored on UDP sockets
++ */
++ if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
++
++ /*
++ * MIB statistics other than incrementing the error count are
++ * disabled for the following two types of errors: these depend
++ * on the application settings, not on the functioning of the
++ * protocol stack as such.
++ *
++ * RFC 3828 here recommends (sec 3.3): "There should also be a
++ * way ... to ... at least let the receiving application block
++ * delivery of packets with coverage values less than a value
++ * provided by the application."
++ */
++ if (up->pcrlen == 0) { /* full coverage was set */
++ LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
++ "%d while full coverage %d requested\n",
++ UDP_SKB_CB(skb)->cscov, skb->len);
++ goto drop;
++ }
++ /* The next case involves violating the min. coverage requested
++ * by the receiver. This is subtle: if the receiver wants x and x is
++ * greater than the buffer size/MTU, then the receiver will complain
++ * that it wants x while the sender emits packets of smaller size y.
++ * Therefore the above ...()->partial_cov statement is essential.
++ */
++ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
++ LIMIT_NETDEBUG(KERN_WARNING
++ "UDPLITE: coverage %d too small, need min %d\n",
++ UDP_SKB_CB(skb)->cscov, up->pcrlen);
++ goto drop;
++ }
++ }
++
++ if (sk->sk_filter) {
++ if (udp_lib_checksum_complete(skb))
++ goto drop;
++ }
++
++ rc = 0;
++
++ bh_lock_sock(sk);
++ if (!sock_owned_by_user(sk))
++ rc = __udp_queue_rcv_skb(sk, skb);
++ else
++ sk_add_backlog(sk, skb);
++ bh_unlock_sock(sk);
++
++ return rc;
++
++drop:
++ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
++ kfree_skb(skb);
++ return -1;
++}
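++
++/*
++ * Illustrative, disabled sketch of the encap_rcv contract documented in
++ * udp_queue_rcv_skb() above.  The handler actually installed by this
++ * patch is xfrm4_udp_encap_rcv(); the hypothetical example below only
++ * shows the return-value convention: >0 hands the packet back to UDP
++ * (IKE traffic), <0 asks for resubmission as protocol -N (here ESP).
++ */
++#if 0
++static int example_encap_rcv(struct sock *sk, struct sk_buff *skb)
++{
++	__be32 *marker;
++
++	/* Too short to carry a non-ESP marker: let plain UDP deliver it. */
++	if (!pskb_may_pull(skb, sizeof(struct udphdr) + sizeof(__be32)))
++		return 1;
++
++	marker = (__be32 *)(udp_hdr(skb) + 1);
++	if (*marker == 0)
++		return 1;		/* zero non-ESP marker: IKE, pass to UDP */
++
++	return -IPPROTO_ESP;		/* ESP payload: resubmit as protocol 50 */
++}
++#endif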
++
++/*
++ * Multicasts and broadcasts go to each listener.
++ *
++ * Note: called only from the BH handler context,
++ * so we don't need to lock the hashes.
++ */
++static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
++ struct udphdr *uh,
++ __be32 saddr, __be32 daddr,
++ struct udp_table *udptable)
++{
++ struct sock *sk;
++ struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))];
++ int dif;
++
++ spin_lock(&hslot->lock);
++ sk = sk_nulls_head(&hslot->head);
++ dif = skb->dev->ifindex;
++ sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
++ if (sk) {
++ struct sock *sknext = NULL;
++
++ do {
++ struct sk_buff *skb1 = skb;
++
++ sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
++ daddr, uh->source, saddr,
++ dif);
++ if (sknext)
++ skb1 = skb_clone(skb, GFP_ATOMIC);
++
++ if (skb1) {
++ int ret = udp_queue_rcv_skb(sk, skb1);
++ if (ret > 0)
++ /* we should probably re-process instead
++ * of dropping packets here. */
++ kfree_skb(skb1);
++ }
++ sk = sknext;
++ } while (sknext);
++ } else
++ consume_skb(skb);
++ spin_unlock(&hslot->lock);
++ return 0;
++}
++
++/* Initialize the UDP checksum state. If this returns zero (success) and
++ * skb->ip_summed is CHECKSUM_UNNECESSARY, no further checks are required.
++ * Otherwise, checksum completion requires checksumming the packet body,
++ * including the UDP header, and folding the result into skb->csum.
++ */
++static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
++ int proto)
++{
++ const struct iphdr *iph;
++ int err;
++
++ UDP_SKB_CB(skb)->partial_cov = 0;
++ UDP_SKB_CB(skb)->cscov = skb->len;
++
++ if (proto == IPPROTO_UDPLITE) {
++ err = udplite_checksum_init(skb, uh);
++ if (err)
++ return err;
++ }
++
++ iph = ip_hdr(skb);
++ if (uh->check == 0) {
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
++ if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
++ proto, skb->csum))
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ }
++ if (!skb_csum_unnecessary(skb))
++ skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
++ skb->len, proto, 0);
++ /* Probably, we should checksum udp header (it should be in cache
++ * in any case) and data in tiny packets (< rx copybreak).
++ */
++
++ return 0;
++}
++
++/*
++ * All we need to do is get the socket, and then do a checksum.
++ */
++
++int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
++ int proto)
++{
++ struct sock *sk;
++ struct udphdr *uh;
++ unsigned short ulen;
++ struct rtable *rt = (struct rtable*)skb->dst;
++ __be32 saddr, daddr;
++ struct net *net = dev_net(skb->dev);
++
++ /*
++ * Validate the packet.
++ */
++ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
++ goto drop; /* No space for header. */
++
++ uh = udp_hdr(skb);
++ ulen = ntohs(uh->len);
++ if (ulen > skb->len)
++ goto short_packet;
++
++ if (proto == IPPROTO_UDP) {
++ /* UDP validates ulen. */
++ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
++ goto short_packet;
++ uh = udp_hdr(skb);
++ }
++
++ if (udp4_csum_init(skb, uh, proto))
++ goto csum_error;
++
++ saddr = ip_hdr(skb)->saddr;
++ daddr = ip_hdr(skb)->daddr;
++
++ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
++ return __udp4_lib_mcast_deliver(net, skb, uh,
++ saddr, daddr, udptable);
++
++ sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
++
++ if (sk != NULL) {
++ int ret = udp_queue_rcv_skb(sk, skb);
++ sock_put(sk);
++
++ /* a return value > 0 means to resubmit the input, but
++ * it wants the return to be -protocol, or 0
++ */
++ if (ret > 0)
++ return -ret;
++ return 0;
++ }
++
++ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
++ goto drop;
++ nf_reset(skb);
++
++ /* No socket. Drop packet silently, if checksum is wrong */
++ if (udp_lib_checksum_complete(skb))
++ goto csum_error;
++
++ UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
++ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
++
++ /*
++ * Hmm. We got a UDP packet for a port on which nobody
++ * is listening. Ignore it.
++ */
++ kfree_skb(skb);
++ return 0;
++
++short_packet:
++ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n",
++ proto == IPPROTO_UDPLITE ? "-Lite" : "",
++ &saddr,
++ ntohs(uh->source),
++ ulen,
++ skb->len,
++ &daddr,
++ ntohs(uh->dest));
++ goto drop;
++
++csum_error:
++ /*
++ * RFC1122: OK. Discards the bad packet silently (as far as
++ * the network is concerned, anyway) as per 4.1.3.4 (MUST).
++ */
++ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n",
++ proto == IPPROTO_UDPLITE ? "-Lite" : "",
++ &saddr,
++ ntohs(uh->source),
++ &daddr,
++ ntohs(uh->dest),
++ ulen);
++drop:
++ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
++ kfree_skb(skb);
++ return 0;
++}
++
++int udp_rcv(struct sk_buff *skb)
++{
++ return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);
++}
++
++void udp_destroy_sock(struct sock *sk)
++{
++ lock_sock(sk);
++ udp_flush_pending_frames(sk);
++ release_sock(sk);
++}
++
++/*
++ * Socket option code for UDP
++ */
++int udp_lib_setsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int optlen,
++ int (*push_pending_frames)(struct sock *))
++{
++ struct udp_sock *up = udp_sk(sk);
++ int val;
++ int err = 0;
++ int is_udplite = IS_UDPLITE(sk);
++
++ if (optlen<sizeof(int))
++ return -EINVAL;
++
++ if (get_user(val, (int __user *)optval))
++ return -EFAULT;
++
++ switch (optname) {
++ case UDP_CORK:
++ if (val != 0) {
++ up->corkflag = 1;
++ } else {
++ up->corkflag = 0;
++ lock_sock(sk);
++ (*push_pending_frames)(sk);
++ release_sock(sk);
++ }
++ break;
++
++ case UDP_ENCAP:
++ switch (val) {
++ case 0:
++ case UDP_ENCAP_ESPINUDP:
++ case UDP_ENCAP_ESPINUDP_NON_IKE:
++ up->encap_rcv = xfrm4_udp_encap_rcv;
++ /* FALLTHROUGH */
++ case UDP_ENCAP_L2TPINUDP:
++ up->encap_type = val;
++ break;
++ default:
++ err = -ENOPROTOOPT;
++ break;
++ }
++ break;
++
++ /*
++ * UDP-Lite's partial checksum coverage (RFC 3828).
++ */
++ /* The sender sets actual checksum coverage length via this option.
++ * The case coverage > packet length is handled by send module. */
++ case UDPLITE_SEND_CSCOV:
++ if (!is_udplite) /* Disable the option on UDP sockets */
++ return -ENOPROTOOPT;
++ if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
++ val = 8;
++ else if (val > USHORT_MAX)
++ val = USHORT_MAX;
++ up->pcslen = val;
++ up->pcflag |= UDPLITE_SEND_CC;
++ break;
++
++ /* The receiver specifies a minimum checksum coverage value. To make
++ * sense, this should be set to at least 8 (as done below). If zero is
++ * used, this again means full checksum coverage. */
++ case UDPLITE_RECV_CSCOV:
++ if (!is_udplite) /* Disable the option on UDP sockets */
++ return -ENOPROTOOPT;
++ if (val != 0 && val < 8) /* Avoid silly minimal values. */
++ val = 8;
++ else if (val > USHORT_MAX)
++ val = USHORT_MAX;
++ up->pcrlen = val;
++ up->pcflag |= UDPLITE_RECV_CC;
++ break;
++
++ default:
++ err = -ENOPROTOOPT;
++ break;
++ }
++
++ return err;
++}
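++
++/*
++ * Illustrative userspace sketch of the UDP_ENCAP branch above: an IKE
++ * daemon doing RFC 3947/3948 NAT-Traversal would typically enable
++ * ESP-in-UDP decapsulation on its UDP port 4500 socket roughly like
++ * this (constants from <linux/udp.h>):
++ *
++ *	int type = UDP_ENCAP_ESPINUDP;
++ *	if (setsockopt(fd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0)
++ *		perror("UDP_ENCAP");
++ *
++ * From then on, datagrams arriving on that socket are fed to
++ * up->encap_rcv (xfrm4_udp_encap_rcv) rather than delivered as normal
++ * UDP; IKE packets carrying the zero non-ESP marker are still passed up.
++ */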
++
++int udp_setsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int optlen)
++{
++ if (level == SOL_UDP || level == SOL_UDPLITE)
++ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
++ udp_push_pending_frames);
++ return ip_setsockopt(sk, level, optname, optval, optlen);
++}
++
++#ifdef CONFIG_COMPAT
++int compat_udp_setsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int optlen)
++{
++ if (level == SOL_UDP || level == SOL_UDPLITE)
++ return udp_lib_setsockopt(sk, level, optname, optval, optlen,
++ udp_push_pending_frames);
++ return compat_ip_setsockopt(sk, level, optname, optval, optlen);
++}
++#endif
++
++int udp_lib_getsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int __user *optlen)
++{
++ struct udp_sock *up = udp_sk(sk);
++ int val, len;
++
++ if (get_user(len,optlen))
++ return -EFAULT;
++
++ len = min_t(unsigned int, len, sizeof(int));
++
++ if (len < 0)
++ return -EINVAL;
++
++ switch (optname) {
++ case UDP_CORK:
++ val = up->corkflag;
++ break;
++
++ case UDP_ENCAP:
++ val = up->encap_type;
++ break;
++
++ /* The following two cannot be changed on UDP sockets; the return is
++ * always 0 (which corresponds to the full checksum coverage of UDP). */
++ case UDPLITE_SEND_CSCOV:
++ val = up->pcslen;
++ break;
++
++ case UDPLITE_RECV_CSCOV:
++ val = up->pcrlen;
++ break;
++
++ default:
++ return -ENOPROTOOPT;
++ }
++
++ if (put_user(len, optlen))
++ return -EFAULT;
++ if (copy_to_user(optval, &val,len))
++ return -EFAULT;
++ return 0;
++}
++
++int udp_getsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int __user *optlen)
++{
++ if (level == SOL_UDP || level == SOL_UDPLITE)
++ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
++ return ip_getsockopt(sk, level, optname, optval, optlen);
++}
++
++#ifdef CONFIG_COMPAT
++int compat_udp_getsockopt(struct sock *sk, int level, int optname,
++ char __user *optval, int __user *optlen)
++{
++ if (level == SOL_UDP || level == SOL_UDPLITE)
++ return udp_lib_getsockopt(sk, level, optname, optval, optlen);
++ return compat_ip_getsockopt(sk, level, optname, optval, optlen);
++}
++#endif
++/**
++ * udp_poll - wait for a UDP event.
++ * @file: file struct
++ * @sock: socket
++ * @wait: poll table
++ *
++ * This is the same as datagram poll, except for the special case of
++ * blocking sockets. If an application uses a blocking fd and a packet
++ * with a checksum error is in the queue, select() could indicate that
++ * data is available, yet the subsequent read would block. Add special
++ * case code to work around these arguably broken applications.
++ */
++unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
++{
++ unsigned int mask = datagram_poll(file, sock, wait);
++ struct sock *sk = sock->sk;
++ int is_lite = IS_UDPLITE(sk);
++
++ /* Check for false positives due to checksum errors */
++ if ( (mask & POLLRDNORM) &&
++ !(file->f_flags & O_NONBLOCK) &&
++ !(sk->sk_shutdown & RCV_SHUTDOWN)){
++ struct sk_buff_head *rcvq = &sk->sk_receive_queue;
++ struct sk_buff *skb;
++
++ spin_lock_bh(&rcvq->lock);
++ while ((skb = skb_peek(rcvq)) != NULL &&
++ udp_lib_checksum_complete(skb)) {
++ UDP_INC_STATS_BH(sock_net(sk),
++ UDP_MIB_INERRORS, is_lite);
++ __skb_unlink(skb, rcvq);
++ kfree_skb(skb);
++ }
++ spin_unlock_bh(&rcvq->lock);
++
++ /* nothing to see, move along */
++ if (skb == NULL)
++ mask &= ~(POLLIN | POLLRDNORM);
++ }
++
++ return mask;
++
++}
++
++struct proto udp_prot = {
++ .name = "UDP",
++ .owner = THIS_MODULE,
++ .close = udp_lib_close,
++ .connect = ip4_datagram_connect,
++ .disconnect = udp_disconnect,
++ .ioctl = udp_ioctl,
++ .destroy = udp_destroy_sock,
++ .setsockopt = udp_setsockopt,
++ .getsockopt = udp_getsockopt,
++ .sendmsg = udp_sendmsg,
++ .recvmsg = udp_recvmsg,
++ .sendpage = udp_sendpage,
++ .backlog_rcv = __udp_queue_rcv_skb,
++ .hash = udp_lib_hash,
++ .unhash = udp_lib_unhash,
++ .get_port = udp_v4_get_port,
++ .memory_allocated = &udp_memory_allocated,
++ .sysctl_mem = sysctl_udp_mem,
++ .sysctl_wmem = &sysctl_udp_wmem_min,
++ .sysctl_rmem = &sysctl_udp_rmem_min,
++ .obj_size = sizeof(struct udp_sock),
++ .slab_flags = SLAB_DESTROY_BY_RCU,
++ .h.udp_table = &udp_table,
++#ifdef CONFIG_COMPAT
++ .compat_setsockopt = compat_udp_setsockopt,
++ .compat_getsockopt = compat_udp_getsockopt,
++#endif
++};
++
++/* ------------------------------------------------------------------------ */
++#ifdef CONFIG_PROC_FS
++
++static struct sock *udp_get_first(struct seq_file *seq, int start)
++{
++ struct sock *sk;
++ struct udp_iter_state *state = seq->private;
++ struct net *net = seq_file_net(seq);
++
++ for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
++ struct hlist_nulls_node *node;
++ struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
++ spin_lock_bh(&hslot->lock);
++ sk_nulls_for_each(sk, node, &hslot->head) {
++ if (!net_eq(sock_net(sk), net))
++ continue;
++ if (sk->sk_family == state->family)
++ goto found;
++ }
++ spin_unlock_bh(&hslot->lock);
++ }
++ sk = NULL;
++found:
++ return sk;
++}
++
++static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
++{
++ struct udp_iter_state *state = seq->private;
++ struct net *net = seq_file_net(seq);
++
++ do {
++ sk = sk_nulls_next(sk);
++ } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
++
++ if (!sk) {
++ if (state->bucket < UDP_HTABLE_SIZE)
++ spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
++ return udp_get_first(seq, state->bucket + 1);
++ }
++ return sk;
++}
++
++static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
++{
++ struct sock *sk = udp_get_first(seq, 0);
++
++ if (sk)
++ while (pos && (sk = udp_get_next(seq, sk)) != NULL)
++ --pos;
++ return pos ? NULL : sk;
++}
++
++static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
++{
++ struct udp_iter_state *state = seq->private;
++ state->bucket = UDP_HTABLE_SIZE;
++
++ return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
++}
++
++static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct sock *sk;
++
++ if (v == SEQ_START_TOKEN)
++ sk = udp_get_idx(seq, 0);
++ else
++ sk = udp_get_next(seq, v);
++
++ ++*pos;
++ return sk;
++}
++
++static void udp_seq_stop(struct seq_file *seq, void *v)
++{
++ struct udp_iter_state *state = seq->private;
++
++ if (state->bucket < UDP_HTABLE_SIZE)
++ spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
++}
++
++static int udp_seq_open(struct inode *inode, struct file *file)
++{
++ struct udp_seq_afinfo *afinfo = PDE(inode)->data;
++ struct udp_iter_state *s;
++ int err;
++
++ err = seq_open_net(inode, file, &afinfo->seq_ops,
++ sizeof(struct udp_iter_state));
++ if (err < 0)
++ return err;
++
++ s = ((struct seq_file *)file->private_data)->private;
++ s->family = afinfo->family;
++ s->udp_table = afinfo->udp_table;
++ return err;
++}
++
++/* ------------------------------------------------------------------------ */
++int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
++{
++ struct proc_dir_entry *p;
++ int rc = 0;
++
++ afinfo->seq_fops.open = udp_seq_open;
++ afinfo->seq_fops.read = seq_read;
++ afinfo->seq_fops.llseek = seq_lseek;
++ afinfo->seq_fops.release = seq_release_net;
++
++ afinfo->seq_ops.start = udp_seq_start;
++ afinfo->seq_ops.next = udp_seq_next;
++ afinfo->seq_ops.stop = udp_seq_stop;
++
++ p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
++ &afinfo->seq_fops, afinfo);
++ if (!p)
++ rc = -ENOMEM;
++ return rc;
++}
++
++void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
++{
++ proc_net_remove(net, afinfo->name);
++}
++
++/* ------------------------------------------------------------------------ */
++static void udp4_format_sock(struct sock *sp, struct seq_file *f,
++ int bucket, int *len)
++{
++ struct inet_sock *inet = inet_sk(sp);
++ __be32 dest = inet->daddr;
++ __be32 src = inet->rcv_saddr;
++ __u16 destp = ntohs(inet->dport);
++ __u16 srcp = ntohs(inet->sport);
++
++ seq_printf(f, "%4d: %08X:%04X %08X:%04X"
++ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
++ bucket, src, srcp, dest, destp, sp->sk_state,
++ atomic_read(&sp->sk_wmem_alloc),
++ atomic_read(&sp->sk_rmem_alloc),
++ 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
++ atomic_read(&sp->sk_refcnt), sp,
++ atomic_read(&sp->sk_drops), len);
++}
++
++int udp4_seq_show(struct seq_file *seq, void *v)
++{
++ if (v == SEQ_START_TOKEN)
++ seq_printf(seq, "%-127s\n",
++ " sl local_address rem_address st tx_queue "
++ "rx_queue tr tm->when retrnsmt uid timeout "
++ "inode ref pointer drops");
++ else {
++ struct udp_iter_state *state = seq->private;
++ int len;
++
++ udp4_format_sock(v, seq, state->bucket, &len);
++ seq_printf(seq, "%*s\n", 127 - len ,"");
++ }
++ return 0;
++}
++
++/* ------------------------------------------------------------------------ */
++static struct udp_seq_afinfo udp4_seq_afinfo = {
++ .name = "udp",
++ .family = AF_INET,
++ .udp_table = &udp_table,
++ .seq_fops = {
++ .owner = THIS_MODULE,
++ },
++ .seq_ops = {
++ .show = udp4_seq_show,
++ },
++};
++
++static int udp4_proc_init_net(struct net *net)
++{
++ return udp_proc_register(net, &udp4_seq_afinfo);
++}
++
++static void udp4_proc_exit_net(struct net *net)
++{
++ udp_proc_unregister(net, &udp4_seq_afinfo);
++}
++
++static struct pernet_operations udp4_net_ops = {
++ .init = udp4_proc_init_net,
++ .exit = udp4_proc_exit_net,
++};
++
++int __init udp4_proc_init(void)
++{
++ return register_pernet_subsys(&udp4_net_ops);
++}
++
++void udp4_proc_exit(void)
++{
++ unregister_pernet_subsys(&udp4_net_ops);
++}
++#endif /* CONFIG_PROC_FS */
++
++void __init udp_table_init(struct udp_table *table)
++{
++ int i;
++
++ for (i = 0; i < UDP_HTABLE_SIZE; i++) {
++ INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i);
++ spin_lock_init(&table->hash[i].lock);
++ }
++}
++
++void __init udp_init(void)
++{
++ unsigned long nr_pages, limit;
++
++ udp_table_init(&udp_table);
++ /* Set the pressure threshold up by the same strategy of TCP. It is a
++ * fraction of global memory that is up to 1/2 at 256 MB, decreasing
++ * toward zero with the amount of memory, with a floor of 128 pages.
++ */
++ nr_pages = totalram_pages - totalhigh_pages;
++ limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
++ limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
++ limit = max(limit, 128UL);
++ sysctl_udp_mem[0] = limit / 4 * 3;
++ sysctl_udp_mem[1] = limit;
++ sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
++
++ sysctl_udp_rmem_min = SK_MEM_QUANTUM;
++ sysctl_udp_wmem_min = SK_MEM_QUANTUM;
++}
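++
++/*
++ * Worked example of the threshold computation above, assuming 4 KiB
++ * pages and 256 MiB of low memory (nr_pages = 65536):
++ *	limit = min(65536, 1 << 16) >> 8          = 256
++ *	limit = (256 * (65536 >> 8)) >> 1         = 32768 pages (128 MiB)
++ *	sysctl_udp_mem = { 24576, 32768, 49152 }  pages
++ */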
++
++EXPORT_SYMBOL(udp_disconnect);
++EXPORT_SYMBOL(udp_ioctl);
++EXPORT_SYMBOL(udp_prot);
++EXPORT_SYMBOL(udp_sendmsg);
++EXPORT_SYMBOL(udp_lib_getsockopt);
++EXPORT_SYMBOL(udp_lib_setsockopt);
++EXPORT_SYMBOL(udp_poll);
++EXPORT_SYMBOL(udp_lib_get_port);
++
++#ifdef CONFIG_PROC_FS
++EXPORT_SYMBOL(udp_proc_register);
++EXPORT_SYMBOL(udp_proc_unregister);
++#endif