diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d286bde..34d56d7 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3162,7 +3162,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2faef33..9c12362 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -309,12 +309,6 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } -static inline struct inet6_request_sock * - inet6_rsk(const struct request_sock *rsk) -{ - return NULL; -} - static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4ad0706..a230dd0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -72,6 +72,53 @@ struct tcp_sack_block { u32 end_seq; }; +struct tcp_out_options { + u16 options; /* bit field of OPTION_* */ + u8 ws; /* window scale, 0 to disable */ + u8 num_sack_blocks;/* number of SACK blocks to include */ + u8 hash_size; /* bytes in hash_location */ + u16 mss; /* 0 to disable */ + __u8 *hash_location; /* temporary pointer, overloaded */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ +#ifdef CONFIG_MPTCP + u16 mptcp_options; /* bit field of MPTCP related OPTION_* */ + u8 dss_csum:1, + add_addr_v4:1, + add_addr_v6:1; /* dss-checksum required? */ + + __u32 data_seq; /* data sequence number, for MPTCP */ + __u32 data_ack; /* data ack, for MPTCP */ + + union { + struct { + __u64 sender_key; /* sender's key for mptcp */ + __u64 receiver_key; /* receiver's key for mptcp */ + } mp_capable; + + struct { + __u64 sender_truncated_mac; + __u32 sender_nonce; + /* random number of the sender */ + __u32 token; /* token for mptcp */ + } mp_join_syns; + }; + + struct { + struct in_addr addr; + u8 addr_id; + } add_addr4; + + struct { + struct in6_addr addr; + u8 addr_id; + } add_addr6; + + u16 remove_addrs; /* list of address id */ + u8 addr_id; /* address id (mp_join or add_address) */ +#endif /* CONFIG_MPTCP */ +}; + /*These are used to set the sack_ok field in struct tcp_options_received */ #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ #define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ @@ -95,6 +142,9 @@ struct tcp_options_received { u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ }; +struct mptcp_cb; +struct mptcp_tcp_sock; + static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; @@ -123,6 +173,7 @@ struct tcp_request_sock { * FastOpen it's the seq# * after data-in-SYN. */ + u8 saw_mpc:1; }; static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) @@ -130,6 +181,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) return (struct tcp_request_sock *)req; } +struct tcp_md5sig_key; + struct tcp_sock { /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; @@ -323,6 +376,45 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. 
*/ struct request_sock *fastopen_rsk; + + + struct mptcp_cb *mpcb; + struct sock *meta_sk; + /* We keep these flags even if CONFIG_MPTCP is not checked, because + * it allows checking MPTCP capability just by checking the mpc flag, + * rather than adding ifdefs everywhere. + */ + u16 mpc:1, /* Other end is multipath capable */ + inside_tk_table:1, /* Is the tcp_sock inside the token-table? */ + send_mp_fclose:1, + request_mptcp:1, /* Did we send out an MP_CAPABLE? + * (this speeds up mptcp_doit() in tcp_recvmsg) + */ + mptcp_enabled:1, /* Is MPTCP enabled from the application ? */ + pf:1, /* Potentially Failed state: when this flag is set, we + * stop using the subflow + */ + mp_killed:1, /* Killed with a tcp_done in mptcp? */ + was_meta_sk:1, /* This was a meta sk (in case of reuse) */ + close_it:1, /* Must close socket in mptcp_data_ready? */ + closing:1; + struct mptcp_tcp_sock *mptcp; +#ifdef CONFIG_MPTCP + struct hlist_nulls_node tk_table; + u32 mptcp_loc_token; + u64 mptcp_loc_key; +#endif /* CONFIG_MPTCP */ + + /* Functions that depend on the value of the mpc flag */ + u32 (*__select_window)(struct sock *sk); + u16 (*select_window)(struct sock *sk); + void (*select_initial_window)(int __space, __u32 mss, __u32 *rcv_wnd, + __u32 *window_clamp, int wscale_ok, + __u8 *rcv_wscale, __u32 init_rcv_wnd, + const struct sock *sk); + void (*init_buffer_space)(struct sock *sk); + void (*set_rto)(struct sock *sk); + bool (*should_expand_sndbuf)(const struct sock *sk); }; enum tsq_flags { @@ -334,6 +426,8 @@ enum tsq_flags { TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call * tcp_v{4|6}_mtu_reduced() */ + MPTCP_PATH_MANAGER, /* MPTCP deferred creation of new subflows */ + MPTCP_SUB_DEFERRED, /* A subflow got deferred - process them */ }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -352,6 +446,7 @@ struct tcp_timewait_sock { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; #endif + struct mptcp_tw *mptcp_tw; }; static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index f981ba7..0144c65 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -27,6 +27,8 @@ int inet6_csk_bind_conflict(const struct sock *sk, struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, const struct request_sock *req); +u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, + const u32 rnd, const u32 synq_hsize); struct request_sock *inet6_csk_search_req(const struct sock *sk, struct request_sock ***prevp, diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fe7994c..780f229 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -1,6 +1,8 @@ #ifndef _INET_COMMON_H #define _INET_COMMON_H +#include <net/sock.h> + extern const struct proto_ops inet_stream_ops; extern const struct proto_ops inet_dgram_ops; @@ -13,6 +15,8 @@ struct sock; struct sockaddr; struct socket; +int inet_create(struct net *net, struct socket *sock, int protocol, int kern); +int inet6_create(struct net *net, struct socket *sock, int protocol, int kern); int inet_release(struct socket *sock); int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c55aeed..84d1927 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -243,6 +243,9 @@ 
static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd, + const u32 synq_hsize); + struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __be16 rport, diff --git a/include/net/mptcp.h b/include/net/mptcp.h new file mode 100644 index 0000000..6454535 --- /dev/null +++ b/include/net/mptcp.h @@ -0,0 +1,1471 @@ +/* + * MPTCP implementation + * + * Initial Design & Implementation: + * Sébastien Barré <sebastien.barre@uclouvain.be> + * + * Current Maintainer & Author: + * Christoph Paasch <christoph.paasch@uclouvain.be> + * + * Additional authors: + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> + * Gregory Detal <gregory.detal@uclouvain.be> + * Fabien Duchêne <fabien.duchene@uclouvain.be> + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> + * Lavkesh Lahngir <lavkesh51@gmail.com> + * Andreas Ripke <ripke@neclab.eu> + * Vlad Dogaru <vlad.dogaru@intel.com> + * Octavian Purdila <octavian.purdila@intel.com> + * John Ronan <jronan@tssg.org> + * Catalin Nicutar <catalin.nicutar@gmail.com> + * Brandon Heller <brandonh@stanford.edu> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MPTCP_H +#define _MPTCP_H + +#include <linux/inetdevice.h> +#include <linux/ipv6.h> +#include <linux/list.h> +#include <linux/net.h> +#include <linux/netpoll.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <linux/kernel.h> + +#include <asm/byteorder.h> +#include <asm/unaligned.h> +#include <crypto/hash.h> +#include <net/tcp.h> + +#if defined(__LITTLE_ENDIAN_BITFIELD) + #define ntohll(x) be64_to_cpu(x) + #define htonll(x) cpu_to_be64(x) +#elif defined(__BIG_ENDIAN_BITFIELD) + #define ntohll(x) (x) + #define htonll(x) (x) +#endif + +/* Max number of local or remote addresses we can store. + * When changing, see the bitfield below in mptcp_loc4/6. */ +#define MPTCP_MAX_ADDR 8 + +#define MPTCP_SUBFLOW_RETRY_DELAY 1000 + +struct mptcp_loc4 { + u8 loc4_id; + u8 low_prio:1; + struct in_addr addr; +}; + +struct mptcp_rem4 { + u8 rem4_id; + u8 bitfield; + u8 retry_bitfield; + __be16 port; + struct in_addr addr; +}; + +struct mptcp_loc6 { + u8 loc6_id; + u8 low_prio:1; + struct in6_addr addr; +}; + +struct mptcp_rem6 { + u8 rem6_id; + u8 bitfield; + u8 retry_bitfield; + __be16 port; + struct in6_addr addr; +}; + +struct mptcp_request_sock { + struct tcp_request_sock req; + struct mptcp_cb *mpcb; + /* Collision list in the tuple hashtable. We need to find + * the req sock when receiving the third msg of the 3-way handshake, + * since that one does not contain the token. If this makes + * the request sock too long, we can use kmalloc'ed specific entries for + * that tuple hashtable. At the moment, though, I extend the + * request_sock. 
+ */ + struct list_head collide_tuple; + struct hlist_nulls_node collide_tk; + u32 mptcp_rem_nonce; + u32 mptcp_loc_token; + u64 mptcp_loc_key; + u64 mptcp_rem_key; + u64 mptcp_hash_tmac; + u32 mptcp_loc_nonce; + u8 loc_id; + u8 rem_id; /* Address-id in the MP_JOIN */ + u8 dss_csum:1, + low_prio:1; +}; + +struct mptcp_options_received { + u16 saw_mpc:1, + dss_csum:1, + drop_me:1, + + is_mp_join:1, + join_ack:1, + + saw_low_prio:2, /* 0x1 - low-prio set for this subflow + * 0x2 - low-prio set for another subflow + */ + low_prio:1, + + saw_add_addr:2, /* Saw at least one add_addr option: + * 0x1: IPv4 - 0x2: IPv6 + */ + more_add_addr:1, /* Saw one more add-addr. */ + + saw_rem_addr:1, /* Saw at least one rem_addr option */ + more_rem_addr:1, /* Saw one more rem-addr. */ + + mp_fail:1, + mp_fclose:1; + u8 rem_id; /* Address-id in the MP_JOIN */ + u8 prio_addr_id; /* Address-id in the MP_PRIO */ + + const unsigned char *add_addr_ptr; /* Pointer to add-address option */ + const unsigned char *rem_addr_ptr; /* Pointer to rem-address option */ + + u32 data_ack; + u32 data_seq; + u16 data_len; + + u32 mptcp_rem_token;/* Remote token */ + + /* Key inside the option (from mp_capable or fast_close) */ + u64 mptcp_key; + + u32 mptcp_recv_nonce; + u64 mptcp_recv_tmac; + u8 mptcp_recv_mac[20]; +}; + +struct mptcp_tcp_sock { + struct tcp_sock *next; /* Next subflow socket */ + struct list_head cb_list; + struct mptcp_options_received rx_opt; + + /* Those three fields record the current mapping */ + u64 map_data_seq; + u32 map_subseq; + u16 map_data_len; + u16 slave_sk:1, + fully_established:1, + establish_increased:1, + second_packet:1, + attached:1, + send_mp_fail:1, + include_mpc:1, + mapping_present:1, + map_data_fin:1, + low_prio:1, /* use this socket as backup */ + rcv_low_prio:1, /* Peer sent low-prio option to us */ + send_mp_prio:1, /* Trigger to send mp_prio on this socket */ + pre_established:1; /* State between sending 3rd ACK and + * receiving the fourth ack of new subflows. + */ + + /* isn: needed to translate abs to relative subflow seqnums */ + u32 snt_isn; + u32 rcv_isn; + u32 last_data_seq; + u8 path_index; + u8 loc_id; + u8 rem_id; + + u32 last_rbuf_opti; /* Timestamp of last rbuf optimization */ + unsigned int sent_pkts; + + struct sk_buff *shortcut_ofoqueue; /* Shortcut to the current modified + * skb in the ofo-queue. + */ + + int init_rcv_wnd; + u32 infinite_cutoff_seq; + struct delayed_work work; + u32 mptcp_loc_nonce; + struct tcp_sock *tp; /* Where is my daddy? */ + u32 last_end_data_seq; + + /* MP_JOIN subflow: timer for retransmitting the 3rd ack */ + struct timer_list mptcp_ack_timer; + + /* HMAC of the third ack */ + char sender_mac[20]; +}; + +struct mptcp_tw { + struct list_head list; + u64 loc_key; + u64 rcv_nxt; + struct mptcp_cb __rcu *mpcb; + u8 meta_tw:1, + in_list:1; +}; + +#define MPTCP_PM_NAME_MAX 16 +struct mptcp_pm_ops { + struct list_head list; + + /* Signal the creation of a new MPTCP-session. 
*/ + void (*new_session)(struct sock *meta_sk, int index); + void (*release_sock)(struct sock *meta_sk); + void (*fully_established)(struct sock *meta_sk); + void (*new_remote_address)(struct sock *meta_sk); + int (*get_local_index)(sa_family_t family, union inet_addr *addr, + struct net *net); + int (*get_local_id)(sa_family_t family, union inet_addr *addr, + struct net *net); + void (*addr_signal)(struct sock *sk, unsigned *size, + struct tcp_out_options *opts, struct sk_buff *skb); + + char name[MPTCP_PM_NAME_MAX]; + struct module *owner; +}; + +struct mptcp_cb { + struct sock *meta_sk; + + /* list of sockets in this multipath connection */ + struct tcp_sock *connection_list; + /* list of sockets that need a call to release_cb */ + struct list_head callback_list; + + spinlock_t tw_lock; + struct list_head tw_list; + unsigned char mptw_state; + + atomic_t mpcb_refcnt; + + /* High-order bits of 64-bit sequence numbers */ + u32 snd_high_order[2]; + u32 rcv_high_order[2]; + + u16 send_infinite_mapping:1, + in_time_wait:1, + list_rcvd:1, /* XXX TO REMOVE */ + dss_csum:1, + server_side:1, + infinite_mapping_rcv:1, + infinite_mapping_snd:1, + dfin_combined:1, /* Was the DFIN combined with subflow-fin? */ + passive_close:1, + snd_hiseq_index:1, /* Index in snd_high_order of snd_nxt */ + rcv_hiseq_index:1; /* Index in rcv_high_order of rcv_nxt */ + + /* socket count in this connection */ + u8 cnt_subflows; + u8 cnt_established; + + u32 noneligible; /* Path mask of temporarily non + * eligible subflows by the scheduler + */ + + struct sk_buff_head reinject_queue; + + u8 dfin_path_index; + +#define MPTCP_PM_SIZE 320 + u8 mptcp_pm[MPTCP_PM_SIZE] __aligned(8); + struct mptcp_pm_ops *pm_ops; + + /* Mutex needed, because otherwise mptcp_close will complain that the + * socket is owned by the user. + * E.g., mptcp_sub_close_wq is taking the meta-lock. + */ + struct mutex mpcb_mutex; + + /* Master socket, also part of the connection_list, this + * socket is the one that the application sees. + */ + struct sock *master_sk; + + u64 csum_cutoff_seq; + + __u64 mptcp_loc_key; + __u32 mptcp_loc_token; + __u64 mptcp_rem_key; + __u32 mptcp_rem_token; + + /* Create a new subflow - necessary because the meta-sk may be IPv4, but + * the new subflow can be IPv6 + */ + struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); + + /* Remote addresses */ + struct mptcp_rem4 remaddr4[MPTCP_MAX_ADDR]; + u8 rem4_bits; + + struct mptcp_rem6 remaddr6[MPTCP_MAX_ADDR]; + u8 rem6_bits; + + u32 path_index_bits; + /* Next pi to pick up in case a new path becomes available */ + u8 next_path_index; + + /* Original snd/rcvbuf of the initial subflow. + * Used for the new subflows on the server-side to allow correct + * autotuning + */ + int orig_sk_rcvbuf; + int orig_sk_sndbuf; + u32 orig_window_clamp; +}; + +#define MPTCP_SUB_CAPABLE 0 +#define MPTCP_SUB_LEN_CAPABLE_SYN 12 +#define MPTCP_SUB_LEN_CAPABLE_SYN_ALIGN 12 +#define MPTCP_SUB_LEN_CAPABLE_ACK 20 +#define MPTCP_SUB_LEN_CAPABLE_ACK_ALIGN 20 + +#define MPTCP_SUB_JOIN 1 +#define MPTCP_SUB_LEN_JOIN_SYN 12 +#define MPTCP_SUB_LEN_JOIN_SYN_ALIGN 12 +#define MPTCP_SUB_LEN_JOIN_SYNACK 16 +#define MPTCP_SUB_LEN_JOIN_SYNACK_ALIGN 16 +#define MPTCP_SUB_LEN_JOIN_ACK 24 +#define MPTCP_SUB_LEN_JOIN_ACK_ALIGN 24 + +#define MPTCP_SUB_DSS 2 +#define MPTCP_SUB_LEN_DSS 4 +#define MPTCP_SUB_LEN_DSS_ALIGN 4 + +/* Lengths for seq and ack are the ones without the generic MPTCP-option header, + * as they are part of the DSS-option. 
+ * To get the total length, just add the different options together. + */ +#define MPTCP_SUB_LEN_SEQ 10 +#define MPTCP_SUB_LEN_SEQ_CSUM 12 +#define MPTCP_SUB_LEN_SEQ_ALIGN 12 + +#define MPTCP_SUB_LEN_SEQ_64 14 +#define MPTCP_SUB_LEN_SEQ_CSUM_64 16 +#define MPTCP_SUB_LEN_SEQ_64_ALIGN 16 + +#define MPTCP_SUB_LEN_ACK 4 +#define MPTCP_SUB_LEN_ACK_ALIGN 4 + +#define MPTCP_SUB_LEN_ACK_64 8 +#define MPTCP_SUB_LEN_ACK_64_ALIGN 8 + +/* This is the "default" option-length we will send out most often. + * MPTCP DSS-header + * 32-bit data sequence number + * 32-bit data ack + * + * It is necessary to calculate the effective MSS we will be using when + * sending data. + */ +#define MPTCP_SUB_LEN_DSM_ALIGN (MPTCP_SUB_LEN_DSS_ALIGN + \ + MPTCP_SUB_LEN_SEQ_ALIGN + \ + MPTCP_SUB_LEN_ACK_ALIGN) + +#define MPTCP_SUB_ADD_ADDR 3 +#define MPTCP_SUB_LEN_ADD_ADDR4 8 +#define MPTCP_SUB_LEN_ADD_ADDR6 20 +#define MPTCP_SUB_LEN_ADD_ADDR4_ALIGN 8 +#define MPTCP_SUB_LEN_ADD_ADDR6_ALIGN 20 + +#define MPTCP_SUB_REMOVE_ADDR 4 +#define MPTCP_SUB_LEN_REMOVE_ADDR 4 + +#define MPTCP_SUB_PRIO 5 +#define MPTCP_SUB_LEN_PRIO 3 +#define MPTCP_SUB_LEN_PRIO_ADDR 4 +#define MPTCP_SUB_LEN_PRIO_ALIGN 4 + +#define MPTCP_SUB_FAIL 6 +#define MPTCP_SUB_LEN_FAIL 12 +#define MPTCP_SUB_LEN_FAIL_ALIGN 12 + +#define MPTCP_SUB_FCLOSE 7 +#define MPTCP_SUB_LEN_FCLOSE 12 +#define MPTCP_SUB_LEN_FCLOSE_ALIGN 12 + + +#define OPTION_MPTCP (1 << 5) + +static inline void reset_mpc(struct tcp_sock *tp) +{ + tp->mpc = 0; + + tp->__select_window = __tcp_select_window; + tp->select_window = tcp_select_window; + tp->select_initial_window = tcp_select_initial_window; + tp->init_buffer_space = tcp_init_buffer_space; + tp->set_rto = tcp_set_rto; + tp->should_expand_sndbuf = tcp_should_expand_sndbuf; +} + +/* Initializes MPTCP flags in tcp_sock (and other tcp_sock members that depend + * on those flags). + */ +static inline void mptcp_init_tcp_sock(struct tcp_sock *tp) +{ + reset_mpc(tp); +} + +#ifdef CONFIG_MPTCP + +/* Used for checking if the mptcp initialization has been successful */ +extern bool mptcp_init_failed; + +/* MPTCP options */ +#define OPTION_TYPE_SYN (1 << 0) +#define OPTION_TYPE_SYNACK (1 << 1) +#define OPTION_TYPE_ACK (1 << 2) +#define OPTION_MP_CAPABLE (1 << 3) +#define OPTION_DATA_ACK (1 << 4) +#define OPTION_ADD_ADDR (1 << 5) +#define OPTION_MP_JOIN (1 << 6) +#define OPTION_MP_FAIL (1 << 7) +#define OPTION_MP_FCLOSE (1 << 8) +#define OPTION_REMOVE_ADDR (1 << 9) +#define OPTION_MP_PRIO (1 << 10) + +/* MPTCP flags */ +#define MPTCPHDR_ACK 0x01 +#define MPTCPHDR_SEQ 0x02 +#define MPTCPHDR_FIN 0x04 +#define MPTCPHDR_INF 0x08 +#define MPTCPHDR_SEQ64_SET 0x10 /* Did we received a 64-bit seq number */ +#define MPTCPHDR_SEQ64_OFO 0x20 /* Is it not in our circular array? */ +#define MPTCPHDR_SEQ64_INDEX 0x40 /* Index of seq in mpcb->snd_high_order */ +#define MPTCPHDR_DSS_CSUM 0x80 + +/* It is impossible, that all 8 bits of mptcp_flags are set to 1 with the above + * Thus, defining MPTCPHDR_JOIN as 0xFF is safe. 
+ */ +#define MPTCPHDR_JOIN 0xFF + +struct mptcp_option { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ver:4, + sub:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + ver:4; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif +}; + +struct mp_capable { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ver:4, + sub:4; + __u8 h:1, + rsv:5, + b:1, + a:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + ver:4; + __u8 a:1, + b:1, + rsv:5, + h:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u64 sender_key; + __u64 receiver_key; +} __attribute__((__packed__)); + +struct mp_join { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 b:1, + rsv:3, + sub:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + rsv:3, + b:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u8 addr_id; + union { + struct { + u32 token; + u32 nonce; + } syn; + struct { + __u64 mac; + u32 nonce; + } synack; + struct { + __u8 mac[20]; + } ack; + } u; +} __attribute__((__packed__)); + +struct mp_dss { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rsv1:4, + sub:4, + A:1, + a:1, + M:1, + m:1, + F:1, + rsv2:3; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 sub:4, + rsv1:4, + rsv2:3, + F:1, + m:1, + M:1, + a:1, + A:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif +}; + +struct mp_add_addr { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 ipver:4, + sub:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + ipver:4; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u8 addr_id; + union { + struct { + struct in_addr addr; + __be16 port; + } v4; + struct { + struct in6_addr addr; + __be16 port; + } v6; + } u; +} __attribute__((__packed__)); + +struct mp_remove_addr { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 rsv:4, + sub:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + rsv:4; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + /* list of addr_id */ + __u8 addrs_id; +}; + +struct mp_fail { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rsv1:4, + sub:4, + rsv2:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 sub:4, + rsv1:4, + rsv2:8; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __be64 data_seq; +} __attribute__((__packed__)); + +struct mp_fclose { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rsv1:4, + sub:4, + rsv2:8; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 sub:4, + rsv1:4, + rsv2:8; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u64 key; +} __attribute__((__packed__)); + +struct mp_prio { + __u8 kind; + __u8 len; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 b:1, + rsv:3, + sub:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 sub:4, + rsv:3, + b:1; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u8 addr_id; +} __attribute__((__packed__)); + +static inline int mptcp_sub_len_dss(struct mp_dss *m, int csum) +{ + return 4 + m->A * (4 + m->a * 4) + m->M * (10 + m->m * 4 + csum * 2); +} + +#define MPTCP_APP 2 + +extern int sysctl_mptcp_enabled; +extern int sysctl_mptcp_checksum; +extern int sysctl_mptcp_debug; +extern int sysctl_mptcp_syn_retries; + +extern struct workqueue_struct *mptcp_wq; + +#define mptcp_debug(fmt, args...) 
\ + do { \ + if (unlikely(sysctl_mptcp_debug)) \ + pr_err(__FILE__ ": " fmt, ##args); \ + } while (0) + +/* Iterates over all subflows */ +#define mptcp_for_each_tp(mpcb, tp) \ + for ((tp) = (mpcb)->connection_list; (tp); (tp) = (tp)->mptcp->next) + +#define mptcp_for_each_sk(mpcb, sk) \ + for ((sk) = (struct sock *)(mpcb)->connection_list; \ + sk; \ + sk = (struct sock *)tcp_sk(sk)->mptcp->next) + +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp) \ + for (__sk = (struct sock *)(__mpcb)->connection_list, \ + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL; \ + __sk; \ + __sk = __temp, \ + __temp = __sk ? (struct sock *)tcp_sk(__sk)->mptcp->next : NULL) + +/* Iterates over all bit set to 1 in a bitset */ +#define mptcp_for_each_bit_set(b, i) \ + for (i = ffs(b) - 1; i >= 0; i = ffs(b >> (i + 1) << (i + 1)) - 1) + +#define mptcp_for_each_bit_unset(b, i) \ + mptcp_for_each_bit_set(~b, i) + +extern struct lock_class_key meta_key; +extern struct lock_class_key meta_slock_key; +extern u32 mptcp_secret[MD5_MESSAGE_BYTES / 4]; + +/* This is needed to ensure that two subsequent key-generation result in + * different keys if the IPs and ports are the same. + */ +extern u32 mptcp_key_seed; + +#define MPTCP_HASH_SIZE 1024 + +extern struct hlist_nulls_head tk_hashtable[MPTCP_HASH_SIZE]; + +/* This second hashtable is needed to retrieve request socks + * created as a result of a join request. While the SYN contains + * the token, the final ack does not, so we need a separate hashtable + * to retrieve the mpcb. + */ +extern struct list_head mptcp_reqsk_htb[MPTCP_HASH_SIZE]; +extern spinlock_t mptcp_reqsk_hlock; /* hashtable protection */ + +/* Lock, protecting the two hash-tables that hold the token. Namely, + * mptcp_reqsk_tk_htb and tk_hashtable + */ +extern spinlock_t mptcp_tk_hashlock; /* hashtable protection */ + +void mptcp_data_ready(struct sock *sk, int bytes); +void mptcp_write_space(struct sock *sk); + +void mptcp_add_meta_ofo_queue(struct sock *meta_sk, struct sk_buff *skb, + struct sock *sk); +void mptcp_ofo_queue(struct sock *meta_sk); +void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp); +void mptcp_cleanup_rbuf(struct sock *meta_sk, int copied); +int mptcp_alloc_mpcb(struct sock *master_sk, __u64 remote_key, u32 window); +int mptcp_add_sock(struct sock *meta_sk, struct sock *sk, u8 loc_id, u8 rem_id, + gfp_t flags); +void mptcp_del_sock(struct sock *sk); +void mptcp_update_metasocket(struct sock *sock, struct sock *meta_sk); +void mptcp_reinject_data(struct sock *orig_sk, int clone_it); +void mptcp_update_sndbuf(struct mptcp_cb *mpcb); +struct sk_buff *mptcp_next_segment(struct sock *sk, int *reinject); +void mptcp_send_fin(struct sock *meta_sk); +void mptcp_send_active_reset(struct sock *meta_sk, gfp_t priority); +int mptcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp); +void mptcp_parse_options(const uint8_t *ptr, int opsize, + struct tcp_options_received *opt_rx, + struct mptcp_options_received *mopt, + const struct sk_buff *skb); +void mptcp_syn_options(struct sock *sk, struct tcp_out_options *opts, + unsigned *remaining); +void mptcp_synack_options(struct request_sock *req, + struct tcp_out_options *opts, + unsigned *remaining); +void mptcp_established_options(struct sock *sk, struct sk_buff *skb, + struct tcp_out_options *opts, unsigned *size); +void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp, + struct tcp_out_options *opts, + struct sk_buff *skb); +void mptcp_close(struct sock *meta_sk, long timeout); +int 
mptcp_doit(struct sock *sk); +int mptcp_create_master_sk(struct sock *meta_sk, __u64 remote_key, u32 window); +int mptcp_check_req_master(struct sock *sk, struct sock *child, + struct request_sock *req, + struct request_sock **prev, + struct mptcp_options_received *mopt); +struct sock *mptcp_check_req_child(struct sock *sk, struct sock *child, + struct request_sock *req, + struct request_sock **prev, + struct mptcp_options_received *mopt); +u32 __mptcp_select_window(struct sock *sk); +void mptcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, + __u32 *window_clamp, int wscale_ok, + __u8 *rcv_wscale, __u32 init_rcv_wnd, + const struct sock *sk); +unsigned int mptcp_current_mss(struct sock *meta_sk); +int mptcp_select_size(const struct sock *meta_sk, bool sg); +void mptcp_key_sha1(u64 key, u32 *token, u64 *idsn); +void mptcp_hmac_sha1(u8 *key_1, u8 *key_2, u8 *rand_1, u8 *rand_2, + u32 *hash_out); +void mptcp_clean_rtx_infinite(struct sk_buff *skb, struct sock *sk); +void mptcp_fin(struct sock *meta_sk); +void mptcp_retransmit_timer(struct sock *meta_sk); +int mptcp_write_wakeup(struct sock *meta_sk); +void mptcp_sub_close_wq(struct work_struct *work); +void mptcp_sub_close(struct sock *sk, unsigned long delay); +struct sock *mptcp_select_ack_sock(const struct sock *meta_sk, int copied); +void mptcp_fallback_meta_sk(struct sock *meta_sk); +int mptcp_backlog_rcv(struct sock *meta_sk, struct sk_buff *skb); +struct sock *mptcp_sk_clone(const struct sock *sk, int family, const gfp_t priority); +void mptcp_ack_handler(unsigned long); +int mptcp_check_rtt(const struct tcp_sock *tp, int time); +int mptcp_check_snd_buf(const struct tcp_sock *tp); +int mptcp_handle_options(struct sock *sk, const struct tcphdr *th, struct sk_buff *skb); +void __init mptcp_init(void); +int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len); +int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, + unsigned int mss_now, int reinject); +int mptso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, + unsigned int mss_now, gfp_t gfp, int reinject); +void mptcp_destroy_sock(struct sock *sk); +int mptcp_rcv_synsent_state_process(struct sock *sk, struct sock **skptr, + struct sk_buff *skb, + struct mptcp_options_received *mopt); +unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, u32 mss_now, + int large_allowed); +int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw); +void mptcp_twsk_destructor(struct tcp_timewait_sock *tw); +void mptcp_update_tw_socks(const struct tcp_sock *tp, int state); +void mptcp_disconnect(struct sock *sk); +bool mptcp_should_expand_sndbuf(const struct sock *sk); +int mptcp_retransmit_skb(struct sock *meta_sk, struct sk_buff *skb); +void mptcp_tsq_flags(struct sock *sk); +void mptcp_tsq_sub_deferred(struct sock *meta_sk); +struct mp_join *mptcp_find_join(struct sk_buff *skb); +void mptcp_hash_remove_bh(struct tcp_sock *meta_tp); +void mptcp_hash_remove(struct tcp_sock *meta_tp); +struct sock *mptcp_hash_find(struct net *net, u32 token); +int mptcp_lookup_join(struct sk_buff *skb, struct inet_timewait_sock *tw); +int mptcp_do_join_short(struct sk_buff *skb, struct mptcp_options_received *mopt, + struct tcp_options_received *tmp_opt, struct net *net); +void mptcp_reqsk_destructor(struct request_sock *req); +void mptcp_reqsk_new_mptcp(struct request_sock *req, + const struct tcp_options_received *rx_opt, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb); +int mptcp_check_req(struct sk_buff *skb, struct net *net); 
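
For reference, a minimal userspace sketch of how the mptcp_for_each_bit_set helper defined earlier in this header walks the set bits of a bitmask such as mpcb->path_index_bits. This is illustrative only and not part of the patch; the sample bitmask value, the printf output, and the main() wrapper are made up for the demo, while the macro body is copied verbatim from the hunk above.

    /* Illustrative sketch, not part of the patch. */
    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    /* Copied from the include/net/mptcp.h hunk above:
     * iterate over every bit that is set to 1 in b.
     * ffs() returns a 1-based bit position (0 if no bit is set),
     * hence the "- 1" to get a 0-based index and the negative
     * sentinel that terminates the loop.
     */
    #define mptcp_for_each_bit_set(b, i) \
            for (i = ffs(b) - 1; i >= 0; i = ffs(b >> (i + 1) << (i + 1)) - 1)

    int main(void)
    {
            unsigned int path_index_bits = 0x2d;    /* bits 0, 2, 3 and 5 set */
            int i;

            mptcp_for_each_bit_set(path_index_bits, i)
                    printf("path index %d in use\n", i);    /* prints 0, 2, 3, 5 */

            return 0;
    }

The same pattern, via mptcp_for_each_bit_unset (defined as mptcp_for_each_bit_set(~b, i)), is what __mptcp_find_free_index() and mptcp_set_new_pathindex() later in this header rely on to locate a free path index or address-id slot.
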
+void mptcp_connect_init(struct sock *sk); +void mptcp_sub_force_close(struct sock *sk); +int mptcp_sub_len_remove_addr_align(u16 bitfield); +void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb, + const struct sk_buff *skb); +void mptcp_init_buffer_space(struct sock *sk); + +/* MPTCP-path-manager registration/initialization functions */ +int mptcp_register_path_manager(struct mptcp_pm_ops *pm); +void mptcp_unregister_path_manager(struct mptcp_pm_ops *pm); +void mptcp_init_path_manager(struct mptcp_cb *mpcb); +void mptcp_cleanup_path_manager(struct mptcp_cb *mpcb); +void mptcp_fallback_default(struct mptcp_cb *mpcb); +void mptcp_get_default_path_manager(char *name); +int mptcp_set_default_path_manager(const char *name); +extern struct mptcp_pm_ops mptcp_pm_default; + +static inline +struct mptcp_request_sock *mptcp_rsk(const struct request_sock *req) +{ + return (struct mptcp_request_sock *)req; +} + +static inline +struct request_sock *rev_mptcp_rsk(const struct mptcp_request_sock *req) +{ + return (struct request_sock *)req; +} + +static inline bool mptcp_can_sendpage(struct sock *sk) +{ + struct sock *sk_it; + + if (tcp_sk(sk)->mpcb->dss_csum) + return false; + + mptcp_for_each_sk(tcp_sk(sk)->mpcb, sk_it) { + if (!(sk_it->sk_route_caps & NETIF_F_SG) || + !(sk_it->sk_route_caps & NETIF_F_ALL_CSUM)) + return false; + } + + return true; +} + +static inline void mptcp_push_pending_frames(struct sock *meta_sk) +{ + if (mptcp_next_segment(meta_sk, NULL)) { + struct tcp_sock *tp = tcp_sk(meta_sk); + + /* We don't care about the MSS, because it will be set in + * mptcp_write_xmit. + */ + __tcp_push_pending_frames(meta_sk, 0, tp->nonagle); + } +} + +static inline void mptcp_send_reset(struct sock *sk) +{ + tcp_send_active_reset(sk, GFP_ATOMIC); + mptcp_sub_force_close(sk); +} + +static inline int mptcp_is_data_seq(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ; +} + +static inline int mptcp_is_data_fin(const struct sk_buff *skb) +{ + return mptcp_is_data_seq(skb) && + (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_FIN); +} + +/* Is it a data-fin while in infinite mapping mode? + * In infinite mode, a subflow-fin is in fact a data-fin. + */ +static inline int mptcp_is_data_fin2(const struct sk_buff *skb, + const struct tcp_sock *tp) +{ + return mptcp_is_data_fin(skb) || + (tp->mpcb->infinite_mapping_rcv && tcp_hdr(skb)->fin); +} + +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp, + struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->mptcp_flags = MPTCPHDR_SEQ; +} + +static inline u8 mptcp_get_64_bit(u64 data_seq, struct mptcp_cb *mpcb) +{ + u64 data_seq_high = (u32)(data_seq >> 32); + + if (mpcb->rcv_high_order[0] == data_seq_high) + return 0; + else if (mpcb->rcv_high_order[1] == data_seq_high) + return MPTCPHDR_SEQ64_INDEX; + else + return MPTCPHDR_SEQ64_OFO; +} + +/* Sets the data_seq and returns pointer to the in-skb field of the data_seq. + * If the packet has a 64-bit dseq, the pointer points to the last 32 bits. 
+ */ +static inline __u32 *mptcp_skb_set_data_seq(const struct sk_buff *skb, + u32 *data_seq, + struct mptcp_cb *mpcb) +{ + __u32 *ptr = (__u32 *)(skb_transport_header(skb) + TCP_SKB_CB(skb)->dss_off); + + if (TCP_SKB_CB(skb)->mptcp_flags & MPTCPHDR_SEQ64_SET) { + u64 data_seq64 = get_unaligned_be64(ptr); + + if (mpcb) + TCP_SKB_CB(skb)->mptcp_flags |= mptcp_get_64_bit(data_seq64, mpcb); + + *data_seq = (u32)data_seq64 ; + ptr++; + } else { + *data_seq = get_unaligned_be32(ptr); + } + + return ptr; +} + +static inline struct sock *mptcp_meta_sk(const struct sock *sk) +{ + return tcp_sk(sk)->meta_sk; +} + +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp) +{ + return tcp_sk(tp->meta_sk); +} + +static inline int is_meta_tp(const struct tcp_sock *tp) +{ + return tp->mpcb && mptcp_meta_tp(tp) == tp; +} + +static inline int is_meta_sk(const struct sock *sk) +{ + return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP && + tcp_sk(sk)->mpc && mptcp_meta_sk(sk) == sk; +} + +static inline int is_master_tp(const struct tcp_sock *tp) +{ + return !tp->mpc || (!tp->mptcp->slave_sk && !is_meta_tp(tp)); +} + +static inline void mptcp_hash_request_remove(struct request_sock *req) +{ + int in_softirq = 0; + + if (list_empty(&mptcp_rsk(req)->collide_tuple)) + return; + + if (in_softirq()) { + spin_lock(&mptcp_reqsk_hlock); + in_softirq = 1; + } else { + spin_lock_bh(&mptcp_reqsk_hlock); + } + + list_del(&mptcp_rsk(req)->collide_tuple); + + if (in_softirq) + spin_unlock(&mptcp_reqsk_hlock); + else + spin_unlock_bh(&mptcp_reqsk_hlock); +} + +static inline void mptcp_init_mp_opt(struct mptcp_options_received *mopt) +{ + mopt->saw_mpc = 0; + mopt->dss_csum = 0; + mopt->drop_me = 0; + + mopt->is_mp_join = 0; + mopt->join_ack = 0; + + mopt->saw_low_prio = 0; + mopt->low_prio = 0; + + mopt->saw_add_addr = 0; + mopt->more_add_addr = 0; + + mopt->saw_rem_addr = 0; + mopt->more_rem_addr = 0; + + mopt->mp_fail = 0; + mopt->mp_fclose = 0; +} + +static inline void mptcp_reset_mopt(struct tcp_sock *tp) +{ + struct mptcp_options_received *mopt = &tp->mptcp->rx_opt; + + mopt->saw_low_prio = 0; + mopt->saw_add_addr = 0; + mopt->more_add_addr = 0; + mopt->saw_rem_addr = 0; + mopt->more_rem_addr = 0; + mopt->join_ack = 0; + mopt->mp_fail = 0; + mopt->mp_fclose = 0; +} + +static inline __be32 mptcp_get_highorder_sndbits(const struct sk_buff *skb, + const struct mptcp_cb *mpcb) +{ + return htonl(mpcb->snd_high_order[(TCP_SKB_CB(skb)->mptcp_flags & + MPTCPHDR_SEQ64_INDEX) ? 1 : 0]); +} + +static inline u64 mptcp_get_data_seq_64(const struct mptcp_cb *mpcb, int index, + u32 data_seq_32) +{ + return ((u64)mpcb->rcv_high_order[index] << 32) | data_seq_32; +} + +static inline u64 mptcp_get_rcv_nxt_64(const struct tcp_sock *meta_tp) +{ + struct mptcp_cb *mpcb = meta_tp->mpcb; + return mptcp_get_data_seq_64(mpcb, mpcb->rcv_hiseq_index, + meta_tp->rcv_nxt); +} + +static inline void mptcp_check_sndseq_wrap(struct tcp_sock *meta_tp, int inc) +{ + if (unlikely(meta_tp->snd_nxt > meta_tp->snd_nxt + inc)) { + struct mptcp_cb *mpcb = meta_tp->mpcb; + mpcb->snd_hiseq_index = mpcb->snd_hiseq_index ? 0 : 1; + mpcb->snd_high_order[mpcb->snd_hiseq_index] += 2; + } +} + +static inline void mptcp_check_rcvseq_wrap(struct tcp_sock *meta_tp, + u32 old_rcv_nxt) +{ + if (unlikely(old_rcv_nxt > meta_tp->rcv_nxt)) { + struct mptcp_cb *mpcb = meta_tp->mpcb; + mpcb->rcv_high_order[mpcb->rcv_hiseq_index] += 2; + mpcb->rcv_hiseq_index = mpcb->rcv_hiseq_index ? 
0 : 1; + } +} + +static inline int mptcp_sk_can_send(const struct sock *sk) +{ + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && + !tcp_sk(sk)->mptcp->pre_established; +} + +static inline int mptcp_sk_can_recv(const struct sock *sk) +{ + return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCP_FIN_WAIT1 | TCP_FIN_WAIT2); +} + +static inline int mptcp_sk_can_send_ack(const struct sock *sk) +{ + return !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV | + TCPF_CLOSE | TCPF_LISTEN)) && + !tcp_sk(sk)->mptcp->pre_established; +} + +/* Only support GSO if all subflows supports it */ +static inline bool mptcp_sk_can_gso(const struct sock *meta_sk) +{ + struct sock *sk; + + if (tcp_sk(meta_sk)->mpcb->dss_csum) + return 0; + + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) { + if (!mptcp_sk_can_send(sk)) + continue; + if (!sk_can_gso(sk)) + return false; + } + return true; +} + +static inline bool mptcp_can_sg(const struct sock *meta_sk) +{ + struct sock *sk; + + if (tcp_sk(meta_sk)->mpcb->dss_csum) + return 0; + + mptcp_for_each_sk(tcp_sk(meta_sk)->mpcb, sk) { + if (!mptcp_sk_can_send(sk)) + continue; + if (!(sk->sk_route_caps & NETIF_F_SG)) + return false; + } + return true; +} + +static inline void mptcp_set_rto(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sock *sk_it; + struct inet_connection_sock *micsk = inet_csk(mptcp_meta_sk(sk)); + __u32 max_rto = 0; + + /* We are in recovery-phase on the MPTCP-level. Do not update the + * RTO, because this would kill exponential backoff. + */ + if (micsk->icsk_retransmits) + return; + + mptcp_for_each_sk(tp->mpcb, sk_it) { + if (mptcp_sk_can_send(sk_it) && + inet_csk(sk_it)->icsk_rto > max_rto) + max_rto = inet_csk(sk_it)->icsk_rto; + } + if (max_rto) { + micsk->icsk_rto = max_rto << 1; + + /* A successfull rto-measurement - reset backoff counter */ + micsk->icsk_backoff = 0; + } +} + +static inline int mptcp_sysctl_syn_retries(void) +{ + return sysctl_mptcp_syn_retries; +} + +static inline void mptcp_sub_close_passive(struct sock *sk) +{ + struct sock *meta_sk = mptcp_meta_sk(sk); + struct tcp_sock *tp = tcp_sk(sk), *meta_tp = tcp_sk(meta_sk); + + /* Only close, if the app did a send-shutdown (passive close), and we + * received the data-ack of the data-fin. + */ + if (tp->mpcb->passive_close && meta_tp->snd_una == meta_tp->write_seq) + mptcp_sub_close(sk, 0); +} + +static inline bool mptcp_fallback_infinite(struct sock *sk, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* If data has been acknowleged on the meta-level, fully_established + * will have been set before and thus we will not fall back to infinite + * mapping. 
+ */ + if (likely(tp->mptcp->fully_established)) + return false; + + if (!(flag & MPTCP_FLAG_DATA_ACKED)) + return false; + + /* Don't fallback twice ;) */ + if (tp->mpcb->infinite_mapping_snd) + return false; + + pr_err("%s %#x will fallback - pi %d, src %pI4 dst %pI4 from %pS\n", + __func__, tp->mpcb->mptcp_loc_token, tp->mptcp->path_index, + &inet_sk(sk)->inet_saddr, &inet_sk(sk)->inet_daddr, + __builtin_return_address(0)); + if (!is_master_tp(tp)) + return true; + + tp->mpcb->infinite_mapping_snd = 1; + tp->mpcb->infinite_mapping_rcv = 1; + tp->mptcp->fully_established = 1; + + return false; +} + +/* Find the first free index in the bitfield */ +static inline int __mptcp_find_free_index(u8 bitfield, int j, u8 base) +{ + int i; + mptcp_for_each_bit_unset(bitfield >> base, i) { + /* We wrapped at the bitfield - try from 0 on */ + if (i + base >= sizeof(bitfield) * 8) { + mptcp_for_each_bit_unset(bitfield, i) { + if (i >= sizeof(bitfield) * 8) + goto exit; + + if (i != j) + return i; + } + goto exit; + } + if (i + base >= sizeof(bitfield) * 8) + break; + + if (i + base != j) + return i + base; + } +exit: + return -1; +} + +static inline int mptcp_find_free_index(u8 bitfield) +{ + return __mptcp_find_free_index(bitfield, -1, 0); +} + +/* Find the first index whose bit in the bit-field == 0 */ +static inline u8 mptcp_set_new_pathindex(struct mptcp_cb *mpcb) +{ + u8 base = mpcb->next_path_index; + int i; + + /* Start at 1, because 0 is reserved for the meta-sk */ + mptcp_for_each_bit_unset(mpcb->path_index_bits >> base, i) { + if (i + base < 1) + continue; + if (i + base >= sizeof(mpcb->path_index_bits) * 8) + break; + i += base; + mpcb->path_index_bits |= (1 << i); + mpcb->next_path_index = i + 1; + return i; + } + mptcp_for_each_bit_unset(mpcb->path_index_bits, i) { + if (i >= sizeof(mpcb->path_index_bits) * 8) + break; + if (i < 1) + continue; + mpcb->path_index_bits |= (1 << i); + mpcb->next_path_index = i + 1; + return i; + } + + return 0; +} + +static inline int mptcp_v6_is_v4_mapped(struct sock *sk) +{ + return sk->sk_family == AF_INET6 && + ipv6_addr_type(&inet6_sk(sk)->saddr) == IPV6_ADDR_MAPPED; +} + +/* TCP and MPTCP mpc flag-depending functions */ +u16 mptcp_select_window(struct sock *sk); +void mptcp_init_buffer_space(struct sock *sk); +void mptcp_tcp_set_rto(struct sock *sk); + +static inline void set_mpc(struct tcp_sock *tp) +{ + tp->mpc = 1; + + tp->__select_window = __mptcp_select_window; + tp->select_window = mptcp_select_window; + tp->select_initial_window = mptcp_select_initial_window; + tp->init_buffer_space = mptcp_init_buffer_space; + tp->set_rto = mptcp_tcp_set_rto; + tp->should_expand_sndbuf = mptcp_should_expand_sndbuf; +} + +#else /* CONFIG_MPTCP */ +#define mptcp_debug(fmt, args...) \ + do { \ + } while (0) + +/* Without MPTCP, we just do one iteration + * over the only socket available. This assumes that + * the sk/tp arg is the socket in that case. 
+ */ +#define mptcp_for_each_sk(mpcb, sk) +#define mptcp_for_each_sk_safe(__mpcb, __sk, __temp) + +static inline int mptcp_is_data_fin(const struct sk_buff *skb) +{ + return 0; +} +static inline int mptcp_is_data_seq(const struct sk_buff *skb) +{ + return 0; +} +static inline struct sock *mptcp_meta_sk(const struct sock *sk) +{ + return NULL; +} +static inline struct tcp_sock *mptcp_meta_tp(const struct tcp_sock *tp) +{ + return NULL; +} +static inline int is_meta_sk(const struct sock *sk) +{ + return 0; +} +static inline int is_master_tp(const struct tcp_sock *tp) +{ + return 0; +} +static inline void mptcp_purge_ofo_queue(struct tcp_sock *meta_tp) {} +static inline void mptcp_cleanup_rbuf(const struct sock *meta_sk, int copied) {} +static inline void mptcp_del_sock(const struct sock *sk) {} +static inline void mptcp_reinject_data(struct sock *orig_sk, int clone_it) {} +static inline void mptcp_update_sndbuf(const struct mptcp_cb *mpcb) {} +static inline void mptcp_skb_entail_init(const struct tcp_sock *tp, + const struct sk_buff *skb) {} +static inline void mptcp_clean_rtx_infinite(const struct sk_buff *skb, + const struct sock *sk) {} +static inline void mptcp_retransmit_timer(const struct sock *meta_sk) {} +static inline int mptcp_write_wakeup(struct sock *meta_sk) +{ + return 0; +} +static inline void mptcp_sub_close(struct sock *sk, unsigned long delay) {} +static inline void mptcp_set_rto(const struct sock *sk) {} +static inline void mptcp_send_fin(const struct sock *meta_sk) {} +static inline void mptcp_parse_options(const uint8_t *ptr, const int opsize, + const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) {} +static inline void mptcp_syn_options(struct sock *sk, + struct tcp_out_options *opts, + unsigned *remaining) {} +static inline void mptcp_synack_options(struct request_sock *req, + struct tcp_out_options *opts, + unsigned *remaining) {} + +static inline void mptcp_established_options(struct sock *sk, + struct sk_buff *skb, + struct tcp_out_options *opts, + unsigned *size) {} +static inline void mptcp_options_write(__be32 *ptr, struct tcp_sock *tp, + struct tcp_out_options *opts, + struct sk_buff *skb) {} +static inline void mptcp_close(struct sock *meta_sk, long timeout) {} +static inline int mptcp_doit(struct sock *sk) +{ + return 0; +} +static inline int mptcp_check_req_master(const struct sock *sk, + const struct sock *child, + struct request_sock *req, + struct request_sock **prev, + const struct mptcp_options_received *mopt) +{ + return 1; +} +static inline struct sock *mptcp_check_req_child(struct sock *sk, + struct sock *child, + struct request_sock *req, + struct request_sock **prev, + struct mptcp_options_received *mopt) +{ + return NULL; +} +static inline unsigned int mptcp_current_mss(struct sock *meta_sk) +{ + return 0; +} +static inline int mptcp_select_size(const struct sock *meta_sk, bool sg) +{ + return 0; +} +static inline void mptcp_sub_close_passive(struct sock *sk) {} +static inline bool mptcp_fallback_infinite(const struct sock *sk, int flag) +{ + return false; +} +static inline void mptcp_init_mp_opt(const struct mptcp_options_received *mopt) {} +static inline int mptcp_check_rtt(const struct tcp_sock *tp, int time) +{ + return 0; +} +static inline int mptcp_check_snd_buf(const struct tcp_sock *tp) +{ + return 0; +} +static inline int mptcp_sysctl_syn_retries(void) +{ + return 0; +} +static inline void mptcp_send_reset(const struct sock *sk) {} +static inline void 
mptcp_send_active_reset(struct sock *meta_sk, + gfp_t priority) {} +static inline int mptcp_write_xmit(struct sock *sk, unsigned int mss_now, + int nonagle, int push_one, gfp_t gfp) +{ + return 0; +} +static inline struct sock *mptcp_sk_clone(const struct sock *sk, int family, + const gfp_t priority) +{ + return NULL; +} +static inline int mptcp_handle_options(struct sock *sk, + const struct tcphdr *th, + struct sk_buff *skb) +{ + return 0; +} +static inline void mptcp_reset_mopt(struct tcp_sock *tp) {} +static inline void __init mptcp_init(void) {} +static inline int mptcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) +{ + return 0; +} +static inline int mptcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, + unsigned int mss_now, int reinject) +{ + return 0; +} +static inline int mptso_fragment(struct sock *sk, struct sk_buff *skb, + unsigned int len, unsigned int mss_now, + gfp_t gfp, int reinject) +{ + return 0; +} +static inline bool mptcp_sk_can_gso(const struct sock *sk) +{ + return false; +} +static inline bool mptcp_can_sg(const struct sock *meta_sk) +{ + return false; +} +static inline unsigned int mptcp_xmit_size_goal(struct sock *meta_sk, + u32 mss_now, int large_allowed) +{ + return 0; +} +static inline void mptcp_destroy_sock(struct sock *sk) {} +static inline int mptcp_rcv_synsent_state_process(struct sock *sk, + struct sock **skptr, + struct sk_buff *skb, + struct mptcp_options_received *mopt) +{ + return 0; +} +static inline bool mptcp_can_sendpage(struct sock *sk) +{ + return false; +} +static inline int mptcp_time_wait(struct sock *sk, struct tcp_timewait_sock *tw) +{ + return 0; +} +static inline void mptcp_twsk_destructor(struct tcp_timewait_sock *tw) {} +static inline void mptcp_update_tw_socks(const struct tcp_sock *tp, int state) {} +static inline void mptcp_disconnect(struct sock *sk) {} +static inline void mptcp_tsq_flags(struct sock *sk) {} +static inline void mptcp_tsq_sub_deferred(struct sock *meta_sk) {} +static inline void mptcp_hash_remove_bh(struct tcp_sock *meta_tp) {} +static inline void mptcp_hash_remove(struct tcp_sock *meta_tp) {} +static inline void mptcp_reqsk_new_mptcp(struct request_sock *req, + const struct tcp_options_received *rx_opt, + const struct mptcp_options_received *mopt, + const struct sk_buff *skb) {} +static inline void mptcp_remove_shortcuts(const struct mptcp_cb *mpcb, + const struct sk_buff *skb) {} +#endif /* CONFIG_MPTCP */ + +#endif /* _MPTCP_H */ diff --git a/include/net/mptcp_v4.h b/include/net/mptcp_v4.h new file mode 100644 index 0000000..047884c --- /dev/null +++ b/include/net/mptcp_v4.h @@ -0,0 +1,69 @@ +/* + * MPTCP implementation + * + * Initial Design & Implementation: + * Sébastien Barré <sebastien.barre@uclouvain.be> + * + * Current Maintainer & Author: + * Christoph Paasch <christoph.paasch@uclouvain.be> + * + * Additional authors: + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> + * Gregory Detal <gregory.detal@uclouvain.be> + * Fabien Duchêne <fabien.duchene@uclouvain.be> + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> + * Lavkesh Lahngir <lavkesh51@gmail.com> + * Andreas Ripke <ripke@neclab.eu> + * Vlad Dogaru <vlad.dogaru@intel.com> + * Octavian Purdila <octavian.purdila@intel.com> + * John Ronan <jronan@tssg.org> + * Catalin Nicutar <catalin.nicutar@gmail.com> + * Brandon Heller <brandonh@stanford.edu> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software 
Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef MPTCP_V4_H_ +#define MPTCP_V4_H_ + + +#include <linux/in.h> +#include <linux/skbuff.h> +#include <net/mptcp.h> +#include <net/request_sock.h> +#include <net/sock.h> + +extern struct request_sock_ops mptcp_request_sock_ops; + +#ifdef CONFIG_MPTCP + +int mptcp_v4_do_rcv(struct sock *meta_sk, struct sk_buff *skb); +int mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id); +int mptcp_v4_add_raddress(struct mptcp_cb *mpcb, const struct in_addr *addr, + __be16 port, u8 id); +void mptcp_v4_set_init_addr_bit(struct mptcp_cb *mpcb, __be32 daddr, int index); +struct sock *mptcp_v4_search_req(const __be16 rport, const __be32 raddr, + const __be32 laddr, const struct net *net); +int mptcp_init4_subsockets(struct sock *meta_sk, const struct mptcp_loc4 *loc, + struct mptcp_rem4 *rem); +int mptcp_pm_v4_init(void); +void mptcp_pm_v4_undo(void); +u32 mptcp_v4_get_nonce(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, + u32 seq); +u64 mptcp_v4_get_key(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); + +#else + +static inline int mptcp_v4_do_rcv(const struct sock *meta_sk, + const struct sk_buff *skb) +{ + return 0; +} + +#endif /* CONFIG_MPTCP */ + +#endif /* MPTCP_V4_H_ */ diff --git a/include/net/mptcp_v6.h b/include/net/mptcp_v6.h new file mode 100644 index 0000000..c303208 --- /dev/null +++ b/include/net/mptcp_v6.h @@ -0,0 +1,72 @@ +/* + * MPTCP implementation + * + * Initial Design & Implementation: + * Sébastien Barré <sebastien.barre@uclouvain.be> + * + * Current Maintainer & Author: + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> + * + * Additional authors: + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> + * Gregory Detal <gregory.detal@uclouvain.be> + * Fabien Duchêne <fabien.duchene@uclouvain.be> + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> + * Lavkesh Lahngir <lavkesh51@gmail.com> + * Andreas Ripke <ripke@neclab.eu> + * Vlad Dogaru <vlad.dogaru@intel.com> + * Octavian Purdila <octavian.purdila@intel.com> + * John Ronan <jronan@tssg.org> + * Catalin Nicutar <catalin.nicutar@gmail.com> + * Brandon Heller <brandonh@stanford.edu> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _MPTCP_V6_H +#define _MPTCP_V6_H + +#include <linux/in6.h> +#include <net/if_inet6.h> + +#include <net/mptcp.h> + +extern struct request_sock_ops mptcp6_request_sock_ops; +extern struct proto mptcpv6_prot; + +#ifdef CONFIG_MPTCP + +int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb); +int mptcp_v6_rem_raddress(struct mptcp_cb *mpcb, u8 id); +int mptcp_v6_add_raddress(struct mptcp_cb *mpcb, const struct in6_addr *addr, + __be16 port, u8 id); +void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb, + const struct in6_addr *daddr, int index); +struct sock *mptcp_v6_search_req(const __be16 rport, const struct in6_addr *raddr, + const struct in6_addr *laddr, const struct net *net); +int mptcp_init6_subsockets(struct sock *meta_sk, const struct mptcp_loc6 *loc, + struct mptcp_rem6 *rem); +int mptcp_pm_v6_init(void); +void mptcp_pm_v6_undo(void); +struct sock *mptcp_v6v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +__u32 mptcp_v6_get_nonce(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport, u32 seq); +u64 mptcp_v6_get_key(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); + +#else /* CONFIG_MPTCP */ + +static inline int mptcp_v6_do_rcv(struct sock *meta_sk, struct sk_buff *skb) +{ + return 0; +} + +#endif /* CONFIG_MPTCP */ + +#endif /* _MPTCP_V6_H */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 991dcd9..6297c97 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -15,6 +15,7 @@ #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> +#include <net/netns/mptcp.h> #include <net/netns/sctp.h> #include <net/netns/dccp.h> #include <net/netns/netfilter.h> @@ -90,6 +91,9 @@ struct net { #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif +#if IS_ENABLED(CONFIG_MPTCP) + struct netns_mptcp mptcp; +#endif #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) struct netns_sctp sctp; #endif diff --git a/include/net/netns/mptcp.h b/include/net/netns/mptcp.h new file mode 100644 index 0000000..bad418b --- /dev/null +++ b/include/net/netns/mptcp.h @@ -0,0 +1,44 @@ +/* + * MPTCP implementation - MPTCP namespace + * + * Initial Design & Implementation: + * Sébastien Barré <sebastien.barre@uclouvain.be> + * + * Current Maintainer: + * Christoph Paasch <christoph.paasch@uclouvain.be> + * + * Additional authors: + * Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi> + * Gregory Detal <gregory.detal@uclouvain.be> + * Fabien Duchêne <fabien.duchene@uclouvain.be> + * Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de> + * Lavkesh Lahngir <lavkesh51@gmail.com> + * Andreas Ripke <ripke@neclab.eu> + * Vlad Dogaru <vlad.dogaru@intel.com> + * Octavian Purdila <octavian.purdila@intel.com> + * John Ronan <jronan@tssg.org> + * Catalin Nicutar <catalin.nicutar@gmail.com> + * Brandon Heller <brandonh@stanford.edu> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __NETNS_MPTCP_H__ +#define __NETNS_MPTCP_H__ + +#include <linux/compiler.h> + +enum { + MPTCP_PM_FULLMESH = 0, + MPTCP_PM_MAX +}; + +struct netns_mptcp { + void *path_managers[MPTCP_PM_MAX]; +}; + +#endif /* __NETNS_MPTCP_H__ */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 7f830ff..e79e87a 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -164,7 +164,7 @@ struct request_sock_queue { }; int reqsk_queue_alloc(struct request_sock_queue *queue, - unsigned int nr_table_entries); + unsigned int nr_table_entries, gfp_t flags); void __reqsk_queue_destroy(struct request_sock_queue *queue); void reqsk_queue_destroy(struct request_sock_queue *queue); diff --git a/include/net/sock.h b/include/net/sock.h index b9586a1..09a682e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -899,6 +899,16 @@ void sk_clear_memalloc(struct sock *sk); int sk_wait_data(struct sock *sk, long *timeo); +/* START - needed for MPTCP */ +extern void sock_def_error_report(struct sock *sk); +extern struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, + int family); +extern void sock_lock_init(struct sock *sk); + +extern struct lock_class_key af_callback_keys[AF_MAX]; +extern char *const af_family_clock_key_strings[AF_MAX+1]; +/* END - needed for MPTCP */ + struct request_sock_ops; struct timewait_sock_ops; struct inet_hashinfo; diff --git a/include/net/tcp.h b/include/net/tcp.h index 743acce..db0cc04 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -176,6 +176,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_MPTCP 30 #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. See draft-ietf-tcpm-experimental-options-00.txt @@ -234,6 +235,27 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TFO_SERVER_ALWAYS 0x1000 +/* Flags from tcp_input.c for tcp_ack */ +#define FLAG_DATA 0x01 /* Incoming frame contained data. */ +#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ +#define FLAG_RETRANS_DATA_ACKED 0x08 /* "" "" some of which was retransmitted. */ +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ +#define FLAG_DATA_SACKED 0x20 /* New SACK. 
*/ +#define FLAG_ECE 0x40 /* ECE in this ACK */ +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ +#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ +#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ +#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ +#define MPTCP_FLAG_DATA_ACKED 0x8000 + +#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) +#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ @@ -349,6 +371,112 @@ extern struct proto tcp_prot; #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) +/**** START - Exports needed for MPTCP ****/ +extern const struct inet_connection_sock_af_ops ipv4_specific; +extern const struct inet_connection_sock_af_ops ipv6_specific; +extern const struct inet_connection_sock_af_ops ipv6_mapped; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops; +extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops; + +struct mptcp_options_received; + +int tcp_close_state(struct sock *sk); +void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, int + size_goal); +void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now, + const struct sk_buff *skb); +int tcp_xmit_probe_skb(struct sock *sk, int urgent); +void tcp_cwnd_validate(struct sock *sk); +void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb); +int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask); +unsigned int tcp_mss_split_point(const struct sock *sk, + const struct sk_buff *skb, + unsigned int mss_now, + unsigned int max_segs, + int nonagle); +bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb); +bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, + unsigned int cur_mss, int nonagle); +bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, + unsigned int cur_mss); +unsigned int tcp_cwnd_test(const struct tcp_sock *tp, const struct sk_buff *skb); +int tcp_mtu_probe(struct sock *sk); +int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, + unsigned int mss_now); +void __pskb_trim_head(struct sk_buff *skb, int len); +void tcp_queue_skb(struct sock *sk, struct sk_buff *skb); +void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags); +void tcp_reset(struct sock *sk); +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, + const u32 ack_seq, const u32 nwin); +bool tcp_urg_mode(const struct tcp_sock *tp); +void tcp_ack_probe(struct sock *sk); +void tcp_rearm_rto(struct sock *sk); +int tcp_write_timeout(struct sock *sk); +bool retransmits_timed_out(struct sock *sk, unsigned int boundary, + unsigned int timeout, bool syn_set); +void tcp_write_err(struct sock *sk); +void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr); +void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, + unsigned int mss_now); + +int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req); +void 
tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); +__u32 tcp_v4_init_sequence(const struct sk_buff *skb); +int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + u16 queue_mapping); +void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb); +struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb); +struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb); +void tcp_v4_reqsk_destructor(struct request_sock *req); + +int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req); +void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); +__u32 tcp_v6_init_sequence(const struct sk_buff *skb); +int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, + struct flowi6 *fl6, struct request_sock *req, + u16 queue_mapping); +void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); +int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); +int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +void tcp_v6_destroy_sock(struct sock *sk); +void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); +void tcp_v6_hash(struct sock *sk); +struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb); +struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +void tcp_v6_reqsk_destructor(struct request_sock *req); + +void sock_valbool_flag(struct sock *sk, int bit, int valbool); +unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, + int large_allowed); +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb); + +void skb_clone_fraglist(struct sk_buff *skb); +void copy_skb_header(struct sk_buff *new, const struct sk_buff *old); + +void inet_twsk_free(struct inet_timewait_sock *tw); +/* These states need RST on ABORT according to RFC793 */ +static inline bool tcp_need_reset(int state) +{ + return (1 << state) & + (TCPF_ESTABLISHED | TCPF_CLOS