From 1b4e9058e86b9fc78ab69d3688cddeba73343c00 Mon Sep 17 00:00:00 2001
From: Saied Kazemi
Date: Mon, 16 Feb 2015 13:18:32 +0300
Subject: [PATCH] Do not call listen() when SO_REUSEADDR is off

For an established TCP connection, the send queue is restored in two
steps: in step (1), we retransmit the data that was sent before but not
yet acknowledged, and in step (2), we transmit the data that was never
sent out before. The TCP_REPAIR option is disabled before step (2) and
re-enabled after step (2); without this patch, nothing guards that
window.

If the amount of data to be sent in step (2) is large, the TCP_REPAIR
flag on the socket can remain off for some time (on the order of
milliseconds). If listen() is called on another socket bound to the
same port during this time window, it fails. This is because turning
TCP_REPAIR off clears the SO_REUSEADDR flag on the socket.

This patch adds a mutex (reuseaddr_lock) per port number, so that a
listen() on a port number does not happen while SO_REUSEADDR for
another socket on the same port is off.

Thanks to Amey Deshpande for debugging.

Signed-off-by: Saied Kazemi Signed-off-by: Pavel Emelyanov --- include/sk-inet.h | 1 + sk-inet.c | 10 ++++++++++ sk-tcp.c | 14 ++++++++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/sk-inet.h b/include/sk-inet.h index aa7db16d4..22553bcba 100644 --- a/include/sk-inet.h +++ b/include/sk-inet.h @@ -79,5 +79,6 @@ extern int restore_one_tcp(int sk, struct inet_sk_info *si); extern int check_tcp(void); extern int rst_tcp_socks_add(int fd, bool reuseaddr); +extern mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii); #endif /* __CR_SK_INET_H__ */ diff --git a/sk-inet.c b/sk-inet.c index adf6fc35c..539c80c4b 100644 --- a/sk-inet.c +++ b/sk-inet.c @@ -30,6 +30,7 @@ struct inet_port { int port; int type; futex_t users; + mutex_t reuseaddr_lock; struct list_head list; }; @@ -53,6 +54,7 @@ static struct inet_port *port_add(int type, int port) e->type = type; futex_init(&e->users); futex_inc(&e->users); + mutex_init(&e->reuseaddr_lock); list_add(&e->list, &inet_ports); @@ -537,10 +539,13 @@ static int open_inet_sk(struct file_desc *d) goto err; } + mutex_lock(&ii->port->reuseaddr_lock); if (listen(sk, ie->backlog) == -1) { pr_perror("Can't listen on a socket"); + mutex_unlock(&ii->port->reuseaddr_lock); goto err; } + mutex_unlock(&ii->port->reuseaddr_lock); } if (ie->state == TCP_ESTABLISHED && @@ -624,3 +629,8 @@ int inet_connect(int sk, struct inet_sk_info *ii) return 0; } + +mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii) +{ + return &ii->port->reuseaddr_lock; +} diff --git a/sk-tcp.c b/sk-tcp.c index 3f1556dbc..85eaafa80 100644 --- a/sk-tcp.c +++ b/sk-tcp.c @@ -507,7 +507,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img) return __send_tcp_queue(sk, queue, len, img); } -static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img) +static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img, mutex_t *reuse_lock) { u32 len; @@ -534,11 +534,17 @@ static int 
restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img) * they can be restored without any tricks. */ len = tse->unsq_len; + mutex_lock(reuse_lock); tcp_repair_off(sk); - if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) + if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) { + mutex_unlock(reuse_lock); return -1; - if (tcp_repair_on(sk)) + } + if (tcp_repair_on(sk)) { + mutex_unlock(reuse_lock); return -1; + } + mutex_unlock(reuse_lock); return 0; } @@ -621,7 +627,7 @@ static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii) if (restore_tcp_opts(sk, tse)) goto err_c; - if (restore_tcp_queues(sk, tse, img)) + if (restore_tcp_queues(sk, tse, img, inet_get_reuseaddr_lock(ii))) goto err_c; if (tse->has_nodelay && tse->nodelay) {