From 505652ef863d13792c512933770684a558d2089e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 10 Nov 2015 11:43:00 +0300 Subject: [PATCH] page-server: Fine grained corking control (v3) When live migrating a container with a large number of processes inside, a page-server-ed dump may take up to 10 times longer than a local dump. The delay is always introduced in open_page_server_xfer() when criu negotiates the has_parent bit on the 2nd task. This likely happens because of Nagle's algorithm -- after the write() of the OPEN2 command, the kernel delays sending it while waiting for more data. v2: Fix this by turning on the CORK option on memory transfer sockets on the send side, and the NODELAY option whenever urgent data is sent. The receive side is always NODELAY-ed. According to Alexey Kuznetsov this is the best mode ever for this type of transfer. v3: Push packets in pre-dump's check_parent_server_xfer too. Signed-off-by: Pavel Emelyanov Acked-by: Andrew Vagin Conflicts: include/util.h util.c --- include/util.h | 2 ++ page-xfer.c | 23 +++++++++++++++++++++-- util.c | 15 +++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/include/util.h b/include/util.h index f2300a95f..5c8d5a865 100644 --- a/include/util.h +++ b/include/util.h @@ -261,4 +261,6 @@ void split(char *str, char token, char ***out, int *n); int fd_has_data(int lfd); +void tcp_nodelay(int sk, bool on); +void tcp_cork(int sk, bool on); #endif /* __CR_UTIL_H__ */ diff --git a/page-xfer.c b/page-xfer.c index dc7e81593..f8e999cf0 100644 --- a/page-xfer.c +++ b/page-xfer.c @@ -13,7 +13,7 @@ #include "image.h" #include "page-xfer.h" #include "page-pipe.h" - +#include "util.h" #include "protobuf.h" #include "protobuf/pagemap.pb-c.h" @@ -183,6 +183,13 @@ static int page_server_serve(int sk) int ret = -1; bool flushed = false; + /* + * This socket only accepts data except one thing -- it + * writes back the has_parent bit from time to time, so + * make it NODELAY all
the time. + */ + tcp_nodelay(sk, true); + if (pipe(cxfer.p)) { pr_perror("Can't make pipe for xfer"); close(sk); @@ -386,7 +393,7 @@ int connect_to_page_server(void) if (opts.ps_socket != -1) { page_server_sk = opts.ps_socket; pr_info("Re-using ps socket %d\n", page_server_sk); - return 0; + goto out; } pr_info("Connecting to server %s:%u\n", @@ -406,6 +413,13 @@ int connect_to_page_server(void) return -1; } +out: + /* + * CORK the socket at the very beginning. As per ANK + * the corked by default socket with sporadic NODELAY-s + * on urgent data is the smartest mode ever. + */ + tcp_cork(page_server_sk, true); return 0; } @@ -524,6 +538,9 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, long id) return -1; } + /* Push the command NOW */ + tcp_nodelay(xfer->sk, true); + if (read(xfer->sk, &has_parent, 1) != 1) { pr_perror("The page server doesn't answer"); return -1; @@ -826,6 +843,8 @@ static int check_parent_server_xfer(int fd_type, long id) return -1; } + tcp_nodelay(page_server_sk, true); + if (read(page_server_sk, &has_parent, sizeof(int)) != sizeof(int)) { pr_perror("The page server doesn't answer"); return -1; diff --git a/util.c b/util.c index b916eca3c..b89448640 100644 --- a/util.c +++ b/util.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include "compiler.h" #include "asm/types.h" @@ -845,3 +848,15 @@ int fd_has_data(int lfd) return ret; } + +void tcp_cork(int sk, bool on) +{ + int val = on ? 1 : 0; + setsockopt(sk, SOL_TCP, TCP_CORK, &val, sizeof(val)); +} + +void tcp_nodelay(int sk, bool on) +{ + int val = on ? 1 : 0; + setsockopt(sk, SOL_TCP, TCP_NODELAY, &val, sizeof(val)); +}