diff --git a/include/page-pipe.h b/include/page-pipe.h index 393627bfe..a2dc26852 100644 --- a/include/page-pipe.h +++ b/include/page-pipe.h @@ -81,6 +81,7 @@ struct page_pipe_buf { struct page_pipe { unsigned int nr_pipes; /* how many page_pipe_bufs in there */ struct list_head bufs; /* list of bufs */ + struct list_head free_bufs; /* list of bufs */ unsigned int nr_iovs; /* number of iovs */ unsigned int free_iov; /* first free iov */ struct iovec *iovs; /* iovs. They are provided into create_page_pipe @@ -89,13 +90,18 @@ struct page_pipe { unsigned int nr_holes; /* number of holes allocated */ unsigned int free_hole; /* number of holes in use */ struct iovec *holes; /* holes */ + + bool chunk_mode; /* Restrict the maximum buffer size of pipes + and dump memory for a few iterations */ }; -extern struct page_pipe *create_page_pipe(unsigned int nr, struct iovec *); +extern struct page_pipe *create_page_pipe(unsigned int nr, + struct iovec *, bool chunk_mode); extern void destroy_page_pipe(struct page_pipe *p); extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr); extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr); extern void debug_show_page_pipe(struct page_pipe *pp); +void page_pipe_reinit(struct page_pipe *pp); #endif /* __CR_PAGE_PIPE_H__ */ diff --git a/mem.c b/mem.c index 02cb8df3e..151ec446f 100644 --- a/mem.c +++ b/mem.c @@ -220,7 +220,7 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl, goto out_free; ret = -1; - pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args)); + pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args), false); if (!pp) goto out_close; diff --git a/page-pipe.c b/page-pipe.c index bfe1989c0..3736c2890 100644 --- a/page-pipe.c +++ b/page-pipe.c @@ -7,12 +7,24 @@ #include "util.h" #include "page-pipe.h" +/* The number of pipes for one chunk */ +#define NR_PIPES_PER_CHUNK 8 + static int page_pipe_grow(struct page_pipe *pp) { struct page_pipe_buf *ppb; pr_debug("Will grow page pipe (iov off is %u)\n", pp->free_iov); + if (!list_empty(&pp->free_bufs)) { + ppb = list_first_entry(&pp->free_bufs, struct page_pipe_buf, l); + list_move_tail(&ppb->l, &pp->bufs); + goto out; + } + + if (pp->chunk_mode && pp->nr_pipes == NR_PIPES_PER_CHUNK) + return -EAGAIN; + ppb = xmalloc(sizeof(*ppb)); if (!ppb) return -1; @@ -24,17 +36,20 @@ static int page_pipe_grow(struct page_pipe *pp) } ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE; + + list_add_tail(&ppb->l, &pp->bufs); +out: ppb->pages_in = 0; ppb->nr_segs = 0; ppb->iov = &pp->iovs[pp->free_iov]; - list_add_tail(&ppb->l, &pp->bufs); pp->nr_pipes++; return 0; } -struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) +struct page_pipe *create_page_pipe(unsigned int nr_segs, + struct iovec *iovs, bool chunk_mode) { struct page_pipe *pp; @@ -44,6 +59,7 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) if (pp) { pp->nr_pipes = 0; INIT_LIST_HEAD(&pp->bufs); + INIT_LIST_HEAD(&pp->free_bufs); pp->nr_iovs = nr_segs; pp->iovs = iovs; pp->free_iov = 0; @@ -52,6 +68,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs) pp->free_hole = 0; pp->holes = NULL; + pp->chunk_mode = chunk_mode; + if (page_pipe_grow(pp)) return NULL; } @@ -65,6 +83,7 @@ void destroy_page_pipe(struct page_pipe *pp) pr_debug("Killing page pipe\n"); + list_splice(&pp->free_bufs, &pp->bufs); list_for_each_entry_safe(ppb, n, &pp->bufs, l) { close(ppb->p[0]); close(ppb->p[1]); @@ -74,6 +93,40 @@ void destroy_page_pipe(struct page_pipe *pp) xfree(pp); } +void page_pipe_reinit(struct page_pipe *pp) +{ + struct page_pipe_buf *ppb, *n; + + BUG_ON(!pp->chunk_mode); + + pr_debug("Clean up page pipe\n"); + + list_for_each_entry_safe(ppb, n, &pp->bufs, l) + list_move(&ppb->l, &pp->free_bufs); + + pp->free_hole = 0; + + if (page_pipe_grow(pp)) + BUG(); /* It can't fail, because ppb is in free_bufs */ +} + +#define PAGE_ALLOC_COSTLY_ORDER 3 /* from the kernel source code */ +struct kernel_pipe_buffer { + struct page *page; + unsigned int offset, len; + const struct pipe_buf_operations *ops; + unsigned int flags; + unsigned long private; +}; + +/* + * The kernel allocates the linear chunk of memory for pipe buffers. + * Allocation of chunks with size more than PAGE_ALLOC_COSTLY_ORDER + * fails very often, so we need to restrict the pipe capacity to not + * allocate big chunks. + */ +#define PIPE_MAX_SIZE ((1 << PAGE_ALLOC_COSTLY_ORDER) * PAGE_SIZE / \ + sizeof(struct kernel_pipe_buffer)) #define PPB_IOV_BATCH 8 static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *ppb, @@ -82,9 +135,13 @@ static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *pp struct iovec *iov; if (ppb->pages_in == ppb->pipe_size) { + unsigned long new_size = ppb->pipe_size << 1; int ret; - ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (ppb->pipe_size * PAGE_SIZE) << 1); + if (new_size > PIPE_MAX_SIZE) + return 1; + + ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE); if (ret < 0) return 1; /* need to add another buf */ diff --git a/shmem.c b/shmem.c index 1deb5918c..0c1a97fa3 100644 --- a/shmem.c +++ b/shmem.c @@ -317,7 +317,7 @@ static int dump_one_shmem(struct shmem_info_dump *si) if (!iovs) goto err_unmap; - pp = create_page_pipe((nrpages + 1) / 2, iovs); + pp = create_page_pipe((nrpages + 1) / 2, iovs, false); if (!pp) goto err_iovs;