From d9298fd26b16f5dbb3800aab3fd3c2d4fa88551f Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Wed, 26 Jan 2022 23:06:41 -0500
Subject: [PATCH 1/3] mlr split

---
 internal/pkg/output/file_output_handlers.go   |  23 +
 .../pkg/transformers/aaa_transformer_table.go |   1 +
 internal/pkg/transformers/split.go            | 437 ++++++++++++++++++
 test/input/example.csv                        |  11 +
 todo.txt                                      |   2 +
 5 files changed, 474 insertions(+)
 create mode 100644 internal/pkg/transformers/split.go
 create mode 100644 test/input/example.csv

diff --git a/internal/pkg/output/file_output_handlers.go b/internal/pkg/output/file_output_handlers.go
index b5e1df510..cd7c3f896 100644
--- a/internal/pkg/output/file_output_handlers.go
+++ b/internal/pkg/output/file_output_handlers.go
@@ -56,6 +56,17 @@ type MultiOutputHandlerManager struct {
 }
 
 // ----------------------------------------------------------------
+func NewFileOutputHandlerManager(
+	recordWriterOptions *cli.TWriterOptions,
+	doAppend bool,
+) *MultiOutputHandlerManager {
+	// Append mode and write mode are served by separate manager constructors.
+	if !doAppend {
+		return NewFileWritetHandlerManager(recordWriterOptions)
+	}
+	return NewFileAppendHandlerManager(recordWriterOptions)
+}
+
 func NewFileWritetHandlerManager(
 	recordWriterOptions *cli.TWriterOptions,
 ) *MultiOutputHandlerManager {
@@ -228,6 +239,18 @@ func newOutputHandlerCommon(
 }
 
 // ----------------------------------------------------------------
+func NewFileOutputHandler(
+	filename string,
+	recordWriterOptions *cli.TWriterOptions,
+	doAppend bool,
+) (*FileOutputHandler, error) {
+	// Append mode and write mode are served by separate handler constructors.
+	if !doAppend {
+		return NewFileWriteOutputHandler(filename, recordWriterOptions)
+	}
+	return NewFileAppendOutputHandler(filename, recordWriterOptions)
+}
+
 func NewFileWriteOutputHandler(
 	filename string,
 	recordWriterOptions *cli.TWriterOptions,
diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go
index ed6c0a84d..463b745a4 100644
--- a/internal/pkg/transformers/aaa_transformer_table.go
+++ b/internal/pkg/transformers/aaa_transformer_table.go
@@ -59,6 +59,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	SkipTrivialRecordsSetup,
 	SortSetup,
 	SortWithinRecordsSetup,
+	SplitSetup,
 	Stats1Setup,
 	Stats2Setup,
 	StepSetup,
diff --git a/internal/pkg/transformers/split.go b/internal/pkg/transformers/split.go
new file mode 100644
index 000000000..287b42768
--- /dev/null
+++ b/internal/pkg/transformers/split.go
@@ -0,0 +1,437 @@
+package transformers
+
+import (
+	"bytes"
+	"container/list"
+	"fmt"
+	"net/url"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/output"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameSplit = "split"                    // the verb as typed on the mlr command line
+const splitDefaultOutputFileNamePrefix = "split" // filename prefix used when --prefix is absent
+
+var SplitSetup = TransformerSetup{ // listed in TRANSFORMER_LOOKUP_TABLE
+	Verb:         verbNameSplit,
+	UsageFunc:    transformerSplitUsage,
+	ParseCLIFunc: transformerSplitParseCLI,
+	IgnoresInput: false,
+}
+
+// transformerSplitUsage prints help for the verb to o; exits with exitCode if doExit.
+func transformerSplitUsage(
+	o *os.File,
+	doExit bool,
+	exitCode int,
+) {
+	fmt.Fprintf(o, "Usage: %s %s [options] {filename}\n", "mlr", verbNameSplit)
+	fmt.Fprint(o, `Options:
+-n {n}:      Cap file sizes at N records.
+-m {m}:      Produce M files, round-robining records among them.
+-g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+Exactly one  of -m, -n, or -g must be supplied.
+--prefix {p} Specify filename prefix; default "`+splitDefaultOutputFileNamePrefix+`".
+--suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+-a           Append to existing file(s), if any, rather than overwriting.
+-v           Send records along to downstream verbs as well as splitting to files.
+-h|--help    Show this message.
+Any of the output-format command-line flags (see mlr -h). For example, using
+  mlr --icsv --from myfile.csv split --ojson -n 1000
+the input is CSV, but the output files are JSON.
+
+Examples: Suppose myfile.csv has 1,000,000 records.
+
+100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -n 10000
+
+10 output files, 100,000 records each. Records 1,11,21,etc in split_1.csv, records 2,12,22, etc in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -m 10
+Same, but with JSON output.
+  mlr --csv --from myfile.csv split -m 10 -o json
+
+Same but instead of split_1.csv, split_2.csv, etc. there are test_1.dat, test_2.dat, etc.
+  mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+Same, but written to the /tmp/ directory.
+  mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+  mlr --csv --from myfile.csv split -g shape
+
+If the color field has values yellow and green, and the shape field has values triangle and square,
+then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+  mlr --csv --from myfile.csv split -g color,shape
+
+See also the "tee" DSL function which lets you do more ad-hoc customization.
+`)
+	if doExit {
+		os.Exit(exitCode)
+	}
+}
+
+// transformerSplitParseCLI parses the split verb's flags from args starting at
+// *pargi, advances *pargi past them, and (when doConstruct is true) returns the
+// constructed transformer. Usage errors are reported and exit the process.
+func transformerSplitParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	mainOptions *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+	// Skip the verb name from the current spot in the mlr command line
+	argi := *pargi
+	verb := args[argi]
+	argi++
+
+	var n int = 0
+	var doMod bool = false
+	var doSize bool = false
+	var groupByFieldNames []string = nil
+	var emitDownstream bool = false
+	var doAppend bool = false
+	var outputFileNamePrefix string = splitDefaultOutputFileNamePrefix
+	var outputFileNameSuffix string = "uninit"
+	haveOutputFileNameSuffix := false
+
+	var localOptions *cli.TOptions = nil
+	if mainOptions != nil {
+		copyThereof := *mainOptions // struct copy
+		localOptions = &copyThereof
+	}
+
+	// Parse local flags.
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		if !strings.HasPrefix(opt, "-") {
+			break // No more flag options to process
+		}
+		if args[argi] == "--" {
+			break // All transformers must do this so main-flags can follow verb-flags
+		}
+		argi++
+
+		switch opt {
+		case "-h", "--help":
+			transformerSplitUsage(os.Stdout, true, 0)
+		case "-n":
+			n = cli.VerbGetIntArgOrDie(verb, opt, args, &argi, argc)
+			doSize = true
+		case "-m":
+			n = cli.VerbGetIntArgOrDie(verb, opt, args, &argi, argc)
+			doMod = true
+		case "-g":
+			groupByFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+		case "--prefix":
+			outputFileNamePrefix = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
+		case "--suffix":
+			outputFileNameSuffix = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)
+			haveOutputFileNameSuffix = true
+		case "-a":
+			doAppend = true
+		case "-v":
+			emitDownstream = true
+		default:
+			// This is inelegant. For error-proofing we advance argi already in our
+			// loop (so individual cases don't need to). However,
+			// ParseWriterOptions expects it unadvanced.
+			largi := argi - 1
+			if cli.FLAG_TABLE.Parse(args, argc, &largi, localOptions) {
+				// This lets mlr main and mlr split have different output formats.
+				// Nothing else to handle here.
+				argi = largi
+			} else {
+				transformerSplitUsage(os.Stderr, true, 1)
+			}
+		}
+	}
+
+	doGroup := groupByFieldNames != nil
+	if !doMod && !doSize && !doGroup {
+		fmt.Fprintf(os.Stderr, "mlr %s: At least one of -m, -n, or -g is required.\n", verb)
+		os.Exit(1)
+	}
+	if (doMod && doSize) || (doMod && doGroup) || (doSize && doGroup) {
+		fmt.Fprintf(os.Stderr, "mlr %s: Only one of -m, -n, or -g is allowed.\n", verb)
+		os.Exit(1)
+	}
+	// n is the per-record divisor (-n) or modulus (-m); n == 0 would panic, so reject n <= 0.
+	if (doMod || doSize) && n <= 0 {
+		fmt.Fprintf(os.Stderr, "mlr %s: the -m/-n count must be positive; got %d.\n", verb, n)
+		os.Exit(1)
+	}
+
+	cli.FinalizeWriterOptions(&localOptions.WriterOptions)
+	if !haveOutputFileNameSuffix {
+		outputFileNameSuffix = localOptions.WriterOptions.OutputFileFormat
+	}
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerSplit(
+		n,
+		doMod,
+		doSize,
+		groupByFieldNames,
+		emitDownstream,
+		doAppend,
+		outputFileNamePrefix,
+		outputFileNameSuffix,
+		&localOptions.WriterOptions,
+	)
+	if err != nil {
+		// Error message already printed out
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// ----------------------------------------------------------------
+type TransformerSplit struct {
+	n                    int                 // -n: records per file; -m: number of files
+	outputFileNamePrefix string              // --prefix value, or the default "split"
+	outputFileNameSuffix string              // --suffix value, or the output-format name
+	emitDownstream       bool                // -v: also send records to downstream verbs
+	ungroupedCounter     int                 // records handled so far in the -n and -m modes
+	groupByFieldNames    []string            // -g field names; nil selects the -n or -m mode
+	recordWriterOptions  *cli.TWriterOptions // writer options used when opening split files
+	doAppend             bool                // -a: append to existing files, not overwrite
+
+	// For doSize ungrouped: only one file open at a time
+	outputHandler    output.OutputHandler // the currently open file; nil until the first record
+	previousQuotient int                  // 1-up file number of outputHandler; starts at -1
+
+	// For all other cases: multiple files open at a time
+	outputHandlerManager output.OutputHandlerManager
+
+	recordTransformerFunc RecordTransformerFunc // per-record handler for the selected mode
+}
+
+// NewTransformerSplit builds the transformer and picks its per-record handler:
+// grouped if groupByFieldNames is non-nil, else mod if doMod, else size.
+func NewTransformerSplit(
+	n int,
+	doMod bool,
+	doSize bool,
+	groupByFieldNames []string,
+	emitDownstream bool,
+	doAppend bool,
+	outputFileNamePrefix string,
+	outputFileNameSuffix string,
+	recordWriterOptions *cli.TWriterOptions,
+) (*TransformerSplit, error) {
+
+	// ungroupedCounter and outputHandler start at their zero values.
+	tr := &TransformerSplit{
+		n:                    n,
+		outputFileNamePrefix: outputFileNamePrefix,
+		outputFileNameSuffix: outputFileNameSuffix,
+		emitDownstream:       emitDownstream,
+		groupByFieldNames:    groupByFieldNames,
+		recordWriterOptions:  recordWriterOptions,
+		doAppend:             doAppend,
+		previousQuotient:     -1,
+		outputHandlerManager: output.NewFileOutputHandlerManager(recordWriterOptions, doAppend),
+	}
+
+	switch {
+	case groupByFieldNames != nil:
+		tr.recordTransformerFunc = tr.splitGrouped
+	case doMod:
+		tr.recordTransformerFunc = tr.splitModUngrouped
+	default:
+		tr.recordTransformerFunc = tr.splitSizeUngrouped
+	}
+
+	return tr, nil
+}
+
+func (tr *TransformerSplit) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+	handle := tr.recordTransformerFunc // the split mode chosen in NewTransformerSplit
+	handle(inrecAndContext, outputRecordsAndContexts, inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+}
+
+// splitModUngrouped implements -m: records are dealt round-robin across n files.
+func (tr *TransformerSplit) splitModUngrouped(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
+		closeErrors := tr.outputHandlerManager.Close()
+		for _, closeError := range closeErrors {
+			fmt.Fprintf(os.Stderr, "mlr: file-close error: %v\n", closeError)
+		}
+		if len(closeErrors) > 0 {
+			os.Exit(1)
+		}
+		return
+	}
+
+	// The record with 0-up position k goes to file number 1 + (k mod n).
+	fileNumber := 1 + (tr.ungroupedCounter % tr.n)
+	filename := tr.makeUngroupedOutputFileName(fileNumber)
+	err := tr.outputHandlerManager.WriteRecordAndContext(inrecAndContext, filename)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "mlr: file-write error: %v\n", err)
+		os.Exit(1)
+	}
+
+	if tr.emitDownstream {
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	}
+	tr.ungroupedCounter++
+}
+
+// splitSizeUngrouped implements -n: each run of n consecutive records goes to
+// the next numbered file, and only one file is open at any time.
+func (tr *TransformerSplit) splitSizeUngrouped(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker
+		if tr.outputHandler != nil {
+			if err := tr.outputHandler.Close(); err != nil {
+				fmt.Fprintf(os.Stderr, "mlr: file-close error: %v\n", err)
+				os.Exit(1)
+			}
+		}
+		return
+	}
+
+	// 0-up records 0..n-1 belong to file 1, records n..2n-1 to file 2, and so on.
+	fileNumber := 1 + (tr.ungroupedCounter / tr.n)
+	if fileNumber != tr.previousQuotient {
+		// Moving on to a new file: close the current one, if any, then open the next.
+		if tr.outputHandler != nil {
+			if err := tr.outputHandler.Close(); err != nil {
+				fmt.Fprintf(os.Stderr, "mlr: file-close error: %v\n", err)
+				os.Exit(1)
+			}
+		}
+
+		var err error
+		filename := tr.makeUngroupedOutputFileName(fileNumber)
+		tr.outputHandler, err = output.NewFileOutputHandler(
+			filename,
+			tr.recordWriterOptions,
+			tr.doAppend,
+		)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "mlr: file-open error: %v\n", err)
+			os.Exit(1)
+		}
+
+		tr.previousQuotient = fileNumber
+	}
+
+	if err := tr.outputHandler.WriteRecordAndContext(inrecAndContext); err != nil {
+		fmt.Fprintf(os.Stderr, "mlr: file-write error: %v\n", err)
+		os.Exit(1)
+	}
+
+	// With -v the record also continues on to downstream verbs.
+	if tr.emitDownstream {
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	}
+
+	tr.ungroupedCounter++
+}
+
+// splitGrouped implements -g: the file is chosen by the record's group-by field values.
+func (tr *TransformerSplit) splitGrouped(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	if inrecAndContext.EndOfStream {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+		closeErrors := tr.outputHandlerManager.Close()
+		for _, closeError := range closeErrors {
+			fmt.Fprintf(os.Stderr, "mlr: file-close error: %v\n", closeError)
+		}
+		if len(closeErrors) > 0 {
+			os.Exit(1)
+		}
+		return
+	}
+
+	var filename string
+	if values, ok := inrecAndContext.Record.GetSelectedValues(tr.groupByFieldNames); ok {
+		filename = tr.makeGroupedOutputFileName(values)
+	} else { // GetSelectedValues reported !ok for this record
+		filename = fmt.Sprintf("%s_ungrouped.%s", tr.outputFileNamePrefix, tr.outputFileNameSuffix)
+	}
+
+	err := tr.outputHandlerManager.WriteRecordAndContext(inrecAndContext, filename)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "mlr: %v\n", err)
+		os.Exit(1)
+	}
+
+	if tr.emitDownstream {
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	}
+}
+
+// makeUngroupedOutputFileName gives "{prefix}_{k}.{suffix}"; example: "split_53.csv"
+func (tr *TransformerSplit) makeUngroupedOutputFileName(k int) string {
+	return fmt.Sprint(tr.outputFileNamePrefix, "_", k, ".", tr.outputFileNameSuffix)
+}
+
+// makeGroupedOutputFileName example: "split_orange.csv". Each group-by value is
+// URL-escaped before use; the prefix and suffix are written as given.
+func (tr *TransformerSplit) makeGroupedOutputFileName(
+	groupByFieldValues []*mlrval.Mlrval,
+) string {
+	var name bytes.Buffer
+	name.WriteString(tr.outputFileNamePrefix)
+	for i := range groupByFieldValues {
+		escaped := url.QueryEscape(groupByFieldValues[i].String())
+		name.WriteString("_" + escaped)
+	}
+	name.WriteString("." + tr.outputFileNameSuffix)
+	return name.String()
+}
+
+// makeGroupedIndexedOutputFileName example: "split_yellow_53.csv"
+func (tr *TransformerSplit) makeGroupedIndexedOutputFileName(
+	groupByFieldValues []*mlrval.Mlrval,
+	index int,
+) string {
+	var buffer bytes.Buffer
+
+	// The prefix is written verbatim, without URL-escaping, since people may
+	// want prefixes like '/tmp/split' to write to the /tmp directory, etc.
+	buffer.WriteString(tr.outputFileNamePrefix)
+
+	// Group-by values come from data and may have '/' etc within: URL-escape them.
+	for _, value := range groupByFieldValues {
+		fmt.Fprintf(&buffer, "_%s", url.QueryEscape(value.String()))
+	}
+
+	fmt.Fprintf(&buffer, "_%d.%s", index, tr.outputFileNameSuffix)
+	return buffer.String()
+}
diff --git a/test/input/example.csv b/test/input/example.csv
new file mode 100644
index 000000000..bf79dd5f7
--- /dev/null
+++ b/test/input/example.csv
@@ -0,0 +1,11 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,square,true,2,15,79.2778,0.0130
+red,circle,true,3,16,13.8103,2.9010
+red,square,false,4,48,77.5542,7.4670
+purple,triangle,false,5,51,81.2290,8.5910
+red,square,false,6,64,77.1991,9.5310
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+purple,square,false,10,91,72.3735,8.2430
diff --git a/todo.txt b/todo.txt
index 92fda4e5f..a6e7a7439 100644
--- a/todo.txt
+++ b/todo.txt
@@ -26,6 +26,7 @@ FEATURES
   o format/unformat
   o strmatch
   o =~
+* separate examples from FAQs
 
 ----------------------------------------------------------------
 k better print-interpolate with {} etc
@@ -42,6 +43,7 @@ mlr split ... -n, -g -- ?
 
 ----------------------------------------------------------------
 * new example entry, with ccump and pgr
+  o slwin --prune (or somesuch) to only emit averages over full windows -- ?
 * make a lag-by-n and lead-by-n
 
 ----------------------------------------------------------------

From 494671ba42536beeb6f5bfa8465939d7bd7afc8b Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Wed, 26 Jan 2022 23:07:53 -0500
Subject: [PATCH 2/3] regression-test cases

---
 test/cases/cli-help/0001/expout               | 41 ++++++++++++++++
 test/cases/verb-split/0001/cmd                |  1 +
 test/cases/verb-split/0001/experr             |  0
 test/cases/verb-split/0001/expout             |  0
 test/cases/verb-split/0001/postcmp            |  3 ++
 test/cases/verb-split/0001/split_1.csv.expect |  6 +++
 test/cases/verb-split/0001/split_2.csv.expect |  6 +++
 test/cases/verb-split/0002/cmd                |  1 +
 test/cases/verb-split/0002/experr             |  0
 test/cases/verb-split/0002/expout             |  0
 test/cases/verb-split/0002/postcmp            |  6 +++
 test/cases/verb-split/0002/split_1.csv.expect |  3 ++
 test/cases/verb-split/0002/split_2.csv.expect |  3 ++
 test/cases/verb-split/0002/split_3.csv.expect |  3 ++
 test/cases/verb-split/0002/split_4.csv.expect |  3 ++
 test/cases/verb-split/0002/split_5.csv.expect |  3 ++
 test/cases/verb-split/0003/cmd                |  1 +
 test/cases/verb-split/0003/experr             |  0
 test/cases/verb-split/0003/expout             |  0
 test/cases/verb-split/0003/postcmp            |  3 ++
 .../verb-split/0003/split_circle.csv.expect   |  4 ++
 .../verb-split/0003/split_square.csv.expect   |  5 ++
 .../verb-split/0003/split_triangle.csv.expect |  4 ++
 test/cases/verb-split/0004/cmd                |  1 +
 test/cases/verb-split/0004/experr             |  0
 test/cases/verb-split/0004/expout             |  0
 test/cases/verb-split/0004/postcmp            |  7 +++
 .../0004/split_purple_square.csv.expect       |  2 +
 .../0004/split_purple_triangle.csv.expect     |  3 ++
 .../0004/split_red_circle.csv.expect          |  2 +
 .../0004/split_red_square.csv.expect          |  4 ++
 .../0004/split_yellow_circle.csv.expect       |  3 ++
 .../0004/split_yellow_triangle.csv.expect     |  2 +
 test/cases/verb-split/0005/cmd                |  1 +
 test/cases/verb-split/0005/experr             |  0
 test/cases/verb-split/0005/expout             |  0
 test/cases/verb-split/0005/postcmp            |  3 ++
 test/cases/verb-split/0005/split_1.dat.expect |  6 +++
 test/cases/verb-split/0005/split_2.dat.expect |  6 +++
 test/cases/verb-split/0006/cmd                |  1 +
 test/cases/verb-split/0006/experr             |  0
 test/cases/verb-split/0006/expout             |  0
 test/cases/verb-split/0006/postcmp            |  3 ++
 .../cases/verb-split/0006/split_1.json.expect | 47 +++++++++++++++++++
 .../cases/verb-split/0006/split_2.json.expect | 47 +++++++++++++++++++
 test/cases/verb-split/0007/cmd                |  1 +
 test/cases/verb-split/0007/experr             |  0
 test/cases/verb-split/0007/expout             | 11 +++++
 test/cases/verb-split/0007/postcmp            |  3 ++
 test/cases/verb-split/0007/split_1.csv.expect |  6 +++
 test/cases/verb-split/0007/split_2.csv.expect |  6 +++
 51 files changed, 261 insertions(+)
 create mode 100644 test/cases/verb-split/0001/cmd
 create mode 100644 test/cases/verb-split/0001/experr
 create mode 100644 test/cases/verb-split/0001/expout
 create mode 100644 test/cases/verb-split/0001/postcmp
 create mode 100644 test/cases/verb-split/0001/split_1.csv.expect
 create mode 100644 test/cases/verb-split/0001/split_2.csv.expect
 create mode 100644 test/cases/verb-split/0002/cmd
 create mode 100644 test/cases/verb-split/0002/experr
 create mode 100644 test/cases/verb-split/0002/expout
 create mode 100644 test/cases/verb-split/0002/postcmp
 create mode 100644 test/cases/verb-split/0002/split_1.csv.expect
 create mode 100644 test/cases/verb-split/0002/split_2.csv.expect
 create mode 100644 test/cases/verb-split/0002/split_3.csv.expect
 create mode 100644 test/cases/verb-split/0002/split_4.csv.expect
 create mode 100644 test/cases/verb-split/0002/split_5.csv.expect
 create mode 100644 test/cases/verb-split/0003/cmd
 create mode 100644 test/cases/verb-split/0003/experr
 create mode 100644 test/cases/verb-split/0003/expout
 create mode 100644 test/cases/verb-split/0003/postcmp
 create mode 100644 test/cases/verb-split/0003/split_circle.csv.expect
 create mode 100644 test/cases/verb-split/0003/split_square.csv.expect
 create mode 100644 test/cases/verb-split/0003/split_triangle.csv.expect
 create mode 100644 test/cases/verb-split/0004/cmd
 create mode 100644 test/cases/verb-split/0004/experr
 create mode 100644 test/cases/verb-split/0004/expout
 create mode 100644 test/cases/verb-split/0004/postcmp
 create mode 100644 test/cases/verb-split/0004/split_purple_square.csv.expect
 create mode 100644 test/cases/verb-split/0004/split_purple_triangle.csv.expect
 create mode 100644 test/cases/verb-split/0004/split_red_circle.csv.expect
 create mode 100644 test/cases/verb-split/0004/split_red_square.csv.expect
 create mode 100644 test/cases/verb-split/0004/split_yellow_circle.csv.expect
 create mode 100644 test/cases/verb-split/0004/split_yellow_triangle.csv.expect
 create mode 100644 test/cases/verb-split/0005/cmd
 create mode 100644 test/cases/verb-split/0005/experr
 create mode 100644 test/cases/verb-split/0005/expout
 create mode 100644 test/cases/verb-split/0005/postcmp
 create mode 100644 test/cases/verb-split/0005/split_1.dat.expect
 create mode 100644 test/cases/verb-split/0005/split_2.dat.expect
 create mode 100644 test/cases/verb-split/0006/cmd
 create mode 100644 test/cases/verb-split/0006/experr
 create mode 100644 test/cases/verb-split/0006/expout
 create mode 100644 test/cases/verb-split/0006/postcmp
 create mode 100644 test/cases/verb-split/0006/split_1.json.expect
 create mode 100644 test/cases/verb-split/0006/split_2.json.expect
 create mode 100644 test/cases/verb-split/0007/cmd
 create mode 100644 test/cases/verb-split/0007/experr
 create mode 100644 test/cases/verb-split/0007/expout
 create mode 100644 test/cases/verb-split/0007/postcmp
 create mode 100644 test/cases/verb-split/0007/split_1.csv.expect
 create mode 100644 test/cases/verb-split/0007/split_2.csv.expect

diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 3da8c64e1..14fae72c6 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -929,6 +929,47 @@ Options:
 -r        Recursively sort subobjects/submaps, e.g. for JSON input.
 -h|--help Show this message.
 
+================================================================
+split
+Usage: mlr split [options] {filename}
+Options:
+-n {n}:      Cap file sizes at N records.
+-m {m}:      Produce M files, round-robining records among them.
+-g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+Exactly one  of -m, -n, or -g must be supplied.
+--prefix {p} Specify filename prefix; default "split".
+--suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+-a           Append to existing file(s), if any, rather than overwriting.
+-v           Send records along to downstream verbs as well as splitting to files.
+-h|--help    Show this message.
+Any of the output-format command-line flags (see mlr -h). For example, using
+  mlr --icsv --from myfile.csv split --ojson -n 1000
+the input is CSV, but the output files are JSON.
+
+Examples: Suppose myfile.csv has 1,000,000 records.
+
+100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -n 10000
+
+10 output files, 100,000 records each. Records 1,11,21,etc in split_1.csv, records 2,12,22, etc in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -m 10
+Same, but with JSON output.
+  mlr --csv --from myfile.csv split -m 10 -o json
+
+Same but instead of split_1.csv, split_2.csv, etc. there are test_1.dat, test_2.dat, etc.
+  mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+Same, but written to the /tmp/ directory.
+  mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+  mlr --csv --from myfile.csv split -g shape
+
+If the color field has values yellow and green, and the shape field has values triangle and square,
+then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+  mlr --csv --from myfile.csv split -g color,shape
+
+See also the "tee" DSL function which lets you do more ad-hoc customization.
+
 ================================================================
 stats1
 Usage: mlr stats1 [options]
diff --git a/test/cases/verb-split/0001/cmd b/test/cases/verb-split/0001/cmd
new file mode 100644
index 000000000..8ef25a57d
--- /dev/null
+++ b/test/cases/verb-split/0001/cmd
@@ -0,0 +1 @@
+mlr --csv split -m 2 --prefix ${CASEDIR}/split test/input/example.csv
diff --git a/test/cases/verb-split/0001/experr b/test/cases/verb-split/0001/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0001/expout b/test/cases/verb-split/0001/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0001/postcmp b/test/cases/verb-split/0001/postcmp
new file mode 100644
index 000000000..97b9056ae
--- /dev/null
+++ b/test/cases/verb-split/0001/postcmp
@@ -0,0 +1,3 @@
+${CASEDIR}/split_1.csv.expect ${CASEDIR}/split_1.csv
+${CASEDIR}/split_2.csv.expect ${CASEDIR}/split_2.csv
+
diff --git a/test/cases/verb-split/0001/split_1.csv.expect b/test/cases/verb-split/0001/split_1.csv.expect
new file mode 100644
index 000000000..f228ed651
--- /dev/null
+++ b/test/cases/verb-split/0001/split_1.csv.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,circle,true,3,16,13.8103,2.9010
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,9,87,63.5058,8.3350
diff --git a/test/cases/verb-split/0001/split_2.csv.expect b/test/cases/verb-split/0001/split_2.csv.expect
new file mode 100644
index 000000000..cf8dd0bd4
--- /dev/null
+++ b/test/cases/verb-split/0001/split_2.csv.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+yellow,circle,true,8,73,63.9785,4.2370
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0002/cmd b/test/cases/verb-split/0002/cmd
new file mode 100644
index 000000000..81c9af7dd
--- /dev/null
+++ b/test/cases/verb-split/0002/cmd
@@ -0,0 +1 @@
+mlr --csv split -n 2 --prefix ${CASEDIR}/split test/input/example.csv
diff --git a/test/cases/verb-split/0002/experr b/test/cases/verb-split/0002/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0002/expout b/test/cases/verb-split/0002/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0002/postcmp b/test/cases/verb-split/0002/postcmp
new file mode 100644
index 000000000..b0cb3514e
--- /dev/null
+++ b/test/cases/verb-split/0002/postcmp
@@ -0,0 +1,6 @@
+${CASEDIR}/split_1.csv.expect ${CASEDIR}/split_1.csv
+${CASEDIR}/split_2.csv.expect ${CASEDIR}/split_2.csv
+${CASEDIR}/split_3.csv.expect ${CASEDIR}/split_3.csv
+${CASEDIR}/split_4.csv.expect ${CASEDIR}/split_4.csv
+${CASEDIR}/split_5.csv.expect ${CASEDIR}/split_5.csv
+
diff --git a/test/cases/verb-split/0002/split_1.csv.expect b/test/cases/verb-split/0002/split_1.csv.expect
new file mode 100644
index 000000000..6203cbca0
--- /dev/null
+++ b/test/cases/verb-split/0002/split_1.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,square,true,2,15,79.2778,0.0130
diff --git a/test/cases/verb-split/0002/split_2.csv.expect b/test/cases/verb-split/0002/split_2.csv.expect
new file mode 100644
index 000000000..9ad680950
--- /dev/null
+++ b/test/cases/verb-split/0002/split_2.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+red,circle,true,3,16,13.8103,2.9010
+red,square,false,4,48,77.5542,7.4670
diff --git a/test/cases/verb-split/0002/split_3.csv.expect b/test/cases/verb-split/0002/split_3.csv.expect
new file mode 100644
index 000000000..bc2e5ba37
--- /dev/null
+++ b/test/cases/verb-split/0002/split_3.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+purple,triangle,false,5,51,81.2290,8.5910
+red,square,false,6,64,77.1991,9.5310
diff --git a/test/cases/verb-split/0002/split_4.csv.expect b/test/cases/verb-split/0002/split_4.csv.expect
new file mode 100644
index 000000000..0be4a6258
--- /dev/null
+++ b/test/cases/verb-split/0002/split_4.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,8,73,63.9785,4.2370
diff --git a/test/cases/verb-split/0002/split_5.csv.expect b/test/cases/verb-split/0002/split_5.csv.expect
new file mode 100644
index 000000000..577f20e31
--- /dev/null
+++ b/test/cases/verb-split/0002/split_5.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+yellow,circle,true,9,87,63.5058,8.3350
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0003/cmd b/test/cases/verb-split/0003/cmd
new file mode 100644
index 000000000..32b90536e
--- /dev/null
+++ b/test/cases/verb-split/0003/cmd
@@ -0,0 +1 @@
+mlr --csv split -g shape --prefix ${CASEDIR}/split test/input/example.csv
diff --git a/test/cases/verb-split/0003/experr b/test/cases/verb-split/0003/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0003/expout b/test/cases/verb-split/0003/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0003/postcmp b/test/cases/verb-split/0003/postcmp
new file mode 100644
index 000000000..d00abe7f1
--- /dev/null
+++ b/test/cases/verb-split/0003/postcmp
@@ -0,0 +1,3 @@
+${CASEDIR}/split_square.csv.expect ${CASEDIR}/split_square.csv
+${CASEDIR}/split_circle.csv.expect ${CASEDIR}/split_circle.csv
+${CASEDIR}/split_triangle.csv.expect ${CASEDIR}/split_triangle.csv
diff --git a/test/cases/verb-split/0003/split_circle.csv.expect b/test/cases/verb-split/0003/split_circle.csv.expect
new file mode 100644
index 000000000..6ea6a0a93
--- /dev/null
+++ b/test/cases/verb-split/0003/split_circle.csv.expect
@@ -0,0 +1,4 @@
+color,shape,flag,k,index,quantity,rate
+red,circle,true,3,16,13.8103,2.9010
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
diff --git a/test/cases/verb-split/0003/split_square.csv.expect b/test/cases/verb-split/0003/split_square.csv.expect
new file mode 100644
index 000000000..122663bfe
--- /dev/null
+++ b/test/cases/verb-split/0003/split_square.csv.expect
@@ -0,0 +1,5 @@
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0003/split_triangle.csv.expect b/test/cases/verb-split/0003/split_triangle.csv.expect
new file mode 100644
index 000000000..70bce77e6
--- /dev/null
+++ b/test/cases/verb-split/0003/split_triangle.csv.expect
@@ -0,0 +1,4 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
diff --git a/test/cases/verb-split/0004/cmd b/test/cases/verb-split/0004/cmd
new file mode 100644
index 000000000..938e16043
--- /dev/null
+++ b/test/cases/verb-split/0004/cmd
@@ -0,0 +1 @@
+mlr --csv split -g color,shape --prefix ${CASEDIR}/split test/input/example.csv
diff --git a/test/cases/verb-split/0004/experr b/test/cases/verb-split/0004/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0004/expout b/test/cases/verb-split/0004/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0004/postcmp b/test/cases/verb-split/0004/postcmp
new file mode 100644
index 000000000..25cc0362d
--- /dev/null
+++ b/test/cases/verb-split/0004/postcmp
@@ -0,0 +1,7 @@
+${CASEDIR}/split_purple_square.csv.expect ${CASEDIR}/split_purple_square.csv
+${CASEDIR}/split_purple_triangle.csv.expect ${CASEDIR}/split_purple_triangle.csv
+${CASEDIR}/split_red_circle.csv.expect ${CASEDIR}/split_red_circle.csv
+${CASEDIR}/split_red_square.csv.expect ${CASEDIR}/split_red_square.csv
+${CASEDIR}/split_yellow_circle.csv.expect ${CASEDIR}/split_yellow_circle.csv
+${CASEDIR}/split_yellow_triangle.csv.expect ${CASEDIR}/split_yellow_triangle.csv
+
diff --git a/test/cases/verb-split/0004/split_purple_square.csv.expect b/test/cases/verb-split/0004/split_purple_square.csv.expect
new file mode 100644
index 000000000..019f93431
--- /dev/null
+++ b/test/cases/verb-split/0004/split_purple_square.csv.expect
@@ -0,0 +1,2 @@
+color,shape,flag,k,index,quantity,rate
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0004/split_purple_triangle.csv.expect b/test/cases/verb-split/0004/split_purple_triangle.csv.expect
new file mode 100644
index 000000000..7201c5aba
--- /dev/null
+++ b/test/cases/verb-split/0004/split_purple_triangle.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
diff --git a/test/cases/verb-split/0004/split_red_circle.csv.expect b/test/cases/verb-split/0004/split_red_circle.csv.expect
new file mode 100644
index 000000000..79d82eb67
--- /dev/null
+++ b/test/cases/verb-split/0004/split_red_circle.csv.expect
@@ -0,0 +1,2 @@
+color,shape,flag,k,index,quantity,rate
+red,circle,true,3,16,13.8103,2.9010
diff --git a/test/cases/verb-split/0004/split_red_square.csv.expect b/test/cases/verb-split/0004/split_red_square.csv.expect
new file mode 100644
index 000000000..439afffa0
--- /dev/null
+++ b/test/cases/verb-split/0004/split_red_square.csv.expect
@@ -0,0 +1,4 @@
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
diff --git a/test/cases/verb-split/0004/split_yellow_circle.csv.expect b/test/cases/verb-split/0004/split_yellow_circle.csv.expect
new file mode 100644
index 000000000..cbeb34546
--- /dev/null
+++ b/test/cases/verb-split/0004/split_yellow_circle.csv.expect
@@ -0,0 +1,3 @@
+color,shape,flag,k,index,quantity,rate
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
diff --git a/test/cases/verb-split/0004/split_yellow_triangle.csv.expect b/test/cases/verb-split/0004/split_yellow_triangle.csv.expect
new file mode 100644
index 000000000..cc98d358e
--- /dev/null
+++ b/test/cases/verb-split/0004/split_yellow_triangle.csv.expect
@@ -0,0 +1,2 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
diff --git a/test/cases/verb-split/0005/cmd b/test/cases/verb-split/0005/cmd
new file mode 100644
index 000000000..77ddf9077
--- /dev/null
+++ b/test/cases/verb-split/0005/cmd
@@ -0,0 +1 @@
+mlr --csv split -m 2 --prefix ${CASEDIR}/split --suffix dat test/input/example.csv
diff --git a/test/cases/verb-split/0005/experr b/test/cases/verb-split/0005/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0005/expout b/test/cases/verb-split/0005/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0005/postcmp b/test/cases/verb-split/0005/postcmp
new file mode 100644
index 000000000..a40bc1194
--- /dev/null
+++ b/test/cases/verb-split/0005/postcmp
@@ -0,0 +1,3 @@
+${CASEDIR}/split_1.dat.expect ${CASEDIR}/split_1.dat
+${CASEDIR}/split_2.dat.expect ${CASEDIR}/split_2.dat
+
diff --git a/test/cases/verb-split/0005/split_1.dat.expect b/test/cases/verb-split/0005/split_1.dat.expect
new file mode 100644
index 000000000..f228ed651
--- /dev/null
+++ b/test/cases/verb-split/0005/split_1.dat.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,circle,true,3,16,13.8103,2.9010
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,9,87,63.5058,8.3350
diff --git a/test/cases/verb-split/0005/split_2.dat.expect b/test/cases/verb-split/0005/split_2.dat.expect
new file mode 100644
index 000000000..cf8dd0bd4
--- /dev/null
+++ b/test/cases/verb-split/0005/split_2.dat.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+yellow,circle,true,8,73,63.9785,4.2370
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0006/cmd b/test/cases/verb-split/0006/cmd
new file mode 100644
index 000000000..a93d29864
--- /dev/null
+++ b/test/cases/verb-split/0006/cmd
@@ -0,0 +1 @@
+mlr --csv split -m 2 --prefix ${CASEDIR}/split --ojson test/input/example.csv
diff --git a/test/cases/verb-split/0006/experr b/test/cases/verb-split/0006/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0006/expout b/test/cases/verb-split/0006/expout
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0006/postcmp b/test/cases/verb-split/0006/postcmp
new file mode 100644
index 000000000..e63a309cf
--- /dev/null
+++ b/test/cases/verb-split/0006/postcmp
@@ -0,0 +1,3 @@
+${CASEDIR}/split_1.json.expect ${CASEDIR}/split_1.json
+${CASEDIR}/split_2.json.expect ${CASEDIR}/split_2.json
+
diff --git a/test/cases/verb-split/0006/split_1.json.expect b/test/cases/verb-split/0006/split_1.json.expect
new file mode 100644
index 000000000..00534646c
--- /dev/null
+++ b/test/cases/verb-split/0006/split_1.json.expect
@@ -0,0 +1,47 @@
+[
+{
+  "color": "yellow",
+  "shape": "triangle",
+  "flag": "true",
+  "k": 1,
+  "index": 11,
+  "quantity": 43.6498,
+  "rate": 9.8870
+},
+{
+  "color": "red",
+  "shape": "circle",
+  "flag": "true",
+  "k": 3,
+  "index": 16,
+  "quantity": 13.8103,
+  "rate": 2.9010
+},
+{
+  "color": "purple",
+  "shape": "triangle",
+  "flag": "false",
+  "k": 5,
+  "index": 51,
+  "quantity": 81.2290,
+  "rate": 8.5910
+},
+{
+  "color": "purple",
+  "shape": "triangle",
+  "flag": "false",
+  "k": 7,
+  "index": 65,
+  "quantity": 80.1405,
+  "rate": 5.8240
+},
+{
+  "color": "yellow",
+  "shape": "circle",
+  "flag": "true",
+  "k": 9,
+  "index": 87,
+  "quantity": 63.5058,
+  "rate": 8.3350
+}
+]
diff --git a/test/cases/verb-split/0006/split_2.json.expect b/test/cases/verb-split/0006/split_2.json.expect
new file mode 100644
index 000000000..d2d370f1e
--- /dev/null
+++ b/test/cases/verb-split/0006/split_2.json.expect
@@ -0,0 +1,47 @@
+[
+{
+  "color": "red",
+  "shape": "square",
+  "flag": "true",
+  "k": 2,
+  "index": 15,
+  "quantity": 79.2778,
+  "rate": 0.0130
+},
+{
+  "color": "red",
+  "shape": "square",
+  "flag": "false",
+  "k": 4,
+  "index": 48,
+  "quantity": 77.5542,
+  "rate": 7.4670
+},
+{
+  "color": "red",
+  "shape": "square",
+  "flag": "false",
+  "k": 6,
+  "index": 64,
+  "quantity": 77.1991,
+  "rate": 9.5310
+},
+{
+  "color": "yellow",
+  "shape": "circle",
+  "flag": "true",
+  "k": 8,
+  "index": 73,
+  "quantity": 63.9785,
+  "rate": 4.2370
+},
+{
+  "color": "purple",
+  "shape": "square",
+  "flag": "false",
+  "k": 10,
+  "index": 91,
+  "quantity": 72.3735,
+  "rate": 8.2430
+}
+]
diff --git a/test/cases/verb-split/0007/cmd b/test/cases/verb-split/0007/cmd
new file mode 100644
index 000000000..44f51882f
--- /dev/null
+++ b/test/cases/verb-split/0007/cmd
@@ -0,0 +1 @@
+mlr --csv split -m 2 -v --prefix ${CASEDIR}/split test/input/example.csv
diff --git a/test/cases/verb-split/0007/experr b/test/cases/verb-split/0007/experr
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/cases/verb-split/0007/expout b/test/cases/verb-split/0007/expout
new file mode 100644
index 000000000..bf79dd5f7
--- /dev/null
+++ b/test/cases/verb-split/0007/expout
@@ -0,0 +1,11 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,square,true,2,15,79.2778,0.0130
+red,circle,true,3,16,13.8103,2.9010
+red,square,false,4,48,77.5542,7.4670
+purple,triangle,false,5,51,81.2290,8.5910
+red,square,false,6,64,77.1991,9.5310
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,8,73,63.9785,4.2370
+yellow,circle,true,9,87,63.5058,8.3350
+purple,square,false,10,91,72.3735,8.2430
diff --git a/test/cases/verb-split/0007/postcmp b/test/cases/verb-split/0007/postcmp
new file mode 100644
index 000000000..97b9056ae
--- /dev/null
+++ b/test/cases/verb-split/0007/postcmp
@@ -0,0 +1,3 @@
+${CASEDIR}/split_1.csv.expect ${CASEDIR}/split_1.csv
+${CASEDIR}/split_2.csv.expect ${CASEDIR}/split_2.csv
+
diff --git a/test/cases/verb-split/0007/split_1.csv.expect b/test/cases/verb-split/0007/split_1.csv.expect
new file mode 100644
index 000000000..f228ed651
--- /dev/null
+++ b/test/cases/verb-split/0007/split_1.csv.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+yellow,triangle,true,1,11,43.6498,9.8870
+red,circle,true,3,16,13.8103,2.9010
+purple,triangle,false,5,51,81.2290,8.5910
+purple,triangle,false,7,65,80.1405,5.8240
+yellow,circle,true,9,87,63.5058,8.3350
diff --git a/test/cases/verb-split/0007/split_2.csv.expect b/test/cases/verb-split/0007/split_2.csv.expect
new file mode 100644
index 000000000..cf8dd0bd4
--- /dev/null
+++ b/test/cases/verb-split/0007/split_2.csv.expect
@@ -0,0 +1,6 @@
+color,shape,flag,k,index,quantity,rate
+red,square,true,2,15,79.2778,0.0130
+red,square,false,4,48,77.5542,7.4670
+red,square,false,6,64,77.1991,9.5310
+yellow,circle,true,8,73,63.9785,4.2370
+purple,square,false,10,91,72.3735,8.2430

From c8b71779d995a24c00465072ac02cd896273d5d9 Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Wed, 26 Jan 2022 23:11:17 -0500
Subject: [PATCH 3/3] doc-build artifacts

---
 docs/src/manpage.md            | 46 +++++++++++++++++++++++++++--
 docs/src/manpage.txt           | 46 +++++++++++++++++++++++++++--
 docs/src/reference-verbs.md    | 46 +++++++++++++++++++++++++++++
 docs/src/reference-verbs.md.in |  6 ++++
 man/manpage.txt                | 46 +++++++++++++++++++++++++++--
 man/mlr.1                      | 54 +++++++++++++++++++++++++++++++---
 todo.txt                       | 11 +++----
 7 files changed, 235 insertions(+), 20 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index 77331400b..9492fe5b1 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -195,8 +195,8 @@ VERB LIST
        json-stringify join label least-frequent merge-fields most-frequent nest
        nothing put regularize remove-empty-columns rename reorder repeat reshape
        sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort
-       sort-within-records stats1 stats2 step tac tail tee template top unflatten
-       uniq unsparsify
+       sort-within-records split stats1 stats2 step tac tail tee template top
+       unflatten uniq unsparsify
 
 FUNCTION LIST
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1737,6 +1737,46 @@ VERBS
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   split
+       Usage: mlr split [options] {filename}
+       Options:
+       -n {n}:      Cap file sizes at N records.
+       -m {m}:      Produce M files, round-robining records among them.
+       -g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+       Exactly one  of -m, -n, or -g must be supplied.
+       --prefix {p} Specify filename prefix; default "split".
+       --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+       -a           Append to existing file(s), if any, rather than overwriting.
+       -v           Send records along to downstream verbs as well as splitting to files.
+       -h|--help    Show this message.
+       Any of the output-format command-line flags (see mlr -h). For example, using
+         mlr --icsv --from myfile.csv split --ojson -n 1000
+       the input is CSV, but the output files are JSON.
+
+       Examples: Suppose myfile.csv has 1,000,000 records.
+
+       100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+         mlr --csv --from myfile.csv split -n 10000
+
+       10 output files, 100,000 records each. Records 1, 11, 21, etc. in split_1.csv; records 2, 12, 22, etc. in split_2.csv; and so on.
+         mlr --csv --from myfile.csv split -m 10
+       Same, but with JSON output.
+         mlr --csv --from myfile.csv split -m 10 -o json
+
+       Same, but instead of split_1.csv, split_2.csv, etc., there are test_1.dat, test_2.dat, etc.
+         mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+       Same, but written to the /tmp/ directory.
+         mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+       If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+         mlr --csv --from myfile.csv split -g shape
+
+       If the color field has values yellow and green, and the shape field has values triangle and square,
+       then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+         mlr --csv --from myfile.csv split -g color,shape
+
+       See also the "tee" DSL function which lets you do more ad-hoc customization.
+
    stats1
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -3091,5 +3131,5 @@ SEE ALSO
 
 
 
-                                  2022-01-25                         MILLER(1)
+                                  2022-01-27                         MILLER(1)
 </pre>
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index 8aa7753f3..bdba32306 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -174,8 +174,8 @@ VERB LIST
        json-stringify join label least-frequent merge-fields most-frequent nest
        nothing put regularize remove-empty-columns rename reorder repeat reshape
        sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort
-       sort-within-records stats1 stats2 step tac tail tee template top unflatten
-       uniq unsparsify
+       sort-within-records split stats1 stats2 step tac tail tee template top
+       unflatten uniq unsparsify
 
 FUNCTION LIST
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1716,6 +1716,46 @@ VERBS
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   split
+       Usage: mlr split [options] {filename}
+       Options:
+       -n {n}:      Cap file sizes at N records.
+       -m {m}:      Produce M files, round-robining records among them.
+       -g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+       Exactly one  of -m, -n, or -g must be supplied.
+       --prefix {p} Specify filename prefix; default "split".
+       --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+       -a           Append to existing file(s), if any, rather than overwriting.
+       -v           Send records along to downstream verbs as well as splitting to files.
+       -h|--help    Show this message.
+       Any of the output-format command-line flags (see mlr -h). For example, using
+         mlr --icsv --from myfile.csv split --ojson -n 1000
+       the input is CSV, but the output files are JSON.
+
+       Examples: Suppose myfile.csv has 1,000,000 records.
+
+       100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+         mlr --csv --from myfile.csv split -n 10000
+
+       10 output files, 100,000 records each. Records 1, 11, 21, etc. in split_1.csv; records 2, 12, 22, etc. in split_2.csv; and so on.
+         mlr --csv --from myfile.csv split -m 10
+       Same, but with JSON output.
+         mlr --csv --from myfile.csv split -m 10 -o json
+
+       Same, but instead of split_1.csv, split_2.csv, etc., there are test_1.dat, test_2.dat, etc.
+         mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+       Same, but written to the /tmp/ directory.
+         mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+       If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+         mlr --csv --from myfile.csv split -g shape
+
+       If the color field has values yellow and green, and the shape field has values triangle and square,
+       then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+         mlr --csv --from myfile.csv split -g color,shape
+
+       See also the "tee" DSL function which lets you do more ad-hoc customization.
+
    stats1
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -3070,4 +3110,4 @@ SEE ALSO
 
 
 
-                                  2022-01-25                         MILLER(1)
+                                  2022-01-27                         MILLER(1)
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index bd74fa02f..991a14748 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -2978,6 +2978,52 @@ a b c
 9 8 7
 </pre>
 
+## split
+
+<pre class="pre-highlight-in-pair">
+<b>mlr split --help</b>
+</pre>
+<pre class="pre-non-highlight-in-pair">
+Usage: mlr split [options] {filename}
+Options:
+-n {n}:      Cap file sizes at N records.
+-m {m}:      Produce M files, round-robining records among them.
+-g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+Exactly one  of -m, -n, or -g must be supplied.
+--prefix {p} Specify filename prefix; default "split".
+--suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+-a           Append to existing file(s), if any, rather than overwriting.
+-v           Send records along to downstream verbs as well as splitting to files.
+-h|--help    Show this message.
+Any of the output-format command-line flags (see mlr -h). For example, using
+  mlr --icsv --from myfile.csv split --ojson -n 1000
+the input is CSV, but the output files are JSON.
+
+Examples: Suppose myfile.csv has 1,000,000 records.
+
+100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -n 10000
+
+10 output files, 100,000 records each. Records 1, 11, 21, etc. in split_1.csv; records 2, 12, 22, etc. in split_2.csv; and so on.
+  mlr --csv --from myfile.csv split -m 10
+Same, but with JSON output.
+  mlr --csv --from myfile.csv split -m 10 -o json
+
+Same, but instead of split_1.csv, split_2.csv, etc., there are test_1.dat, test_2.dat, etc.
+  mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+Same, but written to the /tmp/ directory.
+  mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+  mlr --csv --from myfile.csv split -g shape
+
+If the color field has values yellow and green, and the shape field has values triangle and square,
+then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+  mlr --csv --from myfile.csv split -g color,shape
+
+See also the "tee" DSL function which lets you do more ad-hoc customization.
+</pre>
+
 ## stats1
 
 <pre class="pre-highlight-in-pair">
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index 0addd5d57..2c371a554 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -936,6 +936,12 @@ GENMD-RUN-COMMAND
 mlr --ijson --opprint sort-within-records data/sort-within-records.json
 GENMD-EOF
 
+## split
+
+GENMD-RUN-COMMAND
+mlr split --help
+GENMD-EOF
+
 ## stats1
 
 GENMD-RUN-COMMAND
diff --git a/man/manpage.txt b/man/manpage.txt
index 8aa7753f3..bdba32306 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -174,8 +174,8 @@ VERB LIST
        json-stringify join label least-frequent merge-fields most-frequent nest
        nothing put regularize remove-empty-columns rename reorder repeat reshape
        sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort
-       sort-within-records stats1 stats2 step tac tail tee template top unflatten
-       uniq unsparsify
+       sort-within-records split stats1 stats2 step tac tail tee template top
+       unflatten uniq unsparsify
 
 FUNCTION LIST
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -1716,6 +1716,46 @@ VERBS
        -r        Recursively sort subobjects/submaps, e.g. for JSON input.
        -h|--help Show this message.
 
+   split
+       Usage: mlr split [options] {filename}
+       Options:
+       -n {n}:      Cap file sizes at N records.
+       -m {m}:      Produce M files, round-robining records among them.
+       -g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+       Exactly one  of -m, -n, or -g must be supplied.
+       --prefix {p} Specify filename prefix; default "split".
+       --suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+       -a           Append to existing file(s), if any, rather than overwriting.
+       -v           Send records along to downstream verbs as well as splitting to files.
+       -h|--help    Show this message.
+       Any of the output-format command-line flags (see mlr -h). For example, using
+         mlr --icsv --from myfile.csv split --ojson -n 1000
+       the input is CSV, but the output files are JSON.
+
+       Examples: Suppose myfile.csv has 1,000,000 records.
+
+       100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+         mlr --csv --from myfile.csv split -n 10000
+
+       10 output files, 100,000 records each. Records 1, 11, 21, etc. in split_1.csv; records 2, 12, 22, etc. in split_2.csv; and so on.
+         mlr --csv --from myfile.csv split -m 10
+       Same, but with JSON output.
+         mlr --csv --from myfile.csv split -m 10 -o json
+
+       Same, but instead of split_1.csv, split_2.csv, etc., there are test_1.dat, test_2.dat, etc.
+         mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+       Same, but written to the /tmp/ directory.
+         mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+       If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+         mlr --csv --from myfile.csv split -g shape
+
+       If the color field has values yellow and green, and the shape field has values triangle and square,
+       then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+         mlr --csv --from myfile.csv split -g color,shape
+
+       See also the "tee" DSL function which lets you do more ad-hoc customization.
+
    stats1
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -3070,4 +3110,4 @@ SEE ALSO
 
 
 
-                                  2022-01-25                         MILLER(1)
+                                  2022-01-27                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 6eb74a052..57011f3e4 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2022-01-25
+.\"      Date: 2022-01-27
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2022-01-25" "\ \&" "\ \&"
+.TH "MILLER" "1" "2022-01-27" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -215,8 +215,8 @@ fraction gap grep group-by group-like having-fields head histogram json-parse
 json-stringify join label least-frequent merge-fields most-frequent nest
 nothing put regularize remove-empty-columns rename reorder repeat reshape
 sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records sort
-sort-within-records stats1 stats2 step tac tail tee template top unflatten
-uniq unsparsify
+sort-within-records split stats1 stats2 step tac tail tee template top
+unflatten uniq unsparsify
 .fi
 .if n \{\
 .RE
@@ -2169,6 +2169,52 @@ Options:
 .fi
 .if n \{\
 .RE
+.SS "split"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr split [options] {filename}
+Options:
+-n {n}:      Cap file sizes at N records.
+-m {m}:      Produce M files, round-robining records among them.
+-g {a,b,c}:  Write separate files with records having distinct values for fields named a,b,c.
+Exactly one  of -m, -n, or -g must be supplied.
+--prefix {p} Specify filename prefix; default "split".
+--suffix {s} Specify filename suffix; default is from mlr output format, e.g. "csv".
+-a           Append to existing file(s), if any, rather than overwriting.
+-v           Send records along to downstream verbs as well as splitting to files.
+-h|--help    Show this message.
+Any of the output-format command-line flags (see mlr -h). For example, using
+  mlr --icsv --from myfile.csv split --ojson -n 1000
+the input is CSV, but the output files are JSON.
+
+Examples: Suppose myfile.csv has 1,000,000 records.
+
+100 output files, 10,000 records each. First 10,000 records in split_1.csv, next in split_2.csv, etc.
+  mlr --csv --from myfile.csv split -n 10000
+
+10 output files, 100,000 records each. Records 1, 11, 21, etc. in split_1.csv; records 2, 12, 22, etc. in split_2.csv; and so on.
+  mlr --csv --from myfile.csv split -m 10
+Same, but with JSON output.
+  mlr --csv --from myfile.csv split -m 10 -o json
+
+Same, but instead of split_1.csv, split_2.csv, etc., there are test_1.dat, test_2.dat, etc.
+  mlr --csv --from myfile.csv split -m 10 --prefix test --suffix dat
+Same, but written to the /tmp/ directory.
+  mlr --csv --from myfile.csv split -m 10 --prefix /tmp/test --suffix dat
+
+If the shape field has values triangle and square, then there will be split_triangle.csv and split_square.csv.
+  mlr --csv --from myfile.csv split -g shape
+
+If the color field has values yellow and green, and the shape field has values triangle and square,
+then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
+  mlr --csv --from myfile.csv split -g color,shape
+
+See also the "tee" DSL function which lets you do more ad-hoc customization.
+.fi
+.if n \{\
+.RE
 .SS "stats1"
 .if n \{\
 .RS 0
diff --git a/todo.txt b/todo.txt
index a6e7a7439..3539fde95 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,4 +1,4 @@
-================================================================
+===============================================================
 RELEASES
 
 * follow ...
@@ -27,6 +27,9 @@ FEATURES
   o strmatch
   o =~
 * separate examples from FAQs
+* mlr split -- needs an example page along with the tee DSL function
+* new example entry, with ccump and pgr
+  o slwin --prune (or somesuch) to only emit averages over full windows -- ?
 
 ----------------------------------------------------------------
 k better print-interpolate with {} etc
@@ -34,16 +37,10 @@ k better print-interpolate with {} etc
 ----------------------------------------------------------------
 ! sysdate, sysdate_local; datediff ...
 
-----------------------------------------------------------------
-mlr split ... -n, -g -- ?
-- how to specify filenames?
-
 ----------------------------------------------------------------
 ! strmatch https://github.com/johnkerl/miller/issues/77#issuecomment-538790927
 
 ----------------------------------------------------------------
-* new example entry, with ccump and pgr
-  o slwin --prune (or somesuch) to only emit averages over full windows -- ?
 * make a lag-by-n and lead-by-n
 
 ----------------------------------------------------------------