Export library code in pkg/ (#1391)

* Export library code in `pkg/` * new doc page
2026-01-23 02:14:13 +00:00 · 2023-09-10 17:15:13 -04:00 · 2023-09-10 17:15:13 -04:00 · 268a96d002
commit 268a96d002
parent 93b7c8eac0
358 changed files with 1076 additions and 693 deletions
--- a/pkg/cli/README.md
+++ b/pkg/cli/README.md
@ -0,0 +1,4 @@
+Datatypes for parsing the Miller command line, and the flags table.
+
+* `pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer.
+* `pkg/cli` contains datatypes and the flags table for the CLI-parser, which was split out to avoid a Go package-import cycle.
--- a/pkg/cli/doc.go
+++ b/pkg/cli/doc.go
@ -0,0 +1,3 @@
+// Package cli contains the flags table and associated datatypes for parsing
+// the Miller command line.
+package cli
--- a/pkg/cli/flag_types.go
+++ b/pkg/cli/flag_types.go
@ -0,0 +1,535 @@
+// ================================================================
+// Miller support for command-line flags.
+//
+// * Flags are used for several purposes:
+//
+//   o Command-line parsing the main mlr program.
+//
+//   o Record-reader and record-writer options for a few verbs such as join and
+//     tee. E.g. `mlr --csv join -f foo.tsv --tsv ...`: the main input files are
+//     CSV but the join-in file is TSV.
+//
+//   o Processing .mlrrc files.
+//
+//   o Autogenerating on-line help for `mlr help flags`.
+//
+//   o Autogenerating the manpage for `man mlr`.
+//
+//   o Autogenerating webdocs (mkdocs).
+//
+// * For these reasons, flags are organized into tables; for documentation
+//   purposes, flags are organized into sections (see pkg/cli/option_parse.go).
+//
+// * The Flag struct separates out flag name (e.g. `--csv`), any alternate
+//   names (e.g. `-c`), any arguments the flag may take, a help string, and a
+//   command-line parser function.
+//
+// * The tabular structure may seem overwrought; in fact it has been a blessing
+//   to develop the tabular structure since these flags objects need to serve
+//   so many roles as listed above.
+//
+// * I don't use Go flags for a few reasons. The most important one is that I
+//   need to handle repeated flags, e.g. --from can be used more than once for
+//   mlr, and -f/-n/-r etc can be used more than once for mlr sort, etc. I also
+//   insist on total control of flag formatting including alphabetization of
+//   flags for on-line help and documentation systems.
+// ================================================================
+
+package cli
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/johnkerl/miller/pkg/colorizer"
+	"github.com/johnkerl/miller/pkg/lib"
+)
+
+// ----------------------------------------------------------------
+// Data types used within the flags table.
+
+// FlagParser is a function which handles a flag such as `--foo`, located at
+// args[*pargi].
+//   - It should assume that a flag.Owns method has already been invoked to be
+//     sure that this function is indeed the right one to call for `--foo`.
+//   - The FlagParser function is responsible for advancing *pargi by 1 (if
+//     `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
+//     the latter case, and mutating the options struct.
+//   - Successful handling of the flag is indicated by this function making a
+//     non-zero increment of *pargi; FlagTable.Parse reports failure when
+//     *pargi is left unchanged.
+type FlagParser func(
+	args []string,
+	argc int,
+	pargi *int,
+	options *TOptions,
+)
+
+// ----------------------------------------------------------------
+
+// FlagTable holds all the flags for Miller, organized into sections.
+type FlagTable struct {
+	// The lookup and help methods visit sections in slice order; the Sort
+	// method reorders them alphabetically by section name.
+	sections []*FlagSection
+}
+
+// FlagSection holds all the flags in a given category, where these
+// categories exist for documentation purposes.
+//
+// The name should be right-cased for webdocs. For on-line help and
+// manpage use, it will get fully uppercased.
+//
+// The infoPrinter provides summary/overview for all flags in the
+// section, for on-line help / webdocs.
+type FlagSection struct {
+	name        string
+	infoPrinter func()
+	flags       []Flag
+}
+
+// Flag is a container for all runtime as well as documentation information for
+// a flag.
+type Flag struct {
+	// In most cases, the flag has just one spelling, like "--ifs".
+	name string
+
+	// In some cases, the flag has more than one spelling, like "-h" and
+	// "--help", or "-c" and "--csv". The altNames field can be omitted from
+	// struct initializers, which in Go means it will read as nil.
+	altNames []string
+
+	// If not "", a name for the flag's argument, for on-line help. E.g. the
+	// "{bar}" in "--foo {bar}". It should always be written in curly braces.
+	arg string
+
+	// Help string for `mlr help flags`, `man mlr`, and webdocs.
+	// * It should be all one line within the source code. The text will be
+	//   reformatted as a paragraph for on-line help / manpage, so there should
+	//   be no attempt at line-breaking within the help string.
+	// * Any code bits should be marked with backticks. These look OK for
+	//   on-line help / manpage, and render marvelously for webdocs which
+	//   take markdown.
+	// * After changing flags you can run `make precommit` in the Miller
+	//   repo base directory followed by `git diff` to see how the output
+	//   looks. See also the README.md files in the docs and man directories
+	//   for how to look at the autogenned docs pre-commit.
+	help string
+
+	// A function for parsing the command line; see the FlagParser type.
+	parser FlagParser
+
+	// For format-conversion keystroke-savers, a matrix is plenty -- we don't
+	// need to print a tedious 60-line list. When true, the flag is skipped by
+	// FlagSection.ShowHelpForFlags.
+	suppressFlagEnumeration bool
+}
+
+// ================================================================
+// FlagTable methods
+
+// Sort puts the table's sections into alphabetical order by name, ignoring
+// case, so that on-line help is easy to navigate. It is meant to be called
+// once, up front, before any help is shown.
+func (ft *FlagTable) Sort() {
+	sections := ft.sections
+	// sort.Slice takes a "less" callback: true when element a precedes b.
+	sort.Slice(sections, func(a, b int) bool {
+		left := strings.ToLower(sections[a].name)
+		right := strings.ToLower(sections[b].name)
+		return left < right
+	})
+}
+
+// Parse handles the flag at args[*pargi]. The first Flag in the table which
+// owns that flag has its parser invoked; the parser advances *pargi past the
+// flag and any argument it takes (e.g. by 2 for `--ifs pipe`, by 1 for `-I`).
+//
+// The return value is true if an owning Flag was found and its parser
+// advanced *pargi, else false -- either no Flag owns the text, or the parser
+// declined it by leaving *pargi unchanged.
+func (ft *FlagTable) Parse(
+	args []string,
+	argc int,
+	pargi *int,
+	options *TOptions,
+) bool {
+	for _, fs := range ft.sections {
+		for i := range fs.flags {
+			candidate := &fs.flags[i]
+			if !candidate.Owns(args[*pargi]) {
+				continue
+			}
+			// Only the first owner is consulted, whether or not it accepts.
+			before := *pargi
+			candidate.parser(args, argc, pargi, options)
+			return *pargi > before
+		}
+	}
+	return false
+}
+
+// ShowHelp prints all-in-one on-line help, nominally for `mlr help flags`:
+// for each section, an uppercased and colorized title, the section's info
+// text, and then help for each of the section's flags.
+func (ft *FlagTable) ShowHelp() {
+	for idx, fs := range ft.sections {
+		// Blank line between sections, but not ahead of the first one.
+		if idx != 0 {
+			fmt.Println()
+		}
+		title := strings.ToUpper(fs.name)
+		fmt.Println(colorizer.MaybeColorizeHelp(title, true))
+		fmt.Println()
+		fs.PrintInfo()
+		fs.ShowHelpForFlags()
+	}
+}
+
+// ListFlagSections prints the name of each section, one per line. It exposes
+// some of the flags-table structure so that autogen scripts for on-line help
+// and webdocs can do the looping in their own code.
+func (ft *FlagTable) ListFlagSections() {
+	for i := range ft.sections {
+		fmt.Println(ft.sections[i].name)
+	}
+}
+
+// ShowHelpForSection prints the info text and the per-flag help for the
+// section with the given name (exact match). It exposes some of the
+// flags-table structure so that autogen scripts for on-line help and webdocs
+// can do the looping in their own code. The return value is true if the
+// section was found, else false.
+func (ft *FlagTable) ShowHelpForSection(sectionName string) bool {
+	for _, fs := range ft.sections {
+		if fs.name != sectionName {
+			continue
+		}
+		fs.PrintInfo()
+		fs.ShowHelpForFlags()
+		return true
+	}
+	return false
+}
+
+// ShowHelpForSectionViaDowndash looks up flag-section help given a
+// "downdashed" section name. Sections are named like "CSV-only flags",
+// whereas `mlr help` uses `mlr help csv-only-flags`: the latter is the former
+// downcased, with spaces replaced by dashes. On a match this prints the
+// uppercased, colorized section name, then the section info, then the
+// per-flag help, and returns true; otherwise it returns false.
+func (ft *FlagTable) ShowHelpForSectionViaDowndash(downdashSectionName string) bool {
+	for _, fs := range ft.sections {
+		if fs.GetDowndashSectionName() != downdashSectionName {
+			continue
+		}
+		title := strings.ToUpper(fs.name)
+		fmt.Println(colorizer.MaybeColorizeHelp(title, true))
+		fs.PrintInfo()
+		fs.ShowHelpForFlags()
+		return true
+	}
+	return false
+}
+
+// PrintInfoForSection prints only the info text for the section with the
+// given name (exact match). It exposes some of the flags-table structure so
+// that autogen scripts for on-line help and webdocs can do the looping in
+// their own code. The return value is true if the section was found, else
+// false.
+func (ft *FlagTable) PrintInfoForSection(sectionName string) bool {
+	for _, fs := range ft.sections {
+		if fs.name != sectionName {
+			continue
+		}
+		fs.PrintInfo()
+		return true
+	}
+	return false
+}
+
+// ListFlagsForSection prints the flag names, one per line, for the section
+// with the given name (exact match). It exposes some of the flags-table
+// structure so that autogen scripts for on-line help and webdocs can do the
+// looping in their own code. The return value is true if the section was
+// found, else false.
+func (ft *FlagTable) ListFlagsForSection(sectionName string) bool {
+	for _, fs := range ft.sections {
+		if fs.name != sectionName {
+			continue
+		}
+		fs.ListFlags()
+		return true
+	}
+	return false
+}
+
+// ShowHeadlineForFlag prints the headline for the first flag which owns
+// flagName, returning true if there is one and false otherwise.
+//
+// Given flag named `--foo`, altName `-f`, and argument spec `{bar}`, the
+// headline is `--foo or -f {bar}`. This is the bit which is highlighted in
+// on-line help; its length is also used for alignment decisions in the on-line
+// help and the manpage.
+func (ft *FlagTable) ShowHeadlineForFlag(flagName string) bool {
+	for _, fs := range ft.sections {
+		for i := range fs.flags {
+			candidate := &fs.flags[i]
+			if candidate.Owns(flagName) {
+				fmt.Println(candidate.GetHeadline())
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// ShowHelpForFlag prints the flag's help-string all on one line.  This is for
+// webdoc usage where the browser does dynamic line-wrapping, as the user
+// resizes the browser window.
+func (ft *FlagTable) ShowHelpForFlag(flagName string) bool {
+	return ft.showHelpForFlagMaybeWithName(flagName, false)
+}
+
+// ShowHelpForFlagWithName prints the flag's name colorized, then flag's
+// help-string all on one line.  This is for on-line help usage.
+func (ft *FlagTable) ShowHelpForFlagWithName(flagName string) bool {
+	return ft.showHelpForFlagMaybeWithName(flagName, true)
+}
+
+// showHelpForFlagMaybeWithName supports ShowHelpForFlag and
+// ShowHelpForFlagWithName. For the first flag which owns flagName it prints
+// the flag's one-line help, preceded -- when showName is true -- by flagName
+// itself, colorized. Note that the name printed is the spelling which was
+// asked for, which may be one of the flag's alternate names. The return value
+// is true if an owning flag was found, else false.
+func (ft *FlagTable) showHelpForFlagMaybeWithName(flagName string, showName bool) bool {
+	for _, fs := range ft.sections {
+		for i := range fs.flags {
+			candidate := &fs.flags[i]
+			if !candidate.Owns(flagName) {
+				continue
+			}
+			if showName {
+				fmt.Println(colorizer.MaybeColorizeHelp(flagName, true))
+			}
+			fmt.Println(candidate.GetHelpOneLine())
+			return true
+		}
+	}
+	return false
+}
+
+// ShowHelpForFlagApproximateWithName is like ShowHelpForFlagWithName but
+// allows substring matches: it prints the colorized name and the one-line
+// help for every flag whose name, or any of whose alternate names, contains
+// searchString. This is for on-line help usage.
+//
+// The return value is true if at least one flag matched, else false, in
+// keeping with the other lookup methods on FlagTable.
+func (ft *FlagTable) ShowHelpForFlagApproximateWithName(searchString string) bool {
+	found := false
+	for _, fs := range ft.sections {
+		for _, flag := range fs.flags {
+			if flag.Matches(searchString) {
+				fmt.Println(colorizer.MaybeColorizeHelp(flag.name, true))
+				fmt.Println(flag.GetHelpOneLine())
+				// Keep scanning: a substring can match more than one flag.
+				found = true
+			}
+		}
+	}
+	return found
+}
+
+// GetDowndashSectionNames returns the downdashed name of every section, in
+// table order: names like "CSV-only flags" are downcased and have spaces
+// replaced with dashes, giving names like "csv-only-flags". This is for the
+// benefit of per-section help in `mlr help topics`.
+func (ft *FlagTable) GetDowndashSectionNames() []string {
+	names := make([]string, 0, len(ft.sections))
+	for _, fs := range ft.sections {
+		names = append(names, fs.GetDowndashSectionName())
+	}
+	return names
+}
+
+// NilCheck checks to see if any flag/section is missing help info. This arises
+// since in Go you needn't specify all struct initializers, so for example a
+// Flag struct-initializer which doesn't say `help: "..."` will have an empty
+// help string. This checking doesn't need to be done on every Miller
+// invocation, but rather, only at build time: the `mlr help` terminal has an
+// entrypoint wherein a regression-test case can do `mlr help nil-check` and
+// make sure this function exits cleanly.
+func (ft *FlagTable) NilCheck() {
+	sections := ft.sections
+	lib.InternalCodingErrorWithMessageIf(sections == nil, "Nil table sections")
+	lib.InternalCodingErrorWithMessageIf(len(sections) == 0, "Zero table sections")
+	for i := range sections {
+		sections[i].NilCheck()
+	}
+	fmt.Println("Flag-table nil check completed successfully.")
+}
+
+// ================================================================
+// FlagSection methods
+
+// Sort puts the section's flags into alphabetical order by primary name,
+// ignoring case, so that on-line help is easy to navigate. It is meant to be
+// called once, up front, before any help is shown.
+func (fs *FlagSection) Sort() {
+	flags := fs.flags
+	// sort.Slice takes a "less" callback: true when element a precedes b.
+	sort.Slice(flags, func(a, b int) bool {
+		left := strings.ToLower(flags[a].name)
+		right := strings.ToLower(flags[b].name)
+		return left < right
+	})
+}
+
+// ShowHelpForFlags prints help for each flag in the section, nominally for
+// `mlr help flags`. Flags marked suppressFlagEnumeration are left out: for
+// format-conversion keystroke-savers, a matrix is plenty -- we don't need to
+// print a tedious 60-line list.
+func (fs *FlagSection) ShowHelpForFlags() {
+	for i := range fs.flags {
+		entry := &fs.flags[i]
+		if !entry.suppressFlagEnumeration {
+			entry.ShowHelp()
+		}
+	}
+}
+
+// PrintInfo runs the section's infoPrinter and then prints a blank line. It
+// exposes some of the flags-table structure so that autogen scripts for
+// on-line help and webdocs can do the looping in their own code.
+func (fs *FlagSection) PrintInfo() {
+	printInfo := fs.infoPrinter
+	printInfo()
+	fmt.Println()
+}
+
+// ListFlags prints the primary name of each flag in the section, one per
+// line. It exposes some of the flags-table structure so that autogen scripts
+// for on-line help and webdocs can do the looping in their own code.
+func (fs *FlagSection) ListFlags() {
+	for i := range fs.flags {
+		fmt.Println(fs.flags[i].name)
+	}
+}
+
+// GetDowndashSectionName maps a section name like "CSV-only flags" to
+// "csv-only-flags" -- downcased, with spaces replaced by dashes -- for the
+// benefit of per-section help in `mlr help topics`.
+func (fs *FlagSection) GetDowndashSectionName() string {
+	lowered := strings.ToLower(fs.name)
+	return strings.ReplaceAll(lowered, " ", "-")
+}
+
+// NilCheck verifies that the section has a name, an infoPrinter, and at least
+// one flag, and then nil-checks each of its flags. See the comments above
+// FlagTable's NilCheck method for why this exists.
+func (fs *FlagSection) NilCheck() {
+	sectionName := fs.name
+	lib.InternalCodingErrorWithMessageIf(sectionName == "", "Empty section name")
+	lib.InternalCodingErrorWithMessageIf(fs.infoPrinter == nil, "Nil infoPrinter for section "+sectionName)
+	lib.InternalCodingErrorWithMessageIf(fs.flags == nil, "Nil flags for section "+sectionName)
+	lib.InternalCodingErrorWithMessageIf(len(fs.flags) == 0, "Zero flags for section "+sectionName)
+	for i := range fs.flags {
+		fs.flags[i].NilCheck()
+	}
+}
+
+// ================================================================
+// Flag methods
+
+// Owns reports whether input is exactly this flag's name or exactly one of
+// its alternate names, e.g. "--foo". This is used for command-line parsing,
+// as well as for on-line help with exact match on flag name.
+func (flag *Flag) Owns(input string) bool {
+	if input == flag.name {
+		return true
+	}
+	for i := range flag.altNames {
+		if input == flag.altNames[i] {
+			return true
+		}
+	}
+	return false
+}
+
+// Matches is like Owns but does substring matching: it reports whether input
+// occurs anywhere within this flag's name or within any of its alternate
+// names. This is for on-line help with approximate match on flag name.
+func (flag *Flag) Matches(input string) bool {
+	if strings.Contains(flag.name, input) {
+		return true
+	}
+	for i := range flag.altNames {
+		if strings.Contains(flag.altNames[i], input) {
+			return true
+		}
+	}
+	return false
+}
+
+// ShowHelp produces formatting for `mlr help flags` and manpage use.
+// Example:
+// * Flag name is `--foo`
+// * altName is `-f`
+// * Argument spec is `{bar}`
+// * Help string is "Lorem ipsum dolor sit amet, consectetur adipiscing elit,
+//   sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim
+//   ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
+//   ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
+//   velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
+//   cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id
+//   est laborum."
+// * The headline (see the GetHeadline function) is `--foo or -f {bar}`.
+// * We place the headline left in a 25-character column, colorized with the
+//   help color.
+// * We format the help text as 55-character lines and place them
+//   to the right.
+// * The result looks like
+//
+//   --foo or -f {bar}        Lorem ipsum dolor sit amet, consectetur adipiscing
+//                            elit, sed do eiusmod tempor incididunt ut labore et
+//                            dolore magna aliqua. Ut enim ad minim veniam, quis
+//                            nostrud exercitation ullamco laboris nisi ut aliquip
+//                            ex ea commodo consequat. Duis aute irure dolor in
+//                            reprehenderit in voluptate velit esse cillum dolore
+//                            eu fugiat nulla pariatur. Excepteur sint occaecat
+//                            cupidatat non proident, sunt in culpa qui officia
+//                            deserunt mollit anim id est laborum.
+//
+// * If the headline is too long we put the first help line a line below like this:
+//
+//   --foo-flag-is-very-very-long {bar}
+//                            Lorem ipsum dolor sit amet, consectetur adipiscing
+//                            elit, sed do eiusmod tempor incididunt ut labore et
+//                            dolore magna aliqua. Ut enim ad minim veniam, quis
+//                            nostrud exercitation ullamco laboris nisi ut aliquip
+//                            ex ea commodo consequat. Duis aute irure dolor in
+//                            reprehenderit in voluptate velit esse cillum dolore
+//                            eu fugiat nulla pariatur. Excepteur sint occaecat
+//                            cupidatat non proident, sunt in culpa qui officia
+//                            deserunt mollit anim id est laborum.
+//
+
+func (flag *Flag) ShowHelp() {
+	headline := flag.GetHeadline()
+	displayHeadline := fmt.Sprintf("%-25s", headline)
+	broken := len(headline) >= 25
+
+	helpLines := lib.FormatAsParagraph(flag.help, 55)
+
+	if broken {
+		fmt.Printf("%s\n", colorizer.MaybeColorizeHelp(displayHeadline, true))
+		for _, helpLine := range helpLines {
+			fmt.Printf("%25s%s\n", " ", helpLine)
+		}
+	} else {
+		fmt.Printf("%s", colorizer.MaybeColorizeHelp(displayHeadline, true))
+		if len(helpLines) == 0 {
+			fmt.Println()
+		}
+		for i, helpLine := range helpLines {
+			if i == 0 {
+				fmt.Printf("%s\n", helpLine)
+			} else {
+				fmt.Printf("%25s%s\n", " ", helpLine)
+			}
+		}
+	}
+}
+
+// GetHeadline puts together the flag name, any altNames, and any argument spec
+// into a single string for the left column of on-line help / manpage content.
+// Given flag named `--foo`, altName `-f`, and argument spec `{bar}`, the
+// headline is `--foo or -f {bar}`. This is the bit which is highlighted in
+// on-line help; its length is also used for alignment decisions in the on-line
+// help and the manpage.
+func (flag *Flag) GetHeadline() string {
+	// Primary name first, then the alternates (if any) in declaration order.
+	names := make([]string, 0, 1+len(flag.altNames))
+	names = append(names, flag.name)
+	names = append(names, flag.altNames...)
+
+	headline := strings.Join(names, " or ")
+	if flag.arg == "" {
+		return headline
+	}
+	return headline + " " + flag.arg
+}
+
+// GetHelpOneLine returns the help string with each newline replaced by a
+// space, so that it is all on one line (just in case anyone typed it in using
+// multiline string-literal backtick notation in Go). This is suitable for
+// webdoc use, where the browser dynamically line-wraps as the user resizes
+// the window.
+func (flag *Flag) GetHelpOneLine() string {
+	return strings.ReplaceAll(flag.help, "\n", " ")
+}
+
+// NilCheck verifies that the flag has a name, a help string, and a parser
+// function, raising an internal coding error for the first one found missing.
+// See the comments above FlagTable's NilCheck method for why this exists.
+func (flag *Flag) NilCheck() {
+	lib.InternalCodingErrorWithMessageIf(flag.name == "", "Empty flag name")
+	lib.InternalCodingErrorWithMessageIf(flag.help == "", "Empty flag help for flag "+flag.name)
+	// This check is about the parser function, not the help text.
+	lib.InternalCodingErrorWithMessageIf(flag.parser == nil, "Nil parser for flag "+flag.name)
+}
+
+// ================================================================
+// Helper methods
+
+// NoOpParse1 is a FlagParser for flags which take no argument and are kept
+// only as backward-compatibility no-ops: it steps *pargi past the flag and
+// leaves the options untouched.
+func NoOpParse1(args []string, argc int, pargi *int, options *TOptions) {
+	(*pargi)++
+}
--- a/pkg/cli/flatten_unflatten.go
+++ b/pkg/cli/flatten_unflatten.go
@ -0,0 +1,79 @@
+package cli
+
+// ================================================================
+// Decide whether to insert a flatten or unflatten verb at the end of the
+// chain.  See also repl/verbs.go which handles the same issue in the REPL.
+//
+// ----------------------------------------------------------------
+// PROBLEM TO BE SOLVED:
+//
+// JSON has nested structures and CSV et al. do not. For example:
+// {
+//   "req" : {
+//     "method": "GET",
+//     "path":   "api/check"
+//   }
+// }
+//
+// For CSV we flatten this down to
+//
+// {
+//   "req.method": "GET",
+//   "req.path":   "api/check"
+// }
+//
+// ----------------------------------------------------------------
+// APPROACH:
+//
+// Use the Principle of Least Surprise (POLS).
+//
+// * If input is JSON and output is JSON:
+//   o Records can be nested from record-read
+//   o They remain that way through the Miller record-processing stream
+//   o They are nested on record-write
+//   o No action needs to be taken
+//
+// * If input is JSON and output is non-JSON:
+//   o Records can be nested from record-read
+//   o They remain that way through the Miller record-processing stream
+//   o On record-write, nested structures will be converted to string (carriage
+//     returns and all) using json_stringify. People *might* want this but
+//     (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
+//     --no-auto-flatten CLI flag for those who want it.
+//
+// * If input is non-JSON and output is non-JSON:
+//   o If there is a "req.method" field, people should be able to do
+//     'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
+//     no auto-unflatten on input.  People can insert an unflatten verb
+//     into their verb chain if they really want unflatten for non-JSON
+//     files.
+//   o The DSL can make nested data, so AUTO-FLATTEN at output.
+//
+// * If input is non-JSON and output is JSON:
+//   o Default is to auto-unflatten at output.
+//   o There is a --no-auto-unflatten for those who want it.
+// ================================================================
+
+// DecideFinalFlatten reports whether a flatten step should be appended at the
+// end of the verb chain: that is, when auto-flatten is enabled and the output
+// format is anything other than JSON.
+func DecideFinalFlatten(writerOptions *TWriterOptions) bool {
+	if !writerOptions.AutoFlatten {
+		return false
+	}
+	return writerOptions.OutputFileFormat != "json"
+}
+
+// DecideFinalUnflatten reports whether an unflatten step should be appended
+// at the end of the verb chain: that is, when auto-unflatten is enabled, the
+// input format is not JSON, and the output format is JSON.
+func DecideFinalUnflatten(options *TOptions) bool {
+	if !options.WriterOptions.AutoUnflatten {
+		return false
+	}
+	inputIsJSON := options.ReaderOptions.InputFileFormat == "json"
+	outputIsJSON := options.WriterOptions.OutputFileFormat == "json"
+	return !inputIsJSON && outputIsJSON
+}
--- a/pkg/cli/mlrcli_util.go
+++ b/pkg/cli/mlrcli_util.go
@ -0,0 +1,38 @@
+package cli
+
+import (
+	"fmt"
+	"os"
+)
+
+// CheckArgCount is for flags with values, e.g. ["-n" "10"], while we're
+// looking at the "-n": this lets us see if the "10" slot exists. Here n is
+// the number of slots needed counting the flag itself, argi is the index of
+// the flag within args, and argc is the argument count. If fewer than n slots
+// remain, a usage error is printed to stderr and the process exits with
+// status 1.
+func CheckArgCount(args []string, argi int, argc int, n int) {
+	remaining := argc - argi
+	if remaining >= n {
+		return
+	}
+	fmt.Fprintf(os.Stderr, "%s: option \"%s\" missing argument(s).\n", "mlr", args[argi])
+	fmt.Fprintf(os.Stderr, "Please run \"%s --help\" for detailed usage information.\n", "mlr")
+	os.Exit(1)
+}
+
+// SeparatorFromArg is for letting people do things like `--ifs pipe` rather
+// than `--ifs '|'`. If name is a key in SEPARATOR_NAMES_TO_VALUES then the
+// corresponding value is returned; otherwise name is returned as-is.
+func SeparatorFromArg(name string) string {
+	if value, isAlias := SEPARATOR_NAMES_TO_VALUES[name]; isAlias {
+		return value
+	}
+	return name
+}
+
+// SeparatorRegexFromArg is for letting people do things like `--ifs-regex
+// whitespace` rather than `--ifs-regex '([ \t])+'`. If name is a key in
+// SEPARATOR_REGEX_NAMES_TO_VALUES then the corresponding value is returned;
+// otherwise name is returned as-is.
+func SeparatorRegexFromArg(name string) string {
+	if value, isAlias := SEPARATOR_REGEX_NAMES_TO_VALUES[name]; isAlias {
+		return value
+	}
+	return name
+}
--- a/pkg/cli/option_parse.go
+++ b/pkg/cli/option_parse.go
--- a/pkg/cli/option_types.go
+++ b/pkg/cli/option_types.go
@ -0,0 +1,226 @@
+// ================================================================
+// Items which might better belong in miller/cli, but which are placed in a
+// deeper package to avoid a package-dependency cycle between miller/cli and
+// miller/transforming.
+// ================================================================
+
+package cli
+
+import (
+	"regexp"
+
+	"github.com/johnkerl/miller/pkg/lib"
+)
+
+// TCommentHandling enumerates the comment-handling modes which are stored in
+// TReaderOptions.CommentHandling.
+type TCommentHandling int
+
+const (
+	// CommentsAreData is the zero value, and is what DefaultReaderOptions uses.
+	CommentsAreData TCommentHandling = iota
+	SkipComments
+	PassComments
+)
+const DEFAULT_COMMENT_STRING = "#"
+
+// Defaults for the TGeneratorOptions within DefaultReaderOptions.
+const DEFAULT_GEN_FIELD_NAME = "i"
+const DEFAULT_GEN_START_AS_STRING = "1"
+const DEFAULT_GEN_STEP_AS_STRING = "1"
+const DEFAULT_GEN_STOP_AS_STRING = "100"
+
+// Default for TReaderOptions.RecordsPerBatch.
+const DEFAULT_RECORDS_PER_BATCH = 500
+
+// TGeneratorOptions holds settings for the internal data-generator
+// pseudo-reader; see TReaderOptions.GeneratorOptions. The start, step, and
+// stop values are held here as strings.
+type TGeneratorOptions struct {
+	FieldName     string
+	StartAsString string
+	StepAsString  string
+	StopAsString  string
+}
+
+// TReaderOptions holds the record-reader settings. A value obtained from
+// DefaultReaderOptions is not usable until FinalizeReaderOptions is called on
+// it.
+type TReaderOptions struct {
+	InputFileFormat  string
+	IFS              string
+	IPS              string
+	IRS              string
+	AllowRepeatIFS   bool
+	IFSRegex         *regexp.Regexp
+	IPSRegex         *regexp.Regexp
+	DedupeFieldNames bool
+
+	// If unspecified on the command line, these take input-format-dependent
+	// defaults.  E.g. default FS is comma for DKVP but space for NIDX;
+	// default AllowRepeatIFS is false for CSV but true for PPRINT.
+	ifsWasSpecified            bool
+	ipsWasSpecified            bool
+	irsWasSpecified            bool
+	allowRepeatIFSWasSpecified bool
+
+	UseImplicitCSVHeader bool
+	AllowRaggedCSVInput  bool
+	CSVLazyQuotes        bool
+	CSVTrimLeadingSpace  bool
+
+	CommentHandling TCommentHandling
+	CommentString   string
+
+	// Fake internal-data-generator 'reader'
+	GeneratorOptions TGeneratorOptions
+
+	// For out-of-process handling of compressed data, via popen
+	Prepipe string
+	// For most things like gunzip we do 'gunzip < filename | mlr ...' if
+	// filename is present, else 'gunzip | mlr ...' if reading from stdin.
+	// However some commands like 'unzip -qc' are weird so this option lets
+	// people give the command and we won't insert the '<'.
+	PrepipeIsRaw bool
+	// For in-process gunzip/bunzip2/zcat (distinct from prepipe)
+	FileInputEncoding lib.TFileInputEncoding
+
+	// NOTE(review): presumably the number of records per reader batch -- confirm.
+	RecordsPerBatch int64
+}
+
+// ----------------------------------------------------------------
+// TWriterOptions holds the record-writer settings. A value obtained from
+// DefaultWriterOptions is not usable until FinalizeWriterOptions is called on
+// it.
+type TWriterOptions struct {
+	OutputFileFormat string
+	ORS              string
+	OFS              string
+	OPS              string
+	FLATSEP          string
+
+	FlushOnEveryRecord             bool
+	flushOnEveryRecordWasSpecified bool
+
+	// If unspecified on the command line, these take output-format-dependent
+	// defaults.  E.g. default FS is comma for DKVP but space for NIDX.
+	ofsWasSpecified bool
+	opsWasSpecified bool
+	orsWasSpecified bool
+
+	HeaderlessCSVOutput      bool
+	BarredPprintOutput       bool
+	RightAlignedPPRINTOutput bool
+	RightAlignedXTABOutput   bool
+
+	// JSON output: --jlistwrap on, --jvstack on
+	// JSON Lines output: --jlistwrap off, --jvstack off
+	WrapJSONOutputInOuterList bool // --jlistwrap
+	JSONOutputMultiline       bool // --jvstack
+	JVQuoteAll                bool // --jvquoteall
+	// Not using miller/types enum to avoid package cycle
+
+	CSVQuoteAll bool // --quote-all
+
+	// When we read things like
+	//
+	//   x.a=1,x.b=2
+	//
+	// which is how we write out nested data structures for non-nested formats
+	// (all but JSON), the default behavior is to unflatten them back to
+	//
+	//   {"x": {"a": 1, "b": 2}}
+	//
+	// unless the user explicitly asks to suppress that.
+	AutoUnflatten bool
+
+	// The default behavior is to flatten nested data structures like
+	//
+	//   {"x": {"a": 1, "b": 2}}
+	//
+	// down to
+	//
+	//   x.a=1,x.b=2
+	//
+	// which is how we write out nested data structures for non-nested formats
+	// (all but JSON) -- unless the user explicitly asks to suppress that.
+	AutoFlatten bool
+
+	// For floating-point numbers: "" means use the Go default.
+	FPOFMT string
+
+	// Fatal the process when error data in a given record is about to be output.
+	FailOnDataError bool
+}
+
+// ----------------------------------------------------------------
+// TOptions holds the main-program options: the reader and writer options,
+// along with the file names and the main-flag settings.
+type TOptions struct {
+	ReaderOptions TReaderOptions
+	WriterOptions TWriterOptions
+
+	// Data files to be operated on: e.g. given 'mlr cat foo.dat bar.dat', this
+	// is ["foo.dat", "bar.dat"].
+	FileNames []string
+
+	// DSL files to be loaded for every put/filter operation -- like 'put -f'
+	// or 'filter -f' but specified up front on the command line, suitable for
+	// .mlrrc. Use-case is someone has DSL functions they always want to be
+	// defined.
+	//
+	// Risk of CVE if this is in .mlrrc so --load and --mload are explicitly
+	// denied in the .mlrrc reader.
+	DSLPreloadFileNames []string
+
+	NRProgressMod int64
+	DoInPlace     bool // mlr -I
+	NoInput       bool // mlr -n
+
+	HaveRandSeed bool
+	RandSeed     int64
+
+	PrintElapsedTime bool // mlr --time
+}
+
+// DefaultOptions returns a freshly allocated TOptions with default reader and
+// writer options and with empty (non-nil) file-name lists. The result is not
+// usable until FinalizeReaderOptions and FinalizeWriterOptions are called.
+func DefaultOptions() *TOptions {
+	options := new(TOptions)
+	options.ReaderOptions = DefaultReaderOptions()
+	options.WriterOptions = DefaultWriterOptions()
+	options.FileNames = []string{}
+	options.DSLPreloadFileNames = []string{}
+	options.NoInput = false
+	return options
+}
+
+// Not usable until FinalizeReaderOptions is called on it.
+func DefaultReaderOptions() TReaderOptions {
+	return TReaderOptions{
+		InputFileFormat: "dkvp", // TODO: constify at top, or maybe formats.DKVP in package
+		// FinalizeReaderOptions will compute IFSRegex and IPSRegex.
+		IRS:               "\n",
+		IFS:               ",",
+		IPS:               "=",
+		CommentHandling:   CommentsAreData,
+		FileInputEncoding: lib.FileInputEncodingDefault,
+		GeneratorOptions: TGeneratorOptions{
+			FieldName:     DEFAULT_GEN_FIELD_NAME,
+			StartAsString: DEFAULT_GEN_START_AS_STRING,
+			StepAsString:  DEFAULT_GEN_STEP_AS_STRING,
+			StopAsString:  DEFAULT_GEN_STOP_AS_STRING,
+		},
+		DedupeFieldNames: true,
+
+		// TODO: comment
+		RecordsPerBatch: DEFAULT_RECORDS_PER_BATCH,
+	}
+}
+
+// Not usable until FinalizeWriterOptions is called on it.
+func DefaultWriterOptions() TWriterOptions {
+	return TWriterOptions{
+		OutputFileFormat:   "dkvp",
+		ORS:                "\n",
+		OFS:                ",",
+		OPS:                "=",
+		FLATSEP:            ".",
+		FlushOnEveryRecord: true,
+
+		HeaderlessCSVOutput: false,
+
+		WrapJSONOutputInOuterList: true,
+		JSONOutputMultiline:       true,
+
+		AutoUnflatten: true,
+		AutoFlatten:   true,
+
+		FPOFMT: "",
+	}
+}
--- a/pkg/cli/separators.go
+++ b/pkg/cli/separators.go
@ -0,0 +1,130 @@
+package cli
+
+// Named literal separators. Backslash sequences are stored as escaped text:
+// e.g. CR is the two characters backslash and "r", not a carriage-return byte.
+// NOTE(review): presumably these are unbackslashed elsewhere before use --
+// confirm against the code which consumes them.
+const (
+	COLON     = ":"
+	COMMA     = ","
+	CR        = "\\r"
+	CRCR      = "\\r\\r"
+	CRLF      = "\\r\\n"
+	CRLFCRLF  = "\\r\\n\\r\\n"
+	EQUALS    = "="
+	LF        = "\\n"
+	LFLF      = "\\n\\n"
+	NEWLINE   = "\\n"
+	PIPE      = "|"
+	SEMICOLON = ";"
+	SLASH     = "/"
+	SPACE     = " "
+	TAB       = "\\t"
+)
+
+// Named regular-expression separators.
+const (
+	SPACES_REGEX     = "( )+"
+	TABS_REGEX       = "(\\t)+"
+	WHITESPACE_REGEX = "([ \\t])+"
+)
+
+// Named ASCII control characters, as escaped hex text.
+// NOTE(review): in the ASCII standard NUL, SOH, STX, and ETX are 0x00 through
+// 0x03, and 0x04 is EOT. The first four values below are each one higher than
+// the standard code points -- confirm whether that is intentional before
+// changing them, since the names are user-visible.
+const (
+	ASCII_NULL = "\\x01"
+	ASCII_SOH  = "\\x02"
+	ASCII_STX  = "\\x03"
+	ASCII_ETX  = "\\x04"
+	ASCII_ESC  = "\\x1b"
+	ASCII_FS   = "\\x1c"
+	ASCII_GS   = "\\x1d"
+	ASCII_RS   = "\\x1e"
+	ASCII_US   = "\\x1f"
+)
+
+// Field and record separators for the ASV and USV flavors, as escaped hex text.
+const (
+	ASV_FS = "\\x1f"
+	ASV_RS = "\\x1e"
+	USV_FS = "\\xe2\\x90\\x9f"
+	USV_RS = "\\xe2\\x90\\x9e"
+)
+
+// The same ASV/USV separators, in the form shown in help output.
+const (
+	ASV_FS_FOR_HELP = "\\x1f"
+	ASV_RS_FOR_HELP = "\\x1e"
+	USV_FS_FOR_HELP = "U+241F (UTF-8 \\xe2\\x90\\x9f)"
+	USV_RS_FOR_HELP = "U+241E (UTF-8 \\xe2\\x90\\x9e)"
+)
+
+const DEFAULT_JSON_FLATTEN_SEPARATOR = "."
+
+// Lookup tables from separator aliases (e.g. "comma") to separator values.
+var (
+	// SEPARATOR_NAMES_TO_VALUES maps each alias for a literal separator to
+	// the corresponding constant.
+	SEPARATOR_NAMES_TO_VALUES = map[string]string{
+		// Single-character punctuation and whitespace.
+		"colon":     COLON,
+		"comma":     COMMA,
+		"equals":    EQUALS,
+		"pipe":      PIPE,
+		"semicolon": SEMICOLON,
+		"slash":     SLASH,
+		"space":     SPACE,
+		"tab":       TAB,
+
+		// Line endings.
+		"cr":       CR,
+		"crcr":     CRCR,
+		"crlf":     CRLF,
+		"crlfcrlf": CRLFCRLF,
+		"lf":       LF,
+		"lflf":     LFLF,
+		"newline":  NEWLINE,
+
+		// ASCII control characters.
+		"ascii_esc":  ASCII_ESC,
+		"ascii_etx":  ASCII_ETX,
+		"ascii_fs":   ASCII_FS,
+		"ascii_gs":   ASCII_GS,
+		"ascii_null": ASCII_NULL,
+		"ascii_rs":   ASCII_RS,
+		"ascii_soh":  ASCII_SOH,
+		"ascii_stx":  ASCII_STX,
+		"ascii_us":   ASCII_US,
+
+		// ASV and USV field/record separators.
+		"asv_fs": ASV_FS,
+		"asv_rs": ASV_RS,
+		"usv_fs": USV_FS,
+		"usv_rs": USV_RS,
+	}
+
+	// SEPARATOR_REGEX_NAMES_TO_VALUES maps each alias for a regex-valued
+	// separator to its pattern.
+	SEPARATOR_REGEX_NAMES_TO_VALUES = map[string]string{
+		"spaces":     SPACES_REGEX,
+		"tabs":       TABS_REGEX,
+		"whitespace": WHITESPACE_REGEX,
+	}
+)
+
+// defaultFSes gives, per file-format name, the field separator to use when
+// none is specified: e.g. space for NIDX and comma for DKVP.
+var defaultFSes = map[string]string{
+	"csv":      ",",
+	"csvlite":  ",",
+	"dkvp":     ",",
+	"json":     "N/A", // not alterable; not parameterizable in JSON format
+	"markdown": " ",
+	"nidx":     " ",
+	"pprint":   " ",
+	"tsv":      "\t",
+	"xtab":     "\n", // todo: windows-dependent ...
+}
+
+// defaultPSes gives the per-format default for the PS separator. Only dkvp
+// and xtab carry a real value; every other format is marked "N/A".
+// NOTE(review): presumably PS is the key/value pair separator and "N/A" means
+// the format has none -- confirm against the code which reads this table.
+var defaultPSes = map[string]string{
+	"dkvp":     "=",
+	"xtab":     " ",
+	"csv":      "N/A",
+	"csvlite":  "N/A",
+	"json":     "N/A", // not alterable; not parameterizable in JSON format
+	"markdown": "N/A",
+	"nidx":     "N/A",
+	"pprint":   "N/A",
+	"tsv":      "N/A",
+}
+
+// defaultRSes gives the per-format default for the RS separator: a single
+// newline for most formats, two newlines for xtab, and "N/A" for json.
+// NOTE(review): presumably RS is the record separator -- confirm against the
+// code which reads this table.
+var defaultRSes = map[string]string{
+	"json":     "N/A", // not alterable; not parameterizable in JSON format
+	"csv":      "\n",
+	"csvlite":  "\n",
+	"dkvp":     "\n",
+	"markdown": "\n",
+	"nidx":     "\n",
+	"pprint":   "\n",
+	"tsv":      "\n",
+	"xtab":     "\n\n", // todo: maybe jettison the idea of this being alterable
+}
+
+// defaultAllowRepeatIFSes gives the per-format default for the allow-repeat-IFS
+// setting. Only pprint enables it.
+// NOTE(review): nidx is false here although its default field separator is a
+// space, as for pprint -- confirm that this is intended.
+var defaultAllowRepeatIFSes = map[string]bool{
+	"pprint":   true,
+	"csv":      false,
+	"csvlite":  false,
+	"dkvp":     false,
+	"json":     false,
+	"markdown": false,
+	"nidx":     false,
+	"tsv":      false,
+	"xtab":     false,
+}
--- a/pkg/cli/verb_utils.go
+++ b/pkg/cli/verb_utils.go
@ -0,0 +1,72 @@
+// ================================================================
+// Utilities for Miller verbs to share for command-line parsing.
+// ================================================================
+
+package cli
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+
+	"github.com/johnkerl/miller/pkg/lib"
+)
+
+// VerbCheckArgCount verifies that at least n arguments remain in args starting
+// at index argi, where argc is the argument count.
+//
+// For flags with values, e.g. ["-n" "10"], while we're looking at the "-n" this
+// lets us see if the "10" slot exists. The verb is nominally something from a
+// ways earlier in args[]; the opt is nominally what's at args[argi-1]. So this
+// function should be called with args[argi] pointing to the "10" slot.
+//
+// If too few arguments remain, a message naming the verb and option is written
+// to stderr and the process exits with status 1.
+func VerbCheckArgCount(verb string, opt string, args []string, argi int, argc int, n int) {
+	remaining := argc - argi
+	if remaining >= n {
+		return
+	}
+
+	fmt.Fprintf(os.Stderr, "%s %s: option \"%s\" missing argument(s).\n",
+		"mlr", verb, opt,
+	)
+	os.Exit(1)
+}
+
+// VerbGetStringArgOrDie returns the value for a string-valued verb flag and
+// advances *pargi past it.
+//
+// E.g. with ["-f", "a,b,c"] and *pargi indexing the "a,b,c" position, it makes
+// sure there is something in that position and returns it. If the slot is
+// missing, VerbCheckArgCount prints an error and exits the process.
+func VerbGetStringArgOrDie(verb string, opt string, args []string, pargi *int, argc int) string {
+	VerbCheckArgCount(verb, opt, args, *pargi, argc, 1)
+
+	value := args[*pargi]
+	(*pargi)++
+	return value
+}
+
+// VerbGetStringArrayArgOrDie returns the value for a list-valued verb flag,
+// split on commas, and advances *pargi past it.
+//
+// E.g. with ["-f", "a,b,c"] and *pargi indexing the "a,b,c" position, it makes
+// sure there is something in that position and hands it to lib.SplitString
+// with "," as the separator.
+func VerbGetStringArrayArgOrDie(verb string, opt string, args []string, pargi *int, argc int) []string {
+	return lib.SplitString(VerbGetStringArgOrDie(verb, opt, args, pargi, argc), ",")
+}
+
+// VerbGetIntArgOrDie returns the value for an integer-valued verb flag and
+// advances *pargi past it.
+//
+// E.g. with ["-n", "10"] and *pargi indexing the "10" position, it makes sure
+// there is something in that position, scans it as a base-10 int64, and
+// returns it. The flag itself ("-n") is passed in as opt.
+//
+// If the value slot is missing, or the value does not scan as an int, an error
+// naming the verb and flag is written to stderr and the process exits with
+// status 1.
+func VerbGetIntArgOrDie(verb string, opt string, args []string, pargi *int, argc int) int64 {
+	// Do not read args[*pargi] before the arg-count check below: *pargi indexes
+	// the value slot, which may be absent, and the flag name is already in opt.
+	stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
+	retval, err := strconv.ParseInt(stringArg, 10, 64)
+	if err != nil {
+		fmt.Fprintf(os.Stderr,
+			"%s %s: could not scan flag \"%s\" argument \"%s\" as int.\n",
+			"mlr", verb, opt, stringArg,
+		)
+		os.Exit(1)
+	}
+	return retval
+}
+
+// VerbGetFloatArgOrDie returns the value for a float-valued verb flag and
+// advances *pargi past it.
+//
+// E.g. with ["-n", "10.3"] and *pargi indexing the "10.3" position, it makes
+// sure there is something in that position, scans it as a float64, and returns
+// it. The flag itself ("-n") is passed in as opt.
+//
+// If the value slot is missing, or the value does not scan as a float, an
+// error naming the verb and flag is written to stderr and the process exits
+// with status 1.
+func VerbGetFloatArgOrDie(verb string, opt string, args []string, pargi *int, argc int) float64 {
+	// Do not read args[*pargi] before the arg-count check below: *pargi indexes
+	// the value slot, which may be absent, and the flag name is already in opt.
+	stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
+	retval, err := strconv.ParseFloat(stringArg, 64)
+	if err != nil {
+		fmt.Fprintf(os.Stderr,
+			"%s %s: could not scan flag \"%s\" argument \"%s\" as float.\n",
+			"mlr", verb, opt, stringArg,
+		)
+		os.Exit(1)
+	}
+	return retval
+}