Export library code in pkg/ (#1391)

* Export library code in `pkg/`

* new doc page
This commit is contained in:
John Kerl 2023-09-10 17:15:13 -04:00 committed by GitHub
parent 93b7c8eac0
commit 268a96d002
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
358 changed files with 1076 additions and 693 deletions

4
pkg/cli/README.md Normal file
View file

@ -0,0 +1,4 @@
Datatypes for parsing the Miller command line, and the flags table.
* `pkg/climain` is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer chain of `put` then `filter`, and a JSON record-writer.
* `pkg/cli` contains datatypes and the flags table for the CLI-parser, which was split out to avoid a Go package-import cycle.

3
pkg/cli/doc.go Normal file
View file

@ -0,0 +1,3 @@
// Package cli contains the flags table and associated datatypes for parsing
// the Miller command line.
package cli

535
pkg/cli/flag_types.go Normal file
View file

@ -0,0 +1,535 @@
// ================================================================
// Miller support for command-line flags.
//
// * Flags are used for several purposes:
//
// o Command-line parsing the main mlr program.
//
// o Record-reader and record-writer options for a few verbs such as join and
// tee. E.g. `mlr --csv join -f foo.tsv --tsv ...`: the main input files are
// CSV but the join-in file is TSV.
//
// o Processing .mlrrc files.
//
// o Autogenerating on-line help for `mlr help flags`.
//
// o Autogenerating the manpage for `man mlr`.
//
// o Autogenerating webdocs (mkdocs).
//
// * For these reasons, flags are organized into tables; for documentation
// purposes, flags are organized into sections (see pkg/cli/option_parse.go).
//
// * The Flag struct separates out flag name (e.g. `--csv`), any alternate
// names (e.g. `-c`), any arguments the flag may take, a help string, and a
// command-line parser function.
//
// * The tabular structure may seem overwrought; in fact it has been a blessing
// to develop the tabular structure since these flags objects need to serve
// so many roles as listed above.
//
// * I don't use Go flags for a few reasons. The most important one is that I
// need to handle repeated flags, e.g. --from can be used more than once for
// mlr, and -f/-n/-r etc can be used more than once for mlr sort, etc. I also
// insist on total control of flag formatting including alphabetization of
// flags for on-line help and documentation systems.
// ================================================================
package cli
import (
"fmt"
"sort"
"strings"
"github.com/johnkerl/miller/pkg/colorizer"
"github.com/johnkerl/miller/pkg/lib"
)
// ----------------------------------------------------------------
// Data types used within the flags table.
// FlagParser is a function which takes a flag such as `--foo`.
//   - It should assume that a flag.Owns method has already been invoked to be
//     sure that this function is indeed the right one to call for `--foo`.
//   - The FlagParser function is responsible for advancing *pargi by 1 (if
//     `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
//     the latter case, and mutating the options struct.
//   - Successful handling of the flag is indicated by this function making a
//     non-zero increment of *pargi. FlagTable.Parse returns false when *pargi
//     has not advanced past its value from before the call.
type FlagParser func(
args []string,
argc int,
pargi *int,
options *TOptions,
)
// ----------------------------------------------------------------
// FlagTable holds all the flags for Miller, organized into sections.
type FlagTable struct {
// sections is the list of flag sections; the Sort method orders it by
// lowercased section name.
sections []*FlagSection
}
// FlagSection holds all the flags in a given category, where these
// categories exist for documentation purposes.
//
// The name should be right-cased for webdocs. For on-line help and
// manpage use, it will get fully uppercased.
//
// The infoPrinter provides summary/overview for all flags in the
// section, for on-line help / webdocs.
//
// The flags are the section's members; FlagSection.Sort orders them by
// lowercased flag name.
type FlagSection struct {
name string
infoPrinter func()
flags []Flag
}
// Flag is a container for all runtime as well as documentation information for
// a flag.
type Flag struct {
// In most cases, the flag has just one spelling, like "--ifs".
name string
// In some cases, the flag has more than one spelling, like "-h" and
// "--help", or "-c" and "--csv". The altNames field can be omitted from
// struct initializers, which in Go means it will read as nil.
altNames []string
// If not "", a name for the flag's argument, for on-line help. E.g. the
// "{bar}" in "--foo {bar}". It should always be written in curly braces.
arg string
// Help string for `mlr help flags`, `man mlr`, and webdocs.
// * It should be all one line within the source code. The text will be
// reformatted as a paragraph for on-line help / manpage, so there should
// be no attempt at line-breaking within the help string.
// * Any code bits should be marked with backticks. These look OK for
// on-line help / manpage, and render marvelously for webdocs which
// take markdown.
// * After changing flags you can run `make precommit` in the Miller
// repo base directory followed by `git diff` to see how the output
// looks. See also the README.md files in the docs and man directories
// for how to look at the autogenned docs pre-commit.
help string
// A function for parsing the command line, as described in the FlagParser
// type's documentation.
parser FlagParser
// For format-conversion keystroke-savers, a matrix is plenty -- we don't
// need to print a tedious 60-line list. When this is true,
// FlagSection.ShowHelpForFlags skips the flag.
suppressFlagEnumeration bool
}
// ================================================================
// FlagTable methods
// Sort puts the table's sections into alphabetical order by lowercased
// section name, so that on-line help is easy to navigate. It is intended to be
// called from func-init context.
func (ft *FlagTable) Sort() {
	sections := ft.sections
	// sort.Slice wants a strict "less than" predicate for an ascending sort.
	byLowercasedName := func(i, j int) bool {
		lhs := strings.ToLower(sections[i].name)
		rhs := strings.ToLower(sections[j].name)
		return lhs < rhs
	}
	sort.Slice(sections, byLowercasedName)
}
// Parse handles the command-line flag at args[*pargi]. The first Flag in the
// table which owns that flag has its parser invoked; the parser is expected
// to advance *pargi past the flag and any arguments it consumes (e.g. by 2
// for `--ifs pipe`, by 1 for `-I`).
//
// The return value is true if an owning flag was found and its parser
// advanced *pargi. It is false if no flag owns the argument, or if the
// owning flag's parser left *pargi where it was.
func (ft *FlagTable) Parse(
	args []string,
	argc int,
	pargi *int,
	options *TOptions,
) bool {
	for _, section := range ft.sections {
		for i := range section.flags {
			candidate := &section.flags[i]
			if !candidate.Owns(args[*pargi]) {
				continue
			}
			// Only the first owner is consulted; compare the index before
			// and after to see whether the parser accepted the flag.
			before := *pargi
			candidate.parser(args, argc, pargi, options)
			return *pargi > before
		}
	}
	return false
}
// ShowHelp prints all-in-one on-line help, nominally for `mlr help flags`.
// For each section it prints the uppercased section name (passed through the
// help colorizer), a blank line, the section's info text, and then the help
// for the section's flags. Sections are separated by a blank line.
func (ft *FlagTable) ShowHelp() {
	for i := range ft.sections {
		section := ft.sections[i]
		if i != 0 {
			// Blank separator before every section but the first.
			fmt.Println()
		}
		title := strings.ToUpper(section.name)
		fmt.Println(colorizer.MaybeColorizeHelp(title, true))
		fmt.Println()
		section.PrintInfo()
		section.ShowHelpForFlags()
	}
}
// ListFlagSections prints the name of each section, one per line, in table
// order. It exposes some of the flags-table structure so that Ruby autogen
// scripts for on-line help and webdocs can do their own looping.
func (ft *FlagTable) ListFlagSections() {
	for i := range ft.sections {
		fmt.Println(ft.sections[i].name)
	}
}
// ShowHelpForSection prints the info text and the per-flag help for the first
// section whose name is exactly sectionName. It returns true if such a
// section was found, else false. It exposes some of the flags-table structure
// so that Ruby autogen scripts for on-line help and webdocs can do their own
// looping.
func (ft *FlagTable) ShowHelpForSection(sectionName string) bool {
	for _, section := range ft.sections {
		if section.name != sectionName {
			continue
		}
		section.PrintInfo()
		section.ShowHelpForFlags()
		return true
	}
	return false
}
// ShowHelpForSectionViaDowndash looks up flag-section help given a
// "downdashed" section name. Sections are named like "CSV-only flags";
// `mlr help` uses `mlr help csv-only-flags`. The latter is downcased from the
// former, with spaces replaced by dashes. For the first section whose
// downdashed name equals downdashSectionName, this prints the uppercased
// section name (passed through the help colorizer), the section's info text,
// and its per-flag help, then returns true. It returns false if no section
// matches.
func (ft *FlagTable) ShowHelpForSectionViaDowndash(downdashSectionName string) bool {
	for _, section := range ft.sections {
		if section.GetDowndashSectionName() != downdashSectionName {
			continue
		}
		title := strings.ToUpper(section.name)
		fmt.Println(colorizer.MaybeColorizeHelp(title, true))
		section.PrintInfo()
		section.ShowHelpForFlags()
		return true
	}
	return false
}
// PrintInfoForSection prints the info text for the first section whose name
// is exactly sectionName, returning true if one was found and false
// otherwise. It exposes some of the flags-table structure so that Ruby
// autogen scripts for on-line help and webdocs can do their own looping.
func (ft *FlagTable) PrintInfoForSection(sectionName string) bool {
	for i := range ft.sections {
		if section := ft.sections[i]; section.name == sectionName {
			section.PrintInfo()
			return true
		}
	}
	return false
}
// ListFlagsForSection lists the flags of the first section whose name is
// exactly sectionName, returning true if one was found and false otherwise.
// It exposes some of the flags-table structure so that Ruby autogen scripts
// for on-line help and webdocs can do their own looping.
func (ft *FlagTable) ListFlagsForSection(sectionName string) bool {
	for i := range ft.sections {
		if section := ft.sections[i]; section.name == sectionName {
			section.ListFlags()
			return true
		}
	}
	return false
}
// ShowHeadlineForFlag prints the headline of the first flag which owns
// flagName, returning true if there is one and false otherwise.
//
// Given flag named `--foo`, altName `-f`, and argument spec `{bar}`, the
// headline is `--foo or -f {bar}`. This is the bit which is highlighted in
// on-line help; its length is also used for alignment decisions in the on-line
// help and the manpage.
func (ft *FlagTable) ShowHeadlineForFlag(flagName string) bool {
	for _, section := range ft.sections {
		for i := range section.flags {
			if candidate := &section.flags[i]; candidate.Owns(flagName) {
				fmt.Println(candidate.GetHeadline())
				return true
			}
		}
	}
	return false
}
// ShowHelpForFlag prints the flag's help-string all on one line, without the
// flag's name. This is for webdoc usage where the browser does dynamic
// line-wrapping, as the user resizes the browser window. The return value is
// whatever showHelpForFlagMaybeWithName returns: true if a flag owning
// flagName was found, else false.
func (ft *FlagTable) ShowHelpForFlag(flagName string) bool {
return ft.showHelpForFlagMaybeWithName(flagName, false)
}
// ShowHelpForFlagWithName prints the flag's name colorized, then the flag's
// help-string all on one line. This is for on-line help usage. The return
// value is whatever showHelpForFlagMaybeWithName returns: true if a flag
// owning flagName was found, else false.
func (ft *FlagTable) ShowHelpForFlagWithName(flagName string) bool {
return ft.showHelpForFlagMaybeWithName(flagName, true)
}
// showHelpForFlagMaybeWithName supports ShowHelpForFlag and
// ShowHelpForFlagWithName. For the first flag which owns flagName it prints
// the flag's one-line help, preceded -- when showName is true -- by flagName
// as given by the caller, passed through the help colorizer. It returns true
// if an owning flag was found, else false.
func (ft *FlagTable) showHelpForFlagMaybeWithName(flagName string, showName bool) bool {
	for _, section := range ft.sections {
		for i := range section.flags {
			candidate := &section.flags[i]
			if !candidate.Owns(flagName) {
				continue
			}
			if showName {
				fmt.Println(colorizer.MaybeColorizeHelp(flagName, true))
			}
			fmt.Println(candidate.GetHelpOneLine())
			return true
		}
	}
	return false
}
// ShowHelpForFlagApproximateWithName is like ShowHelpForFlagWithName but
// allows substring matches. This is for on-line help usage.
//
// Unlike the exact-match lookups, this does not stop at the first hit: for
// every flag in the table for which Matches(searchString) is true, it prints
// the flag's primary name (passed through the help colorizer) and then the
// flag's one-line help.
//
// The return value is true if at least one flag matched, else false, so that
// callers can tell whether any help was shown.
func (ft *FlagTable) ShowHelpForFlagApproximateWithName(searchString string) bool {
	found := false
	for _, fs := range ft.sections {
		for _, flag := range fs.flags {
			if flag.Matches(searchString) {
				fmt.Println(colorizer.MaybeColorizeHelp(flag.name, true))
				fmt.Println(flag.GetHelpOneLine())
				found = true
			}
		}
	}
	return found
}
// GetDowndashSectionNames returns the downdashed name of every section, in
// table order: e.g. "CSV-only flags" becomes "csv-only-flags". This is for
// the benefit of per-section help in `mlr help topics`.
func (ft *FlagTable) GetDowndashSectionNames() []string {
	names := make([]string, 0, len(ft.sections))
	for _, section := range ft.sections {
		// The downcasing and space-to-dash replacement is done by the section.
		names = append(names, section.GetDowndashSectionName())
	}
	return names
}
// NilCheck checks to see if any flag/section is missing help info. This arises
// since in Go you needn't specify all struct initializers, so for example a
// Flag struct-initializer which doesn't say `help: "..."` will have empty help
// string. This nil-checking doesn't need to be done on every Miller
// invocation, but rather, only at build time. The `mlr help` terminal has an
// entrypoint wherein a regression-test case can do `mlr help nil-check` and
// make sure this function exits cleanly.
//
// The table itself must have a non-nil, non-empty section list; then each
// section is checked in turn. A success message is printed at the end.
func (ft *FlagTable) NilCheck() {
	sections := ft.sections
	lib.InternalCodingErrorWithMessageIf(sections == nil, "Nil table sections")
	lib.InternalCodingErrorWithMessageIf(len(sections) == 0, "Zero table sections")
	for i := range sections {
		sections[i].NilCheck()
	}
	fmt.Println("Flag-table nil check completed successfully.")
}
// ================================================================
// FlagSection methods
// Sort puts the section's flags into alphabetical order by lowercased primary
// flag name, so that on-line help is easy to navigate. It is intended to be
// called from func-init context.
func (fs *FlagSection) Sort() {
	flags := fs.flags
	// sort.Slice wants a strict "less than" predicate for an ascending sort.
	byLowercasedName := func(i, j int) bool {
		lhs := strings.ToLower(flags[i].name)
		rhs := strings.ToLower(flags[j].name)
		return lhs < rhs
	}
	sort.Slice(flags, byLowercasedName)
}
// ShowHelpForFlags prints help for each flag in the section, nominally for
// `mlr help flags`. Flags marked with suppressFlagEnumeration are skipped:
// for format-conversion keystroke-savers, a matrix is plenty -- we don't need
// to print a tedious 60-line list.
func (fs *FlagSection) ShowHelpForFlags() {
	for i := range fs.flags {
		if flag := &fs.flags[i]; !flag.suppressFlagEnumeration {
			flag.ShowHelp()
		}
	}
}
// PrintInfo invokes the section's infoPrinter and then prints a blank line.
// It exposes some of the flags-table structure, so Ruby autogen scripts for
// on-line help and webdocs can traverse the structure with looping inside
// their own code.
func (fs *FlagSection) PrintInfo() {
fs.infoPrinter()
fmt.Println()
}
// ListFlags prints the primary name of each flag in the section, one per
// line. It exposes some of the flags-table structure so that Ruby autogen
// scripts for on-line help and webdocs can do their own looping.
func (fs *FlagSection) ListFlags() {
	for i := range fs.flags {
		fmt.Println(fs.flags[i].name)
	}
}
// GetDowndashSectionName maps a section name like "CSV-only flags" to
// "csv-only-flags": the name is lowercased and each space is replaced by a
// dash. This is for the benefit of per-section help in `mlr help topics`.
func (fs *FlagSection) GetDowndashSectionName() string {
	lowercased := strings.ToLower(fs.name)
	return strings.ReplaceAll(lowercased, " ", "-")
}
// NilCheck verifies that the section has a name, an infoPrinter, and a
// non-nil, non-empty list of flags, and then checks each flag in turn. See
// the comments above FlagTable's NilCheck method for why this exists.
func (fs *FlagSection) NilCheck() {
	sectionName := fs.name
	lib.InternalCodingErrorWithMessageIf(sectionName == "", "Empty section name")
	lib.InternalCodingErrorWithMessageIf(fs.infoPrinter == nil, "Nil infoPrinter for section "+sectionName)
	lib.InternalCodingErrorWithMessageIf(fs.flags == nil, "Nil flags for section "+sectionName)
	lib.InternalCodingErrorWithMessageIf(len(fs.flags) == 0, "Zero flags for section "+sectionName)
	for i := range fs.flags {
		fs.flags[i].NilCheck()
	}
}
// ================================================================
// Flag methods
// Owns determines whether this object handles a command-line flag such as
// "--foo": that is, whether input is exactly equal to the flag's name or to
// any of its altNames. This is used for command-line parsing, as well as for
// on-line help with exact match on flag name.
func (flag *Flag) Owns(input string) bool {
	for _, altName := range flag.altNames {
		if altName == input {
			return true
		}
	}
	return flag.name == input
}
// Matches is like Owns but is for substring matching: it reports whether
// input occurs anywhere within the flag's name or within any of its altNames.
// This is for on-line help with approximate match on flag name.
func (flag *Flag) Matches(input string) bool {
	for _, altName := range flag.altNames {
		if strings.Contains(altName, input) {
			return true
		}
	}
	return strings.Contains(flag.name, input)
}
// ShowHelp produces formatting for `mlr help flags` and manpage use.
// Example:
//   - Flag name is `--foo`
//   - altName is `-f`
//   - Argument spec is `{bar}`
//   - Help string is "Lorem ipsum dolor sit amet, consectetur adipiscing elit,
//     sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim
//     ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
//     ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate
//     velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat
//     cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id
//     est laborum."
//   - The headline (see the GetHeadline function) is `--foo or -f {bar}`.
//   - We place the headline left in a 25-character column, colorized with the
//     help color.
//   - We format the help text as 55-character lines and place them
//     to the right.
//   - The result looks like
//
//	--foo or -f {bar}        Lorem ipsum dolor sit amet, consectetur adipiscing
//	                         elit, sed do eiusmod tempor incididunt ut labore et
//	                         dolore magna aliqua. Ut enim ad minim veniam, quis
//	                         nostrud exercitation ullamco laboris nisi ut aliquip
//	                         ex ea commodo consequat. Duis aute irure dolor in
//	                         reprehenderit in voluptate velit esse cillum dolore
//	                         eu fugiat nulla pariatur. Excepteur sint occaecat
//	                         cupidatat non proident, sunt in culpa qui officia
//	                         deserunt mollit anim id est laborum.
//
//   - If the headline is too long (25 characters or more) we put the first
//     help line a line below like this:
//
//	--foo-flag-is-very-very-long {bar}
//	                         Lorem ipsum dolor sit amet, consectetur adipiscing
//	                         elit, sed do eiusmod tempor incididunt ut labore et
//	                         dolore magna aliqua. Ut enim ad minim veniam, quis
//	                         nostrud exercitation ullamco laboris nisi ut aliquip
//	                         ex ea commodo consequat. Duis aute irure dolor in
//	                         reprehenderit in voluptate velit esse cillum dolore
//	                         eu fugiat nulla pariatur. Excepteur sint occaecat
//	                         cupidatat non proident, sunt in culpa qui officia
//	                         deserunt mollit anim id est laborum.
func (flag *Flag) ShowHelp() {
	headline := flag.GetHeadline()
	paddedHeadline := fmt.Sprintf("%-25s", headline)
	helpLines := lib.FormatAsParagraph(flag.help, 55)

	// The headline always comes first, padded out to the left column's width.
	fmt.Printf("%s", colorizer.MaybeColorizeHelp(paddedHeadline, true))

	// Index of the first help line which goes on an indented line of its own.
	firstIndented := 0
	if len(headline) >= 25 || len(helpLines) == 0 {
		// Either the headline overflows its column, so all help text starts
		// on the next line, or there is no help text at all.
		fmt.Println()
	} else {
		// The first help line shares the headline's line.
		fmt.Printf("%s\n", helpLines[0])
		firstIndented = 1
	}

	for i := firstIndented; i < len(helpLines); i++ {
		fmt.Printf("%25s%s\n", " ", helpLines[i])
	}
}
// GetHeadline puts together the flag name, any altNames, and any argument spec
// into a single string for the left column of online help / manpage content.
// The name and altNames are joined with " or "; if the argument spec is
// non-empty it is appended after a space. Given flag named `--foo`, altName
// `-f`, and argument spec `{bar}`, the headline is `--foo or -f {bar}`. This
// is the bit which is highlighted in on-line help; its length is also used for
// alignment decisions in the on-line help and the manpage.
func (flag *Flag) GetHeadline() string {
	// Primary name first, then the alternates in their declared order.
	spellings := append([]string{flag.name}, flag.altNames...)
	headline := strings.Join(spellings, " or ")
	if flag.arg == "" {
		return headline
	}
	return headline + " " + flag.arg
}
// GetHelpOneLine returns the help string all on one line, with each newline
// replaced by a space (just in case anyone typed it in using multiline
// string-literal backtick notation in Go). This is suitable for webdoc use
// where we create all one line, and the browser dynamically line-wraps as the
// user resizes the window.
func (flag *Flag) GetHelpOneLine() string {
	return strings.ReplaceAll(flag.help, "\n", " ")
}
// NilCheck verifies that the flag has a non-empty name, a non-empty help
// string, and a non-nil parser function. See the comments above FlagTable's
// NilCheck method for why this exists.
func (flag *Flag) NilCheck() {
lib.InternalCodingErrorWithMessageIf(flag.name == "", "Empty flag name")
lib.InternalCodingErrorWithMessageIf(flag.help == "", "Empty flag help for flag "+flag.name)
lib.InternalCodingErrorWithMessageIf(flag.parser == nil, "Nil parser help for flag "+flag.name)
}
// ================================================================
// Helper methods
// NoOpParse1 is a FlagParser for flags which take no argument and are
// backward-compatibility no-ops: it advances *pargi past the flag itself and
// touches nothing else.
func NoOpParse1(args []string, argc int, pargi *int, options *TOptions) {
	*pargi++
}

View file

@ -0,0 +1,79 @@
package cli
// ================================================================
// Decide whether to insert a flatten or unflatten verb at the end of the
// chain. See also repl/verbs.go which handles the same issue in the REPL.
//
// ----------------------------------------------------------------
// PROBLEM TO BE SOLVED:
//
// JSON has nested structures and CSV et al. do not. For example:
// {
// "req" : {
// "method": "GET",
// "path": "api/check",
// }
// }
//
// For CSV we flatten this down to
//
// {
// "req.method": "GET",
// "req.path": "api/check"
// }
//
// ----------------------------------------------------------------
// APPROACH:
//
// Use the Principle of Least Surprise (POLS).
//
// * If input is JSON and output is JSON:
// o Records can be nested from record-read
// o They remain that way through the Miller record-processing stream
// o They are nested on record-write
// o No action needs to be taken
//
// * If input is JSON and output is non-JSON:
// o Records can be nested from record-read
// o They remain that way through the Miller record-processing stream
// o On record-write, nested structures will be converted to string (carriage
// returns and all) using json_stringify. People *might* want this but
// (using POLS) we will (by default) AUTO-FLATTEN for them. There is a
// --no-auto-flatten CLI flag for those who want it.
//
// * If input is non-JSON and output is non-JSON:
// o If there is a "req.method" field, people should be able to do
// 'mlr sort -f req.method' with no surprises. (Again, POLS.) Therefore
// no auto-unflatten on input. People can insert an unflatten verb
// into their verb chain if they really want unflatten for non-JSON
// files.
// o The DSL can make nested data, so AUTO-FLATTEN at output.
//
// * If input is non-JSON and output is JSON:
// o Default is to auto-unflatten at output.
// o There is a --no-auto-unflatten for those who want it.
// ================================================================
// DecideFinalFlatten reports whether a flatten verb should be inserted at the
// end of the chain: true exactly when auto-flatten is enabled in the writer
// options and the output file format is something other than "json".
func DecideFinalFlatten(writerOptions *TWriterOptions) bool {
	outputIsJSON := writerOptions.OutputFileFormat == "json"
	return writerOptions.AutoFlatten && !outputIsJSON
}
// DecideFinalUnflatten reports whether an unflatten verb should be inserted
// at the end of the chain: true exactly when auto-unflatten is enabled in the
// writer options, the input file format is something other than "json", and
// the output file format is "json".
func DecideFinalUnflatten(options *TOptions) bool {
	inputIsJSON := options.ReaderOptions.InputFileFormat == "json"
	outputIsJSON := options.WriterOptions.OutputFileFormat == "json"
	return options.WriterOptions.AutoUnflatten && !inputIsJSON && outputIsJSON
}

38
pkg/cli/mlrcli_util.go Normal file
View file

@ -0,0 +1,38 @@
package cli
import (
"fmt"
"os"
)
// CheckArgCount is for flags with values, e.g. ["-n" "10"], while we're
// looking at the "-n": this lets us see if the "10" slot exists. Here argi is
// the index of the flag, and n is the number of slots required from argi
// onward. If fewer than n slots remain before argc, an error message naming
// the flag is printed to stderr and the process exits with status 1.
func CheckArgCount(args []string, argi int, argc int, n int) {
	if remaining := argc - argi; remaining >= n {
		return
	}
	fmt.Fprintf(os.Stderr, "%s: option \"%s\" missing argument(s).\n", "mlr", args[argi])
	fmt.Fprintf(os.Stderr, "Please run \"%s --help\" for detailed usage information.\n", "mlr")
	os.Exit(1)
}
// SeparatorFromArg is for letting people do things like `--ifs pipe` rather
// than `--ifs '|'`. If name is a key in SEPARATOR_NAMES_TO_VALUES, the
// corresponding value is returned; otherwise name itself is returned
// unchanged.
func SeparatorFromArg(name string) string {
	if value, isAlias := SEPARATOR_NAMES_TO_VALUES[name]; isAlias {
		return value
	}
	return name
}
// SeparatorRegexFromArg is for letting people do things like
// `--ifs-regex whitespace` rather than `--ifs-regex '([ \t])+'`. If name is a
// key in SEPARATOR_REGEX_NAMES_TO_VALUES, the corresponding value is
// returned; otherwise name itself is returned unchanged.
func SeparatorRegexFromArg(name string) string {
	if regex, isAlias := SEPARATOR_REGEX_NAMES_TO_VALUES[name]; isAlias {
		return regex
	}
	return name
}

3032
pkg/cli/option_parse.go Normal file

File diff suppressed because it is too large Load diff

226
pkg/cli/option_types.go Normal file
View file

@ -0,0 +1,226 @@
// ================================================================
// Items which might better belong in miller/cli, but which are placed in a
// deeper package to avoid a package-dependency cycle between miller/cli and
// miller/transforming.
// ================================================================
package cli
import (
"regexp"
"github.com/johnkerl/miller/pkg/lib"
)
// TCommentHandling enumerates the choices for the CommentHandling field of
// TReaderOptions. DefaultReaderOptions uses CommentsAreData.
// NOTE(review): the per-value descriptions below are read off the constant
// names; confirm against the record-readers which consume them.
type TCommentHandling int
const (
// Presumably: lines starting with the comment string are ordinary data.
CommentsAreData TCommentHandling = iota
// Presumably: comment lines are dropped from the input.
SkipComments
// Presumably: comment lines are passed along to the output.
PassComments
)
// Default values for reader options. The DEFAULT_GEN_* values populate the
// GeneratorOptions in DefaultReaderOptions, and DEFAULT_RECORDS_PER_BATCH
// populates its RecordsPerBatch.
const (
	DEFAULT_COMMENT_STRING = "#"

	DEFAULT_GEN_FIELD_NAME      = "i"
	DEFAULT_GEN_START_AS_STRING = "1"
	DEFAULT_GEN_STEP_AS_STRING  = "1"
	DEFAULT_GEN_STOP_AS_STRING  = "100"

	DEFAULT_RECORDS_PER_BATCH = 500
)
// TGeneratorOptions holds the settings for the fake internal-data-generator
// 'reader' (see the GeneratorOptions field of TReaderOptions). All values are
// kept as strings; DefaultReaderOptions fills them from the DEFAULT_GEN_*
// constants.
type TGeneratorOptions struct {
FieldName string
StartAsString string
StepAsString string
StopAsString string
}
// TReaderOptions holds the record-reader settings: input file format,
// separators, CSV-specific switches, comment handling, and input
// preprocessing. A value from DefaultReaderOptions is not usable until
// FinalizeReaderOptions has been called on it.
type TReaderOptions struct {
InputFileFormat string
IFS string
IPS string
IRS string
AllowRepeatIFS bool
// DefaultReaderOptions leaves these nil; per its comments,
// FinalizeReaderOptions computes them.
IFSRegex *regexp.Regexp
IPSRegex *regexp.Regexp
DedupeFieldNames bool
// If unspecified on the command line, these take input-format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX;
// default AllowRepeatIFS is false for CSV but true for PPRINT.
ifsWasSpecified bool
ipsWasSpecified bool
irsWasSpecified bool
allowRepeatIFSWasSpecified bool
UseImplicitCSVHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool
CommentHandling TCommentHandling
CommentString string
// Fake internal-data-generator 'reader'
GeneratorOptions TGeneratorOptions
// For out-of-process handling of compressed data, via popen
Prepipe string
// For most things like gunzip we do 'gunzip < filename | mlr ...' if
// filename is present, else 'gunzip | mlr ...' if reading from stdin.
// However some commands like 'unzip -qc' are weird so this option lets
// people give the command and we won't insert the '<'.
PrepipeIsRaw bool
// For in-process gunzip/bunzip2/zcat (distinct from prepipe)
FileInputEncoding lib.TFileInputEncoding
// TODO: comment
// NOTE(review): presumably the number of records the reader hands off per
// batch -- confirm. DefaultReaderOptions sets it to
// DEFAULT_RECORDS_PER_BATCH.
RecordsPerBatch int64
}
// ----------------------------------------------------------------
// TWriterOptions holds the record-writer settings: output file format,
// separators, per-format output switches, and the auto-flatten /
// auto-unflatten controls. A value from DefaultWriterOptions is not usable
// until FinalizeWriterOptions has been called on it.
type TWriterOptions struct {
OutputFileFormat string
ORS string
OFS string
OPS string
FLATSEP string
FlushOnEveryRecord bool
flushOnEveryRecordWasSpecified bool
// If unspecified on the command line, these take input-format-dependent
// defaults. E.g. default FS is comma for DKVP but space for NIDX.
ofsWasSpecified bool
opsWasSpecified bool
orsWasSpecified bool
HeaderlessCSVOutput bool
BarredPprintOutput bool
RightAlignedPPRINTOutput bool
RightAlignedXTABOutput bool
// JSON output: --jlistwrap on, --jvstack on
// JSON Lines output: --jlistwrap off, --jvstack off
WrapJSONOutputInOuterList bool // --jlistwrap
JSONOutputMultiline bool // --jvstack
JVQuoteAll bool // --jvquoteall
// Not using miller/types enum to avoid package cycle
CSVQuoteAll bool // --quote-all
// When we read things like
//
// x.a=1,x.b=2
//
// (shown with the default FLATSEP of "."), which is how we write out nested
// data structures for non-nested formats (all but JSON), the default
// behavior is to unflatten them back to
//
// {"x": {"a": 1, "b": 2}}
//
// unless the user explicitly asks to suppress that.
AutoUnflatten bool
// The default behavior is to flatten nested data structures like
//
// {"x": {"a": 1, "b": 2}}
//
// down to
//
// x.a=1,x.b=2
//
// (shown with the default FLATSEP of "."), which is how we write out nested
// data structures for non-nested formats (all but JSON) -- unless the user
// explicitly asks to suppress that.
AutoFlatten bool
// For floating-point numbers: "" means use the Go default.
FPOFMT string
// Fatal the process when error data in a given record is about to be output.
FailOnDataError bool
}
// ----------------------------------------------------------------
// TOptions is the top-level options struct which FlagParser functions mutate:
// it bundles the reader options, the writer options, and the main-command
// settings such as file names and in-place mode.
type TOptions struct {
ReaderOptions TReaderOptions
WriterOptions TWriterOptions
// Data files to be operated on: e.g. given 'mlr cat foo.dat bar.dat', this
// is ["foo.dat", "bar.dat"].
FileNames []string
// DSL files to be loaded for every put/filter operation -- like 'put -f'
// or 'filter -f' but specified up front on the command line, suitable for
// .mlrrc. Use-case is someone has DSL functions they always want to be
// defined.
//
// Risk of CVE if this is in .mlrrc so --load and --mload are explicitly
// denied in the .mlrrc reader.
DSLPreloadFileNames []string
NRProgressMod int64
DoInPlace bool // mlr -I
NoInput bool // mlr -n
// NOTE(review): HaveRandSeed presumably records whether RandSeed was
// supplied by the user -- confirm against the flag parser that sets them.
HaveRandSeed bool
RandSeed int64
PrintElapsedTime bool // mlr --time
}
// DefaultOptions returns a freshly allocated TOptions holding the default
// reader and writer options and empty (non-nil) file-name lists. All other
// fields have their zero values.
//
// Not usable until FinalizeReaderOptions and FinalizeWriterOptions are called.
func DefaultOptions() *TOptions {
	options := new(TOptions)
	options.ReaderOptions = DefaultReaderOptions()
	options.WriterOptions = DefaultWriterOptions()
	options.FileNames = []string{}
	options.DSLPreloadFileNames = []string{}
	options.NoInput = false
	return options
}
// DefaultReaderOptions returns reader options for DKVP input with newline,
// comma, and equals-sign as record, field, and pair separators. Fields not
// assigned here are left at their zero values.
//
// Not usable until FinalizeReaderOptions is called on it.
func DefaultReaderOptions() TReaderOptions {
	var readerOptions TReaderOptions

	readerOptions.InputFileFormat = "dkvp" // TODO: constify at top, or maybe formats.DKVP in package

	// FinalizeReaderOptions will compute IFSRegex and IPSRegex.
	readerOptions.IRS = "\n"
	readerOptions.IFS = ","
	readerOptions.IPS = "="

	readerOptions.CommentHandling = CommentsAreData
	readerOptions.FileInputEncoding = lib.FileInputEncodingDefault

	readerOptions.GeneratorOptions = TGeneratorOptions{
		FieldName:     DEFAULT_GEN_FIELD_NAME,
		StartAsString: DEFAULT_GEN_START_AS_STRING,
		StepAsString:  DEFAULT_GEN_STEP_AS_STRING,
		StopAsString:  DEFAULT_GEN_STOP_AS_STRING,
	}

	readerOptions.DedupeFieldNames = true

	// TODO: comment
	readerOptions.RecordsPerBatch = DEFAULT_RECORDS_PER_BATCH

	return readerOptions
}
// DefaultWriterOptions returns writer options for DKVP output with newline,
// comma, and equals-sign as record, field, and pair separators, "." as the
// flatten separator, flush-on-every-record on, JSON list-wrap and multiline
// output on, and auto-flatten / auto-unflatten on. Fields not assigned here
// are left at their zero values.
//
// Not usable until FinalizeWriterOptions is called on it.
func DefaultWriterOptions() TWriterOptions {
	var writerOptions TWriterOptions

	writerOptions.OutputFileFormat = "dkvp"
	writerOptions.ORS = "\n"
	writerOptions.OFS = ","
	writerOptions.OPS = "="
	writerOptions.FLATSEP = "."

	writerOptions.FlushOnEveryRecord = true
	writerOptions.HeaderlessCSVOutput = false

	writerOptions.WrapJSONOutputInOuterList = true
	writerOptions.JSONOutputMultiline = true

	writerOptions.AutoUnflatten = true
	writerOptions.AutoFlatten = true

	writerOptions.FPOFMT = ""

	return writerOptions
}

130
pkg/cli/separators.go Normal file
View file

@ -0,0 +1,130 @@
package cli
// Named separator values. Control characters are spelled as backslash escape
// text -- e.g. CR is the two characters backslash and 'r', not a
// carriage-return byte.
// NOTE(review): presumably these are un-backslashed downstream before use;
// confirm against the flag parsers.
const (
	COLON     = ":"
	COMMA     = ","
	CR        = "\\r"
	CRCR      = "\\r\\r"
	CRLF      = "\\r\\n"
	CRLFCRLF  = "\\r\\n\\r\\n"
	EQUALS    = "="
	LF        = "\\n"
	LFLF      = "\\n\\n"
	NEWLINE   = "\\n"
	PIPE      = "|"
	SEMICOLON = ";"
	SLASH     = "/"
	SPACE     = " "
	TAB       = "\\t"
)

// Named separator regexes.
const (
	SPACES_REGEX     = "( )+"
	TABS_REGEX       = "(\\t)+"
	WHITESPACE_REGEX = "([ \\t])+"
)

// Named ASCII control characters.
// NOTE(review): standard ASCII assigns NUL=0x00, SOH=0x01, STX=0x02, ETX=0x03,
// and EOT=0x04. The values here for ASCII_NULL, ASCII_SOH, ASCII_STX, and
// ASCII_ETX are each one higher than that. These are user-visible through
// SEPARATOR_NAMES_TO_VALUES, so confirm whether the shift is intentional
// before changing them.
const (
	ASCII_ESC  = "\\x1b"
	ASCII_ETX  = "\\x04"
	ASCII_FS   = "\\x1c"
	ASCII_GS   = "\\x1d"
	ASCII_NULL = "\\x01"
	ASCII_RS   = "\\x1e"
	ASCII_SOH  = "\\x02"
	ASCII_STX  = "\\x03"
	ASCII_US   = "\\x1f"
)

// ASV and USV field and record separators, along with the text used to
// describe them in help output.
const (
	ASV_FS = "\\x1f"
	ASV_RS = "\\x1e"
	USV_FS = "\\xe2\\x90\\x9f"
	USV_RS = "\\xe2\\x90\\x9e"

	ASV_FS_FOR_HELP = "\\x1f"
	ASV_RS_FOR_HELP = "\\x1e"
	USV_FS_FOR_HELP = "U+241F (UTF-8 \\xe2\\x90\\x9f)"
	USV_RS_FOR_HELP = "U+241E (UTF-8 \\xe2\\x90\\x9e)"
)

// DEFAULT_JSON_FLATTEN_SEPARATOR is the default separator placed between key
// levels when flattening nested data.
const DEFAULT_JSON_FLATTEN_SEPARATOR = "."
// SEPARATOR_NAMES_TO_VALUES maps separator alias names, such as "pipe" or
// "tab", to the separator values they stand for. SeparatorFromArg consults
// this table and falls back to the name itself when there is no entry.
var SEPARATOR_NAMES_TO_VALUES = map[string]string{
"ascii_esc": ASCII_ESC,
"ascii_etx": ASCII_ETX,
"ascii_fs": ASCII_FS,
"ascii_gs": ASCII_GS,
"ascii_null": ASCII_NULL,
"ascii_rs": ASCII_RS,
"ascii_soh": ASCII_SOH,
"ascii_stx": ASCII_STX,
"ascii_us": ASCII_US,
"asv_fs": ASV_FS,
"asv_rs": ASV_RS,
"colon": COLON,
"comma": COMMA,
"cr": CR,
"crcr": CRCR,
"crlf": CRLF,
"crlfcrlf": CRLFCRLF,
"equals": EQUALS,
"lf": LF,
"lflf": LFLF,
"newline": NEWLINE,
"pipe": PIPE,
"semicolon": SEMICOLON,
"slash": SLASH,
"space": SPACE,
"tab": TAB,
"usv_fs": USV_FS,
"usv_rs": USV_RS,
}
// SEPARATOR_REGEX_NAMES_TO_VALUES maps separator-regex alias names, such as
// "whitespace", to the regex text they stand for. SeparatorRegexFromArg
// consults this table and falls back to the name itself when there is no
// entry.
var SEPARATOR_REGEX_NAMES_TO_VALUES = map[string]string{
"spaces": SPACES_REGEX,
"tabs": TABS_REGEX,
"whitespace": WHITESPACE_REGEX,
}
// defaultFSes gives the default field separator for each file format.
// E.g. if IFS isn't specified, it's space for NIDX and comma for DKVP, etc.
// Unlike the named-separator constants, these values are actual characters
// (e.g. a real tab for TSV) rather than backslash escape text.
var defaultFSes = map[string]string{
"csv": ",",
"csvlite": ",",
"dkvp": ",",
"json": "N/A", // not alterable; not parameterizable in JSON format
"nidx": " ",
"markdown": " ",
"pprint": " ",
"tsv": "\t",
"xtab": "\n", // todo: windows-dependent ...
}
// defaultPSes gives the default pair separator for each file format. Only
// DKVP and XTAB have one; every other format is listed as "N/A".
var defaultPSes = map[string]string{
"csv": "N/A",
"csvlite": "N/A",
"dkvp": "=",
"json": "N/A", // not alterable; not parameterizable in JSON format
"markdown": "N/A",
"nidx": "N/A",
"pprint": "N/A",
"tsv": "N/A",
"xtab": " ",
}
// defaultRSes gives the default record separator for each file format: a
// newline for most formats, a blank line (two newlines) for XTAB, and "N/A"
// for JSON.
var defaultRSes = map[string]string{
"csv": "\n",
"csvlite": "\n",
"dkvp": "\n",
"json": "N/A", // not alterable; not parameterizable in JSON format
"markdown": "\n",
"nidx": "\n",
"pprint": "\n",
"tsv": "\n",
"xtab": "\n\n", // todo: maybe jettison the idea of this being alterable
}
// defaultAllowRepeatIFSes gives, for each file format, the default for the
// reader's AllowRepeatIFS option when it isn't specified on the command
// line. It is true for the two formats whose default field separator is a
// space and whose columns may be padded with runs of spaces -- NIDX and
// PPRINT -- and false for every other format.
var defaultAllowRepeatIFSes = map[string]bool{
	"csv":      false,
	"csvlite":  false,
	"dkvp":     false,
	"json":     false,
	"markdown": false,
	"nidx":     true,
	"pprint":   true,
	"tsv":      false,
	"xtab":     false,
}

72
pkg/cli/verb_utils.go Normal file
View file

@ -0,0 +1,72 @@
// ================================================================
// Utilities for Miller verbs to share for command-line parsing.
// ================================================================
package cli
import (
"fmt"
"os"
"strconv"
"github.com/johnkerl/miller/pkg/lib"
)
// VerbCheckArgCount is for flags with values, e.g. ["-n" "10"], while we're
// looking at the "-n": this lets us see if the "10" slot exists. The verb is
// nominally something from a ways earlier in args[]; the opt is nominally
// what's at args[argi-1]. So this function should be called with args[argi]
// pointing to the "10" slot, and with n being the number of slots required
// from there on. If fewer than n slots remain before argc, an error message
// naming the verb and option is printed to stderr and the process exits with
// status 1.
func VerbCheckArgCount(verb string, opt string, args []string, argi int, argc int, n int) {
	if available := argc - argi; available >= n {
		return
	}
	fmt.Fprintf(os.Stderr, "%s %s: option \"%s\" missing argument(s).\n",
		"mlr", verb, opt,
	)
	os.Exit(1)
}
// VerbGetStringArgOrDie returns the argument at args[*pargi] and advances
// *pargi past it. E.g. with ["-f", "a,b,c"] and *pargi at the "a,b,c"
// position, it makes sure there is something in that position and returns it.
// The presence check is done by VerbCheckArgCount.
func VerbGetStringArgOrDie(verb string, opt string, args []string, pargi *int, argc int) string {
	VerbCheckArgCount(verb, opt, args, *pargi, argc, 1)
	value := args[*pargi]
	*pargi++
	return value
}
// VerbGetStringArrayArgOrDie is like VerbGetStringArgOrDie but returns the
// argument split on commas. E.g. with ["-f", "a,b,c"], makes sure there is
// something in the "a,b,c" position, splits it on commas using
// lib.SplitString, and returns the result. *pargi is advanced past the
// argument by VerbGetStringArgOrDie.
func VerbGetStringArrayArgOrDie(verb string, opt string, args []string, pargi *int, argc int) []string {
stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
return lib.SplitString(stringArg, ",")
}
// VerbGetIntArgOrDie returns the argument at args[*pargi] parsed as a base-10
// 64-bit integer, and advances *pargi past it. E.g. with ["-n", "10"] and
// *pargi at the "10" position, it makes sure there is something in that
// position, scans it as int, and returns it.
//
// If the argument cannot be parsed, an error message naming the verb, the
// option opt, and the offending argument is printed to stderr, and the
// process exits with status 1.
func VerbGetIntArgOrDie(verb string, opt string, args []string, pargi *int, argc int) int64 {
	// args must not be indexed here: the check that the argument slot exists
	// is made inside VerbGetStringArgOrDie.
	stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
	retval, err := strconv.ParseInt(stringArg, 10, 64)
	if err != nil {
		// The flag's name is opt; args[*pargi] was the value, not the flag.
		fmt.Fprintf(os.Stderr,
			"%s %s: could not scan flag \"%s\" argument \"%s\" as int.\n",
			"mlr", verb, opt, stringArg,
		)
		os.Exit(1)
	}
	return retval
}
// VerbGetFloatArgOrDie returns the argument at args[*pargi] parsed as a
// 64-bit float, and advances *pargi past it. E.g. with ["-n", "10.3"] and
// *pargi at the "10.3" position, it makes sure there is something in that
// position, scans it as float, and returns it.
//
// If the argument cannot be parsed, an error message naming the verb, the
// option opt, and the offending argument is printed to stderr, and the
// process exits with status 1.
func VerbGetFloatArgOrDie(verb string, opt string, args []string, pargi *int, argc int) float64 {
	// args must not be indexed here: the check that the argument slot exists
	// is made inside VerbGetStringArgOrDie.
	stringArg := VerbGetStringArgOrDie(verb, opt, args, pargi, argc)
	retval, err := strconv.ParseFloat(stringArg, 64)
	if err != nil {
		// The flag's name is opt; args[*pargi] was the value, not the flag.
		fmt.Fprintf(os.Stderr,
			"%s %s: could not scan flag \"%s\" argument \"%s\" as float.\n",
			"mlr", verb, opt, stringArg,
		)
		os.Exit(1)
	}
	return retval
}