From 6ccf34766b64d32d5e327d15052c21f26722e705 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 24 Mar 2021 00:07:20 -0400 Subject: [PATCH] Neatens for parser-experimental, and go test ./... --- .gitignore | 7 + docs/build.rst.in | 1 + go/.gitignore | 7 - go/.vimrc | 4 +- go/README.md | 193 +----------------- go/build | 2 +- go/parser-experiments/README.md | 2 - go/reg-test/expected/case-cli-help.sh.out | 5 +- go/src/auxents/repl/session.go | 4 +- go/src/cli/mlrcli.go | 1 - go/src/cli/mlrcli_usage.go | 24 ++- go/src/cliutil/option_parse.go | 2 +- go/src/dsl/ast_test.go | 60 ------ go/src/lib/lib_test.go | 5 +- go/u/tester | 11 - .../.vimrc | 0 parser-experiments/README.md | 3 + .../one/build | 6 +- .../one/main.go | 0 .../one/run | 0 .../one/semi1.bnf | 0 .../one/semi2.bnf | 0 .../two/brackets.bnf | 0 .../two/build | 0 .../two/emit01.bnf | 0 .../two/emit02.bnf | 0 .../two/main.go | 0 .../two/mktags | 0 .../two/run | 0 .../two/semi3.bnf | 0 .../two/temp.bnf | 0 31 files changed, 41 insertions(+), 296 deletions(-) delete mode 100644 go/parser-experiments/README.md delete mode 100644 go/src/dsl/ast_test.go delete mode 100755 go/u/tester rename {go/parser-experiments => parser-experiments}/.vimrc (100%) create mode 100644 parser-experiments/README.md rename {go/parser-experiments => parser-experiments}/one/build (92%) rename {go/parser-experiments => parser-experiments}/one/main.go (100%) rename {go/parser-experiments => parser-experiments}/one/run (100%) rename {go/parser-experiments => parser-experiments}/one/semi1.bnf (100%) rename {go/parser-experiments => parser-experiments}/one/semi2.bnf (100%) rename {go/parser-experiments => parser-experiments}/two/brackets.bnf (100%) rename {go/parser-experiments => parser-experiments}/two/build (100%) rename {go/parser-experiments => parser-experiments}/two/emit01.bnf (100%) rename {go/parser-experiments => parser-experiments}/two/emit02.bnf (100%) rename {go/parser-experiments => parser-experiments}/two/main.go (100%) rename {go/parser-experiments => parser-experiments}/two/mktags (100%) rename {go/parser-experiments => parser-experiments}/two/run (100%) rename {go/parser-experiments => parser-experiments}/two/semi3.bnf (100%) rename {go/parser-experiments => parser-experiments}/two/temp.bnf (100%) diff --git a/.gitignore b/.gitignore index 3bc057a4c..97296a984 100644 --- a/.gitignore +++ b/.gitignore @@ -114,3 +114,10 @@ c/stream/Makefile c/unit_test/Makefile man/Makefile parser-experiments/two/src + +parser-experiments/one/src/github.com +parser-experiments/one/src/experimental +parser-experiments/one/main + +parser-experiments/two/src +parser-experiments/two/main diff --git a/docs/build.rst.in b/docs/build.rst.in index 4796a82b2..14236e65a 100644 --- a/docs/build.rst.in +++ b/docs/build.rst.in @@ -168,6 +168,7 @@ In this example I am using version 3.4.0; of course that will change for subsequ * Publish the release * Check the release-specific docs: + * Look at https://miller.readthedocs.io for new-version docs, after a few minutes' propagation time. * Notify: diff --git a/go/.gitignore b/go/.gitignore index 7aa2b4dad..3295623f5 100644 --- a/go/.gitignore +++ b/go/.gitignore @@ -6,10 +6,3 @@ pkg/ r s mlrgo - -parser-experiments/one/src/github.com -parser-experiments/one/src/experimental -parser-experiments/one/main - -parser-experiments/two/src -parser-experiments/two/main diff --git a/go/.vimrc b/go/.vimrc index e992cc177..3e8a4f8d7 100644 --- a/go/.vimrc +++ b/go/.vimrc @@ -1,3 +1 @@ -map \d :w:!clear;echo Building ...; echo; build-go -map \f :w:!clear;echo Building ...; echo; build -map \t :w:!clear;echo; tester +map \f :w:!clear;echo Building ...; echo; build; echo; main diff --git a/go/README.md b/go/README.md index 0a981febb..85c2cafcc 100644 --- a/go/README.md +++ b/go/README.md @@ -1,191 +1,4 @@ -# Status of the Go port +Running the GOCC parser-generator for the full Miller grammar takes a few minutes. That's +a bit painful for experimentation; hence this. -* This will be a full Go port of [Miller](https://miller.readthedocs.io/). Things are currently rough and iterative and incomplete. I don't have a firm timeline but I suspect it will take a few more months of late-evening/spare-time work. -* The released Go port will become Miller 6.0. As noted below, this will be a win both at the source-code level, and for users of Miller. -* I hope to retain backward compatibility at the command-line level as much as possible. -* In the meantime I will still keep fixing bugs, doing some features, etc. in C on Miller 5.x -- in the near term, support for Miller's C implementation continues as before. - -# Port-completion criteria - -* `reg-test/run` completes -- either completing/fixing the C/Go source-code discrepancies, or accepting the changes as backward incomptabilities -* Double-checking all Miller issues ever, in case I fixed/implemented something but didn't have reg-test coverage -* All `TODO`/`xxx` comments in Go, BNF source code, and case-files are resolved -* Release notes including Go-only features, and C/Go backward-incompatibilities -* Docs updated at https://miller.readthedocs.io/ (source-controlled at [../docs](../docs/)) -* Equivalent of `./configure`, whatever that turns out to be - -# Trying out the Go port - -* Caveat: *lots* of things present in the C implementation are currently missing in the Go implementation. So if something doesn't work, it's almost certainly because it doesn't work *yet*. -* That said, if anyone is interested in playing around with it and giving early feedback, I'll be happy for it. -* Building: - * Clone the Miller repo - * `cd go` - * `./build` should create `mlr`, and print the two lines `Compile OK` and `Test OK`. If it doesn't do this on your platform, please [file an issue](https://github.com/johnkerl/miller/issues). -* Platforms tried so far: - * macOS with Go 1.14, and Linux Mint with Go 1.10 - * Windows I have not tried at all -* On-line help: - * `mlr --help` advertises some things the Go implementation doesn't actually do yet. - * `mlr --help-all-verbs` correctly lists verbs which do things in the Go implementation. -* See also https://github.com/johnkerl/miller/issues/372 - -# Benefits of porting to Go - -* The [lack of a streaming (record-by-record) JSON reader](http://johnkerl.org/miller/doc/file-formats.html#JSON_non-streaming) in the C implementation ([issue 99](https://github.com/johnkerl/miller/issues/99)) is immediately solved in the Go implementation. -* In the C implementation, [arrays were not supported in the DSL](http://johnkerl.org/miller/doc/file-formats.html#Arrays); in the Go implementation they are. -* [Flattening nested map structures to output records](http://johnkerl.org/miller/doc/file-formats.html#Formatting_JSON_options) was clumsy. Now, Miller will be a JSON-to-JSON processor, if your inputs and outputs are both JSON; JSON input and output will be idiomatic. -* The quoted-DKVP feature from [issue 266](https://github.com/johnkerl/miller/issues/266) will be easily addressed. -* String/number-formatting issues in [issue 211](https://github.com/johnkerl/miller/issues/211), [issue 178](https://github.com/johnkerl/miller/issues/178), [issue 151](https://github.com/johnkerl/miller/issues/151), and [issue 259](https://github.com/johnkerl/miller/issues/259) will be fixed during the Go port. -* I think some DST/timezone issues such as [issue 359](https://github.com/johnkerl/miller/issues/359) will be easier to fix using the Go datetime library than using the C datetime library -* The code will be easier to read and, I hope, easier for others to contribute to. What this means is it should be quicker and easier to add new features to Miller -- after the development-time cost of the port itself is paid, of course. - -# Why Go - -* As noted above, multiple Miller issues will benefit from stronger library support. -* Channels/goroutines are an excellent for Miller's reader/mapper/mapper/mapper/writer record-stream architecture. -* Since I did timing experiments in 2015, I found Go to be faster than it was then. -* In terms of CPU-cycle-count, Go is a bit slower than C (it does more things, like bounds-checking arrays and so on) -- but by leveraging concurrency over a couple processors, I find that it's competitive in terms of wall-time. -* Go is an up-and-coming language, with good reason -- it's mature, stable, with few of C's weaknesses and many of C's strengths. -* The source code will be easier to read/maintain/write, by myself and others. - -# Things which may change - -Please see https://github.com/johnkerl/miller/issues/372. - -# Efficiency of the Go port - -As I wrote [here](http://johnkerl.org/miller/doc/whyc.html) back in 2015 I couldn't get Rust or Go (or any other language I tried) to do some test-case processing as quickly as C, so I stuck with C. - -Either Go has improved since 2015, or I'm a better Go programmer than I used to be, or both -- but as of 2020 I can get Go-Miller to process data about as quickly as C-Miller. - -Note: in some sense Go-Miller is *less* efficient but in a way that doesn't significantly affect wall time. Namely, doing `mlr cat` on a million-record data file on my bargain-value MacBook Pro, the C version takes about 2.5 seconds and the Go version takes about 3 seconds. So in terms of wall time -- which is what we care most about, how long we have to wait -- it's about the same. - -A way to look a little deeper at resource usage is to run `htop`, while processing a 10x larger file, so it'll take 25 or 30 seconds rather than 2.5 or 3. This way we can look at the steady-state resource consumption. I found that the C version -- which is purely single-threaded -- is taking 100% CPU. And the Go version, which uses concurrency and channels and `MAXPROCS=4`, with reader/transformer/writer each on their own CPU, is taking about 240% CPU. So Go-Miller is taking up not just a little more CPU, but a lot more -- yet, it does more work in parallel, and finishes the job in about the same amount of time. - -Even commodity hardware has multiple CPUs these days -- and the Go code is *much* easier to read, extend, and improve than the C code -- so I'll call this a net win for Miller. - -# Developer information - -## Source-code goals - -Donald Knuth famously said: *Programs are meant to be read by humans and only incidentally for computers to execute.* - -During the coding of Miller, I've been guided by the following: - -* *Miller should be pleasant to read.* - * If you want to fix a bug, you should be able to quickly and confidently find out where and how. - * If you want to learn something about Go channels, or lexing/parsing in Go -- especially if you don't already know much about them -- the comments should help you learn what you want to. - * If you're the kind of person who reads other people's code for fun, well, the code should be fun, as well as readable. - * `README.md` files throughout the directory tree are intended to give you a sense of what is where, what to read first and and what doesn't need reading right away, and so on -- so you spend a minimum of time being confused or frustrated. - * Names of files, variables, functions, etc. should be fully spelled out (e.g. `NewEvaluableLeafNode`), except for a small number of most-used names where a longer name would cause unnecessary line-wraps (e.g. `Mlrval` instead of `MillerValue` since this appears very very often). - * Code should not be too clever. This includes some reasonable amounts of code duplication from time to time, to keep things inline, rather than lasagna code. - * Things should be transparent. For example, `mlr -n put -v '$y = 3 + 0.1 * $x'` shows you the abstract syntax tree derived from the DSL expression. - * Comments should be robust with respect to reasonably anticipated changes. For example, one package should cross-link to another in its comments, but I try to avoid mentioning specific filenames too much in the comments and README files since these may change over time. I make an exception for stable points such as [mlr.go](./mlr.go), [mlr.bnf](./src/parsing/mlr.bnf), [stream.go](./src/stream/stream.go), etc. -* *Miller should be pleasant to write.* - * It should be quick to answer the question *Did I just break anything?* -- hence the `build` and `reg_test/run` regression scripts. - * It should be quick to find out what to do next as you iteratively develop -- see for example [cst/README.md](https://github.com/johnkerl/miller/blob/master/go/src/dsl/cst/README.md). -* *The language should be an asset, not a liability.* - * One of the reasons I chose Go is that (personally anyway) I find it to be reasonably efficient, well-supported with standard libraries, straightforward, and fun. I hope you enjoy it as much as I have. - -## Directory structure - -Information here is for the benefit of anyone reading/using the Miller Go code. To use the Miller tool at the command line, you don't need to know any of this if you don't want to. :) - -## Directory-structure overview - -Miller is a multi-format record-stream processor, where a **record** is a -sequence of key-value pairs. The basic **stream** operation is: - -* **read** records in some specified file format; -* **transform** the input records to output records in some user-specified way, using a **chain** of **transformers** (also sometimes called **verbs**) -- sort, filter, cut, put, etc.; -* **write** the records in some specified file format. - -So, in broad overview, the key packages are: - -* [src/stream](./src/stream) -- connect input -> transforms -> output via Go channels -* [src/input](./src/input) -- read input records -* [src/transforming](./src/transforming) -- transform input records to output records -* [src/output](./src/output) -- write output records -* The rest are details to support this. - -## Directory-structure details - -### Dependencies - -* Miller dependencies are all in the Go standard library, except two: - * GOCC lexer/parser code-generator from [github.com/goccmack/gocc](https://github.com/goccmack/gocc): - * This package defines the grammar for Miller's domain-specific language (DSL) for the Miller `put` and `filter` verbs. And, GOCC is a joy to use. :) - * It is used on the terms of its open-source license. - * [golang.org/x/term](https://pkg.go.dev/golang.org/x/term): - * Just a one-line Miller callsite for is-a-terminal checking for the [Miller REPL](https://github.com/johnkerl/miller/blob/go-mod/go/src/auxents/repl/README.md). - * It is used on the terms of its open-source license. -* See also [./go.mod](go.mod). Setup: - * `go get github.com/goccmack/gocc` - * `go get golang.org/x/term` - -### Miller per se - -* The main entry point is [mlr.go](./mlr.go); everything else in [src](./src). -* [src/lib](./src/lib): - * Implementation of the [`Mlrval`](./src/types/mlrval.go) datatype which includes string/int/float/boolean/void/absent/error types. These are used for record values, as well as expression/variable values in the Miller `put`/`filter` DSL. See also below for more details. - * [`Mlrmap`](./src/types/mlrmap.go) is the sequence of key-value pairs which represents a Miller record. The key-lookup mechanism is optimized for Miller read/write usage patterns -- please see [mlrmap.go](./src/types/mlrmap.go) for more details. - * [`context`](./src/types/context.go) supports AWK-like variables such as `FILENAME`, `NF`, `NR`, and so on. -* [src/cli](./src/cli) is the flag-parsing logic for supporting Miller's command-line interface. When you type something like `mlr --icsv --ojson put '$sum = $a + $b' then filter '$sum > 1000' myfile.csv`, it's the CLI parser which makes it possible for Miller to construct a CSV record-reader, a transformer-chain of `put` then `filter`, and a JSON record-writer. -* [src/cliutil](./src/cliutil) contains datatypes for the CLI-parser, which was split out to avoid a Go package-import cycle. -* [src/stream](./src/stream) is as above -- it uses Go channels to pipe together file-reads, to record-reading/parsing, to a chain of record-transformers, to record-writing/formatting, to terminal standard output. -* [src/input](./src/input) is as above -- one record-reader type per supported input file format, and a factory method. -* [src/output](./src/output) is as above -- one record-writer type per supported output file format, and a factory method. -* [src/transforming](./src/transforming) contains the abstract record-transformer interface datatype, as well as the Go-channel chaining mechanism for piping one transformer into the next. -* [src/transformers](./src/transformers) is all the concrete record-transformers such as `cat`, `tac`, `sort`, `put`, and so on. I put it here, not in `transforming`, so all files in `transformers` would be of the same type. -* [src/parsing](./src/parsing) contains a single source file, `mlr.bnf`, which is the lexical/semantic grammar file for the Miller `put`/`filter` DSL using the GOCC framework. All subdirectories of `src/parsing/` are autogen code created by GOCC's processing of `mlr.bnf`. -* [src/dsl](./src/dsl) contains [`ast_types.go`](src/dsl/ast_types.go) which is the abstract syntax tree datatype shared between GOCC and Miller. I didn't use a `src/dsl/ast` naming convention, although that would have been nice, in order to avoid a Go package-dependency cycle. -* [src/dsl/cst](./src/dsl/cst) is the concrete syntax tree, constructed from an AST produced by GOCC. The CST is what is actually executed on every input record when you do things like `$z = $x * 0.3 * $y`. Please see the [src/dsl/cst/README.md](./src/dsl/cst/README.md) for more information. - -## Nil-record conventions - -Through out the code, records are passed by reference (as are most things, for -that matter, to reduce unnecessary data copies). In particular, records can be -nil through the reader/transformer/writer sequence. - -* Record-readers produce an end-of-stream marker (within the `RecordAndContext` struct) to signify end of input stream. -* Each transformer takes a record-pointer as input and produces a sequence of zero or more record-pointers. - * Many transformers, such as `cat`, `cut`, `rename`, etc. produce one output record per input record. - * The `filter` transformer produces one or zero output records per input record depending on whether the record passed the filter. - * The `nothing` transformer produces zero output records. - * The `sort` and `tac` transformers are *non-streaming* -- they produce zero output records per input record, and instead retain each input record in a list. Then, when the end-of-stream marker is received, they sort/reverse the records and emit them, then they emit the end-of-stream marker. - * Many transformers such as `stats1` and `count` also retain input records, then produce output once there is no more input to them. -* An end-of-stream marker is passed to record-writers so that they may produce final output. - * Most writers produce their output one record at a time. - * The pretty-print writer produces no output until end of stream (or schema change), since it needs to compute the max width down each column. - -## Memory management - -* Go has garbage collection which immediately simplifies the coding compared to the C port. -* Pointers are used freely for record-processing: record-readers allocate pointed records; pointed records are passed on Go channels from record-readers to record-transformers to record-writers. - * Any transformer which passes an input record through is fine -- be it unmodifed as in `mlr cat` or modified as in `mlr cut`. - * If a transformer drops a record (`mlr filter` in false cases, for example, or `mlr nothing`) it will be GCed. - * One caveat is any transformer which produces multiples, e.g. `mlr repeat` -- this needs to explicitly copy records instead of producing multiple pointers to the same record. -* Right-hand-sides of DSL expressions all pass around pointers to records and Mlrvals. - * Lvalue expressions return pointed `*types.Mlrmap` so they can be assigned to; rvalue expressions return non-pointed `types.Mlrval` but these are very shallow copies -- the int/string/etc types are copied but maps/arrays are passed by reference in the rvalue expression-evaluators. -* Copy-on-write is done on map/array put -- for example, in the assignment phase of a DSL statement, where an rvalue is assigned to an lvalue. - -## More about mlrvals - -[`Mlrval`](./src/types/mlrval.go) is the datatype of record values, as well as expression/variable values in the Miller `put`/`filter` DSL. It includes string/int/float/boolean/void/absent/error types, not unlike PHP's `zval`. - -* Miller's `absent` type is like Javascript's `undefined` -- it's for times when there is no such key, as in a DSL expression `$out = $foo` when the input record is `$x=3,y=4` -- there is no `$foo` so `$foo` has `absent` type. Nothing is written to the `$out` field in this case. See also [here](http://johnkerl.org/miller/doc/reference.html#Null_data:_empty_and_absent) for more information. -* Miller's `void` type is like Javascript's `null` -- it's for times when there is a key with no value, as in `$out = $x` when the input record is `$x=,$y=4`. This is an overlap with `string` type, since a void value looks like an empty string. I've gone back and forth on this (including when I was writing the C implementation) -- whether to retain `void` as a distinct type from empty-string, or not. I ended up keeping it as it made the `Mlrval` logic easier to understand. -* Miller's `error` type is for things like doing type-uncoerced addition of strings. Data-dependent errors are intended to result in `(error)`-valued output, rather than crashing Miller. See also [here](http://johnkerl.org/miller/doc/reference.html#Data_types) for more information. -* Miller's number handling makes auto-overflow from int to float transparent, while preserving the possibility of 64-bit bitwise arithmetic. - * This is different from JavaScript, which has only double-precision floats and thus no support for 64-bit numbers (note however that there is now [`BigInt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt)). - * This is also different from C and Go, wherein casts are necessary -- without which int arithmetic overflows. - * See also [here](http://johnkerl.org/miller/doc/reference.html#Arithmetic) for the semantics of Miller arithmetic, which the [`Mlrval`](./src/types/mlrval.go) class implements. - -## Software-testing methodology - -See [./reg-test/README.md](./reg-test/README.md). - -## Source-code indexing - -Please see https://sourcegraph.com/github.com/johnkerl/miller +As of 2021-03-24 I moved this out of the `../go` tree. It doesn't compile anymore, and would need some dependency work to compile again. diff --git a/go/build b/go/build index aaa5b189e..d4f28f9c8 100755 --- a/go/build +++ b/go/build @@ -9,5 +9,5 @@ fi set -euo pipefail ./build-go -./u/tester +go test ./... ./reg-test/run -n $verbose diff --git a/go/parser-experiments/README.md b/go/parser-experiments/README.md deleted file mode 100644 index 8d333720a..000000000 --- a/go/parser-experiments/README.md +++ /dev/null @@ -1,2 +0,0 @@ -Running the GOCC parser-generator for the full Miller grammar takes a few minutes. That's -a bit painful for experimentation; hence this. diff --git a/go/reg-test/expected/case-cli-help.sh.out b/go/reg-test/expected/case-cli-help.sh.out index abeb36820..baf0e101f 100644 --- a/go/reg-test/expected/case-cli-help.sh.out +++ b/go/reg-test/expected/case-cli-help.sh.out @@ -103,7 +103,6 @@ Options: ================================================================ Usage: mlr fill-down [options] If a given record has a missing value for a given field, fill that from - the corresponding value from a previous record, if any. By default, a 'missing' field either is absent, or has the empty-string value. With -a, a field is 'missing' only if it is absent. @@ -337,7 +336,7 @@ Shows the least frequently occurring distinct values for specified field names. The first entry is the statistical anti-mode; the remaining are runners-up. Options: -f {one or more comma-separated field names}. Required flag. --n {count}. Optional flag defaulting to %!l(int=10)ld. +-n {count}. Optional flag defaulting to 10. -b Suppress counts; show only field values. -o {name} Field name for output count. Default "count". See also "mlr most-frequent". @@ -347,7 +346,7 @@ Shows the most frequently occurring distinct values for specified field names. The first entry is the statistical mode; the remaining are runners-up. Options: -f {one or more comma-separated field names}. Required flag. --n {count}. Optional flag defaulting to %!l(int=10)ld. +-n {count}. Optional flag defaulting to 10. -b Suppress counts; show only field values. -o {name} Field name for output count. Default "count". See also "mlr least-frequent". diff --git a/go/src/auxents/repl/session.go b/go/src/auxents/repl/session.go index 50467e5b6..f5469e02f 100644 --- a/go/src/auxents/repl/session.go +++ b/go/src/auxents/repl/session.go @@ -124,7 +124,7 @@ func (this *Repl) handleSession(istream *os.File) { } if err != nil { - fmt.Fprintf(os.Stderr, "%s %s: %w", this.exeName, this.replName, err) + fmt.Fprintf(os.Stderr, "%s %s: %v", this.exeName, this.replName, err) os.Exit(1) } @@ -183,7 +183,7 @@ func (this *Repl) handleMultiLine( } if err != nil { - fmt.Fprintf(os.Stderr, "%s %s: %w\n", this.exeName, this.replName, err) + fmt.Fprintf(os.Stderr, "%s %s: %v\n", this.exeName, this.replName, err) os.Exit(1) } diff --git a/go/src/cli/mlrcli.go b/go/src/cli/mlrcli.go index 914de8b37..fa2d43c44 100644 --- a/go/src/cli/mlrcli.go +++ b/go/src/cli/mlrcli.go @@ -7,7 +7,6 @@ package cli //// ---------------------------------------------------------------- //#define DEFAULT_OFMT "%lf" //#define DEFAULT_OQUOTING QUOTE_MINIMAL -//#define DEFAULT_JSON_FLATTEN_SEPARATOR ":" //#define DEFAULT_OOSVAR_FLATTEN_SEPARATOR ":" //#define DEFAULT_COMMENT_STRING "#" // diff --git a/go/src/cli/mlrcli_usage.go b/go/src/cli/mlrcli_usage.go index eed98caeb..5b7ae40e6 100644 --- a/go/src/cli/mlrcli_usage.go +++ b/go/src/cli/mlrcli_usage.go @@ -5,6 +5,7 @@ import ( "os" "miller/src/lib" + "miller/src/cliutil" "miller/src/version" ) @@ -242,9 +243,8 @@ func mainUsageDataFormatExamples(o *os.File, argv0 string) { `) } -// TODO: ASV_FS_FOR_HELP, ASV_RS_FOR_HELP); func mainUsageDataFormatOptions(o *os.File, argv0 string) { - fmt.Fprintln(o, + fmt.Fprintf(o, ` --idkvp --odkvp --dkvp Delimited key-value pairs, e.g "a=1,b=2" (this is Miller's default format). @@ -259,11 +259,8 @@ func mainUsageDataFormatOptions(o *os.File, argv0 string) { --itsv --otsv --tsv Keystroke-savers for "--icsv --ifs tab", "--ocsv --ofs tab", "--csv --fs tab". --iasv --oasv --asv Similar but using ASCII FS %s and RS %s\n", - ASV_FS_FOR_HELP, ASV_RS_FOR_HELP); --iusv --ousv --usv Similar but using Unicode FS %s\n", - USV_FS_FOR_HELP); and RS %s\n", - USV_RS_FOR_HELP); --icsvlite --ocsvlite --csvlite Comma-separated value (or tab-separated with --fs tab, etc.). The 'lite' CSV does not handle @@ -276,11 +273,8 @@ func mainUsageDataFormatOptions(o *os.File, argv0 string) { "--ocsvlite --ofs tab", "--csvlite --fs tab". -t Synonymous with --tsvlite. --iasvlite --oasvlite --asvlite Similar to --itsvlite et al. but using ASCII FS %s and RS %s\n", - ASV_FS_FOR_HELP, ASV_RS_FOR_HELP); --iusvlite --ousvlite --usvlite Similar to --itsvlite et al. but using Unicode FS %s\n", - USV_FS_FOR_HELP); and RS %s\n", - USV_RS_FOR_HELP); --ipprint --opprint --pprint Pretty-printed tabular (produces no output until all input is in). @@ -311,7 +305,6 @@ func mainUsageDataFormatOptions(o *os.File, argv0 string) { --oflatsep {string} Separator for flattening multi-level JSON keys, e.g. '{"a":{"b":3}}' becomes a:b => 3 for non-JSON formats. Defaults to %s.\n", - DEFAULT_JSON_FLATTEN_SEPARATOR); -p is a keystroke-saver for --nidx --fs space --repifs @@ -320,7 +313,18 @@ func mainUsageDataFormatOptions(o *os.File, argv0 string) { Please use --iformat1 --oformat2 rather than --format1 --oformat2. The latter sets up input and output flags for format1, not all of which - are overridden in all cases by setting output format to format2.`) + are overridden in all cases by setting output format to format2.`, + + cliutil.ASV_FS_FOR_HELP, + cliutil.ASV_RS_FOR_HELP, + cliutil.USV_FS_FOR_HELP, + cliutil.USV_RS_FOR_HELP, + cliutil.ASV_FS_FOR_HELP, + cliutil.ASV_RS_FOR_HELP, + cliutil.USV_FS_FOR_HELP, + cliutil.USV_RS_FOR_HELP, + cliutil.DEFAULT_JSON_FLATTEN_SEPARATOR, + ) fmt.Println() fmt.Println() } diff --git a/go/src/cliutil/option_parse.go b/go/src/cliutil/option_parse.go index fee382cb0..b85b752bb 100644 --- a/go/src/cliutil/option_parse.go +++ b/go/src/cliutil/option_parse.go @@ -18,11 +18,11 @@ const ASV_RS = "\x1e" const USV_FS = "\xe2\x90\x9f" const USV_RS = "\xe2\x90\x9e" -// TODO: move somewhere else; maybe cliutil const ASV_FS_FOR_HELP = "0x1f" const ASV_RS_FOR_HELP = "0x1e" const USV_FS_FOR_HELP = "U+241F (UTF-8 0xe2909f)" const USV_RS_FOR_HELP = "U+241E (UTF-8 0xe2909e)" +const DEFAULT_JSON_FLATTEN_SEPARATOR = ":" // Returns true if the current flag was handled. Exported for use by join. func ParseReaderOptions( diff --git a/go/src/dsl/ast_test.go b/go/src/dsl/ast_test.go deleted file mode 100644 index b8d3b3294..000000000 --- a/go/src/dsl/ast_test.go +++ /dev/null @@ -1,60 +0,0 @@ -// Most tests are in reg-test/run - -package dsl - -import ( - "fmt" - "testing" - - "miller/src/dsl" - "miller/src/parsing/lexer" - "miller/src/parsing/parser" -) - -func testSingle(sourceString []byte) (*dsl.AST, error) { - fmt.Printf("Input: %s\n", sourceString) - theLexer := lexer.NewLexer(sourceString) - theParser := parser.NewParser() - interfaceAST, err := theParser.Parse(theLexer) - if err == nil { - return interfaceAST.(*dsl.AST), nil - } else { - return nil, err - } -} - -func TestFail(t *testing.T) { - _, err := testSingle([]byte("a b ; d e f")) - if err == nil { - t.Fatal("Expected parse error") - } else { - fmt.Printf("Parsing failed as expected: %v\n", err) - } -} - -func TestPassOne(t *testing.T) { - ast, err := testSingle([]byte("$x = 3")) - if err != nil { - t.Fatal(err.Error()) - } - fmt.Println("AST:") - ast.Print() -} - -func TestPassTwo(t *testing.T) { - ast, err := testSingle([]byte("$x = 3; $y = 0xef")) - if err != nil { - t.Fatal(err.Error()) - } - fmt.Println("AST:") - ast.Print() -} - -func TestPassThree(t *testing.T) { - ast, err := testSingle([]byte("$x = 3; $y = 0xef; $z = true")) - if err != nil { - t.Fatal(err.Error()) - } - fmt.Println("AST:") - ast.Print() -} diff --git a/go/src/lib/lib_test.go b/go/src/lib/lib_test.go index 1865ac764..0de1685a7 100644 --- a/go/src/lib/lib_test.go +++ b/go/src/lib/lib_test.go @@ -6,13 +6,12 @@ package lib import ( - "miller/src/lib" "testing" ) func TestRegexReplaceOnce(t *testing.T) { regexString := "[a-z]" - regex := lib.CompileMillerRegexOrDie(regexString) + regex := CompileMillerRegexOrDie(regexString) replacement := "X" input := "abcde" @@ -22,7 +21,7 @@ func TestRegexReplaceOnce(t *testing.T) { t.Fatal() } - subOutput := lib.RegexReplaceOnce(regex, input, replacement) + subOutput := RegexReplaceOnce(regex, input, replacement) if subOutput != "Xbcde" { t.Fatal() } diff --git a/go/u/tester b/go/u/tester deleted file mode 100755 index b6f2ec8c2..000000000 --- a/go/u/tester +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -# ================================================================ -# GO-SOURCE TESTS -# Note reg-test/run has FAR more test cases -# ================================================================ - -# go test -v ./... doesn't work since it triggers unready things in ./parser-experiments/. - -go test miller/src/types -go test src/dsl/ast_test.go diff --git a/go/parser-experiments/.vimrc b/parser-experiments/.vimrc similarity index 100% rename from go/parser-experiments/.vimrc rename to parser-experiments/.vimrc diff --git a/parser-experiments/README.md b/parser-experiments/README.md new file mode 100644 index 000000000..2de8b6c9c --- /dev/null +++ b/parser-experiments/README.md @@ -0,0 +1,3 @@ +Running the GOCC parser-generator for the full Miller grammar takes a few minutes. That's a bit painful for experimentation; hence this. + +As of 2021-03-24 I moved this out of the `../go` tree, to make `go test ./...` work there. This code now doesn't compile anymore, and would need some dependency work to compile again. diff --git a/go/parser-experiments/one/build b/parser-experiments/one/build similarity index 92% rename from go/parser-experiments/one/build rename to parser-experiments/one/build index 706e4bf23..250e66492 100755 --- a/go/parser-experiments/one/build +++ b/parser-experiments/one/build @@ -29,9 +29,11 @@ mkdir -p $dir # Run the parser-generator # Build the bin/gocc executable: -#go get github.com/goccmack/gocc -go get github.com/johnkerl/gocc +go get github.com/goccmack/gocc +#go get github.com/johnkerl/gocc bingocc="$GOPATH/bin/gocc" + +export GOPATH="../go" if [ ! -x "$bingocc" ]; then exit 1 fi diff --git a/go/parser-experiments/one/main.go b/parser-experiments/one/main.go similarity index 100% rename from go/parser-experiments/one/main.go rename to parser-experiments/one/main.go diff --git a/go/parser-experiments/one/run b/parser-experiments/one/run similarity index 100% rename from go/parser-experiments/one/run rename to parser-experiments/one/run diff --git a/go/parser-experiments/one/semi1.bnf b/parser-experiments/one/semi1.bnf similarity index 100% rename from go/parser-experiments/one/semi1.bnf rename to parser-experiments/one/semi1.bnf diff --git a/go/parser-experiments/one/semi2.bnf b/parser-experiments/one/semi2.bnf similarity index 100% rename from go/parser-experiments/one/semi2.bnf rename to parser-experiments/one/semi2.bnf diff --git a/go/parser-experiments/two/brackets.bnf b/parser-experiments/two/brackets.bnf similarity index 100% rename from go/parser-experiments/two/brackets.bnf rename to parser-experiments/two/brackets.bnf diff --git a/go/parser-experiments/two/build b/parser-experiments/two/build similarity index 100% rename from go/parser-experiments/two/build rename to parser-experiments/two/build diff --git a/go/parser-experiments/two/emit01.bnf b/parser-experiments/two/emit01.bnf similarity index 100% rename from go/parser-experiments/two/emit01.bnf rename to parser-experiments/two/emit01.bnf diff --git a/go/parser-experiments/two/emit02.bnf b/parser-experiments/two/emit02.bnf similarity index 100% rename from go/parser-experiments/two/emit02.bnf rename to parser-experiments/two/emit02.bnf diff --git a/go/parser-experiments/two/main.go b/parser-experiments/two/main.go similarity index 100% rename from go/parser-experiments/two/main.go rename to parser-experiments/two/main.go diff --git a/go/parser-experiments/two/mktags b/parser-experiments/two/mktags similarity index 100% rename from go/parser-experiments/two/mktags rename to parser-experiments/two/mktags diff --git a/go/parser-experiments/two/run b/parser-experiments/two/run similarity index 100% rename from go/parser-experiments/two/run rename to parser-experiments/two/run diff --git a/go/parser-experiments/two/semi3.bnf b/parser-experiments/two/semi3.bnf similarity index 100% rename from go/parser-experiments/two/semi3.bnf rename to parser-experiments/two/semi3.bnf diff --git a/go/parser-experiments/two/temp.bnf b/parser-experiments/two/temp.bnf similarity index 100% rename from go/parser-experiments/two/temp.bnf rename to parser-experiments/two/temp.bnf