diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5ea29ed16..287d929c7 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -109,6 +109,7 @@ nav: - "Auxiliary commands": "reference-main-auxiliary-commands.md" - "Manual page": "manpage.md" - "Building from source": "build.md" + - "Miller as a library": "miller-as-library.md" - "How to create a new release": "how-to-release.md" - "Documents for previous releases": "release-docs.md" - "Glossary": "glossary.md" diff --git a/docs/src/miller-as-library.md b/docs/src/miller-as-library.md new file mode 100644 index 000000000..49c381138 --- /dev/null +++ b/docs/src/miller-as-library.md @@ -0,0 +1,202 @@ + +
+package main
+
+import (
+ "fmt"
+
+ "github.com/johnkerl/miller/pkg/bifs"
+ "github.com/johnkerl/miller/pkg/mlrval"
+)
+
+func main() {
+ a := mlrval.FromInt(2)
+ b := mlrval.FromInt(60)
+ c := bifs.BIF_pow(a, b)
+ fmt.Println(c.String())
+}
+
+
+```
+$ go build main1.go
+$ ./main1
+1152921504606846976
+```
+
+Or simply:
+```
+$ go run main1.go
+1152921504606846976
+```
+
+## Another example use
+
+
+package main
+
+import (
+ "bufio"
+ "container/list"
+ "errors"
+ "fmt"
+ "os"
+
+ "github.com/johnkerl/miller/pkg/cli"
+ "github.com/johnkerl/miller/pkg/input"
+ "github.com/johnkerl/miller/pkg/output"
+ "github.com/johnkerl/miller/pkg/transformers"
+ "github.com/johnkerl/miller/pkg/types"
+)
+
+func convert_csv_to_json(fileNames []string) error {
+ options := &cli.TOptions{
+ ReaderOptions: cli.TReaderOptions{
+ InputFileFormat: "csv",
+ IFS: ",",
+ IRS: "\n",
+ RecordsPerBatch: 1,
+ },
+ WriterOptions: cli.TWriterOptions{
+ OutputFileFormat: "json",
+ },
+ }
+ outputStream := os.Stdout
+ outputIsStdout := true
+
+ // Since Go is concurrent, the context struct needs to be duplicated and
+ // passed through the channels along with each record.
+ initialContext := types.NewContext()
+
+ // Instantiate the record-reader.
+ // RecordsPerBatch is tracked separately from ReaderOptions since join/repl
+ // may use batch size of 1.
+ recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch)
+ if err != nil {
+ return err
+ }
+
+ // Instantiate the record-writer
+ recordWriter, err := output.Create(&options.WriterOptions)
+ if err != nil {
+ return err
+ }
+
+ cat, err := transformers.NewTransformerCat(
+ false, // doCounters bool,
+ "", // counterFieldName string,
+ nil, // groupByFieldNames []string,
+ false, // doFileName bool,
+ false, // doFileNum bool,
+ )
+ if err != nil {
+ return err
+ }
+ recordTransformers := []transformers.IRecordTransformer{cat}
+
+ // Set up the reader-to-transformer and transformer-to-writer channels.
+ readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext
+ writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext
+
+ // We're done when a fatal error is registered on input (file not found,
+ // etc) or when the record-writer has written all its output. We use
+ // channels to communicate both of these conditions.
+ inputErrorChannel := make(chan error, 1)
+ doneWritingChannel := make(chan bool, 1)
+ dataProcessingErrorChannel := make(chan bool, 1)
+
+ readerDownstreamDoneChannel := make(chan bool, 1)
+
+ // Start the reader, transformer, and writer. Let them run until fatal input
+ // error or end-of-processing happens.
+ bufferedOutputStream := bufio.NewWriter(outputStream)
+
+ go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel)
+ go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers,
+ writerChannel, options)
+ go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel,
+ dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout)
+
+ var retval error
+ done := false
+ for !done {
+ select {
+ case ierr := <-inputErrorChannel:
+ retval = ierr
+ break
+ case _ = <-dataProcessingErrorChannel:
+ retval = errors.New("exiting due to data error") // details already printed
+ break
+ case _ = <-doneWritingChannel:
+ done = true
+ break
+ }
+ }
+
+ bufferedOutputStream.Flush()
+
+ return retval
+}
+
+func main() {
+ err := convert_csv_to_json(os.Args[1:])
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "%v\n", err)
+ }
+}
+
+
++host,status +apoapsis.east.our.org,up +nadir.west.our.org,down ++ +``` +$ go build main2.go +$ ./main2 data/hostnames.csv +{"host": "apoapsis.east.our.org", "status": "up"} +{"host": "nadir.west.our.org", "status": "down"} +``` + + + diff --git a/docs/src/miller-as-library.md.in b/docs/src/miller-as-library.md.in new file mode 100644 index 000000000..b7051165b --- /dev/null +++ b/docs/src/miller-as-library.md.in @@ -0,0 +1,54 @@ +# Miller as a library + +Very initially and experimentally, as of Miller 6.9.1, Go developers will be able to access Miller source +code --- moved from `internal/pkg/` to `pkg/` --- within their own Go projects. + +## Setup + +``` +$ mkdir use-mlr + +$ cd use-mlr + +$ go mod init github.com/johnkerl/miller-library-example +go: creating new go.mod: module github.com/johnkerl/miller-library-example + +# One of: +$ go get github.com/johnkerl/miller +$ go get github.com/johnkerl/miller@0f27a39a9f92d4c633dd29d99ad203e95a484dd3 +# Etc. + +$ go mod tidy +``` + +## One example use + +GENMD-INCLUDE-ESCAPED(miller-as-library/main1.go) + +``` +$ go build main1.go +$ ./main1 +1152921504606846976 +``` + +Or simply: +``` +$ go run main1.go +1152921504606846976 +``` + +## Another example use + +GENMD-INCLUDE-ESCAPED(miller-as-library/main2.go) + +GENMD-INCLUDE-ESCAPED(data/hostnames.csv) + +``` +$ go build main2.go +$ ./main2 data/hostnames.csv +{"host": "apoapsis.east.our.org", "status": "up"} +{"host": "nadir.west.our.org", "status": "down"} +``` + + + diff --git a/docs/src/miller-as-library/main1.go b/docs/src/miller-as-library/main1.go new file mode 100644 index 000000000..c56f5a0db --- /dev/null +++ b/docs/src/miller-as-library/main1.go @@ -0,0 +1,15 @@ +package main + +import ( + "fmt" + + "github.com/johnkerl/miller/pkg/bifs" + "github.com/johnkerl/miller/pkg/mlrval" +) + +func main() { + a := mlrval.FromInt(2) + b := mlrval.FromInt(60) + c := bifs.BIF_pow(a, b) + fmt.Println(c.String()) +} diff --git a/docs/src/miller-as-library/main2.go 
b/docs/src/miller-as-library/main2.go new file mode 100644 index 000000000..07d4be50e --- /dev/null +++ b/docs/src/miller-as-library/main2.go @@ -0,0 +1,111 @@ +package main + +import ( + "bufio" + "container/list" + "errors" + "fmt" + "os" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/input" + "github.com/johnkerl/miller/pkg/output" + "github.com/johnkerl/miller/pkg/transformers" + "github.com/johnkerl/miller/pkg/types" +) + +func convert_csv_to_json(fileNames []string) error { + options := &cli.TOptions{ + ReaderOptions: cli.TReaderOptions{ + InputFileFormat: "csv", + IFS: ",", + IRS: "\n", + RecordsPerBatch: 1, + }, + WriterOptions: cli.TWriterOptions{ + OutputFileFormat: "json", + }, + } + outputStream := os.Stdout + outputIsStdout := true + + // Since Go is concurrent, the context struct needs to be duplicated and + // passed through the channels along with each record. + initialContext := types.NewContext() + + // Instantiate the record-reader. + // RecordsPerBatch is tracked separately from ReaderOptions since join/repl + // may use batch size of 1. + recordReader, err := input.Create(&options.ReaderOptions, options.ReaderOptions.RecordsPerBatch) + if err != nil { + return err + } + + // Instantiate the record-writer + recordWriter, err := output.Create(&options.WriterOptions) + if err != nil { + return err + } + + cat, err := transformers.NewTransformerCat( + false, // doCounters bool, + "", // counterFieldName string, + nil, // groupByFieldNames []string, + false, // doFileName bool, + false, // doFileNum bool, + ) + if err != nil { + return err + } + recordTransformers := []transformers.IRecordTransformer{cat} + + // Set up the reader-to-transformer and transformer-to-writer channels. 
+ readerChannel := make(chan *list.List, 2) // list of *types.RecordAndContext + writerChannel := make(chan *list.List, 1) // list of *types.RecordAndContext + + // We're done when a fatal error is registered on input (file not found, + // etc) or when the record-writer has written all its output. We use + // channels to communicate both of these conditions. + inputErrorChannel := make(chan error, 1) + doneWritingChannel := make(chan bool, 1) + dataProcessingErrorChannel := make(chan bool, 1) + + readerDownstreamDoneChannel := make(chan bool, 1) + + // Start the reader, transformer, and writer. Let them run until fatal input + // error or end-of-processing happens. + bufferedOutputStream := bufio.NewWriter(outputStream) + + go recordReader.Read(fileNames, *initialContext, readerChannel, inputErrorChannel, readerDownstreamDoneChannel) + go transformers.ChainTransformer(readerChannel, readerDownstreamDoneChannel, recordTransformers, + writerChannel, options) + go output.ChannelWriter(writerChannel, recordWriter, &options.WriterOptions, doneWritingChannel, + dataProcessingErrorChannel, bufferedOutputStream, outputIsStdout) + + var retval error + done := false + for !done { + select { + case ierr := <-inputErrorChannel: + retval = ierr + break + case _ = <-dataProcessingErrorChannel: + retval = errors.New("exiting due to data error") // details already printed + break + case _ = <-doneWritingChannel: + done = true + break + } + } + + bufferedOutputStream.Flush() + + return retval +} + +func main() { + err := convert_csv_to_json(os.Args[1:]) + if err != nil { + fmt.Fprintf(os.Stderr, "%v\n", err) + } +}