From 36f3c3cb0f8818209bb8989c674d888fe4ae60fa Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 26 Nov 2022 18:32:16 -0500 Subject: [PATCH] Clarify locations of performance info --- README-docs.md | 4 +- cmd/sizes/main.go | 4 +- docs/mkdocs.yml | 2 +- docs/src/10min.md | 124 ++++- docs/src/csv-with-and-without-headers.md | 36 ++ docs/src/data-cleaning-examples.md | 12 + docs/src/data-diving-examples.md | 46 ++ docs/src/date-time-examples.md | 12 + docs/src/dkvp-examples.md | 6 + docs/src/file-formats.md | 54 +++ docs/src/flatten-unflatten.md | 57 +++ docs/src/installing-miller.md | 6 + docs/src/internationalization.md | 9 + docs/src/keystroke-savers.md | 12 + docs/src/log-processing-examples.md | 12 + docs/src/miller-programming-language.md | 48 ++ docs/src/misc-examples.md | 17 +- docs/src/new-in-miller-6.md | 12 + docs/src/operating-on-all-fields.md | 18 + docs/src/operating-on-all-records.md | 24 + docs/src/parsing-and-formatting-fields.md | 48 ++ docs/src/performance.md | 2 + docs/src/performance.md.in | 2 + docs/src/programming-examples.md | 6 + docs/src/questions-about-joins.md | 24 + docs/src/questions-about-then-chaining.md | 24 + docs/src/randomizing-examples.md | 3 + docs/src/record-heterogeneity.md | 60 +++ docs/src/reference-dsl-builtin-functions.md | 3 + docs/src/reference-dsl-control-structures.md | 75 +++ docs/src/reference-dsl-differences.md | 16 +- docs/src/reference-dsl-filter-statements.md | 12 + .../reference-dsl-higher-order-functions.md | 63 +++ docs/src/reference-dsl-operators.md | 15 + docs/src/reference-dsl-output-statements.md | 91 +++- docs/src/reference-dsl-syntax.md | 30 ++ docs/src/reference-dsl-time.md | 39 ++ docs/src/reference-dsl-unset-statements.md | 9 + .../reference-dsl-user-defined-functions.md | 15 + docs/src/reference-dsl-variables.md | 88 +++- docs/src/reference-dsl.md | 21 + docs/src/reference-main-arrays.md | 30 ++ docs/src/reference-main-compressed-data.md | 6 + docs/src/reference-main-data-types.md | 18 + 
docs/src/reference-main-flag-list.md | 4 + docs/src/reference-main-maps.md | 21 + docs/src/reference-main-null-data.md | 36 ++ docs/src/reference-main-number-formatting.md | 18 + docs/src/reference-main-overview.md | 15 + .../src/reference-main-regular-expressions.md | 22 +- docs/src/reference-main-separators.md | 21 + docs/src/reference-main-strings.md | 15 + docs/src/reference-verbs.md | 436 +++++++++++++++++- docs/src/repl.md | 3 + docs/src/scripting.md | 36 ++ docs/src/shapes-of-data.md | 42 ++ docs/src/shell-commands.md | 9 + docs/src/sorting.md | 78 ++++ docs/src/special-symbols-and-formatting.md | 36 ++ docs/src/statistics-examples.md | 9 + docs/src/two-pass-algorithms.md | 87 ++++ docs/src/unix-toolkit-context.md | 6 + 62 files changed, 2076 insertions(+), 33 deletions(-) diff --git a/README-docs.md b/README-docs.md index 1cc5972eb..58410fdac 100644 --- a/README-docs.md +++ b/README-docs.md @@ -19,8 +19,8 @@ * Running `make` within the `docs` directory handles both of those steps. * TL;DR just `make docs` from the Miller base directory * Quick-editing loop: - * In one terminal, cd to this directory and leave `mkdocs serve` running. - * In another terminal, cd to the `src` subdirectory of `docs` and edit `*.md.in`. + * In one terminal, cd to the `docs` directory and leave `mkdocs serve` running. + * In another terminal, cd to the `docs/src` subdirectory and edit `*.md.in`. * Run `genmds` to re-create all the `*.md` files, or `genmds foo.md.in` to just re-create the `foo.md.in` file you just edited, or (simplest) just `make` within the `docs/src` subdirectory. * In your browser, visit http://127.0.0.1:8000 * This doesn't write HTML in `docs/site`; HTML is served up directly in the browser -- this is nice for previewing interactive edits. 
diff --git a/cmd/sizes/main.go b/cmd/sizes/main.go index 20ff09e27..33abe92b4 100644 --- a/cmd/sizes/main.go +++ b/cmd/sizes/main.go @@ -14,8 +14,8 @@ import ( func main() { var mvs [2]mlrval.Mlrval - mvs[0] = *mlrval.FromString("hello") - mvs[1] = *mlrval.FromString("world") + mvs[0] = *mlrval.FromString("h") + mvs[1] = *mlrval.FromString("abcdefghijklmnopqrstuvwzyx") mvs[0].ShowSizes() fmt.Println() mvs[1].ShowSizes() diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 7e1b0676a..b1302abc6 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -76,6 +76,7 @@ nav: - "CPU/multicore usage": "cpu.md" - "Scripting with Miller": "scripting.md" - "Miller environment variables": "reference-main-env-vars.md" + - "Performance": "performance.md" - 'Types reference': - "Data types": "reference-main-data-types.md" - "Strings": "reference-main-strings.md" @@ -104,7 +105,6 @@ nav: - "Why?": "why.md" - "Why call it Miller?": "etymology.md" - "How original is Miller?": "originality.md" - - "Performance": "performance.md" - 'Misc. reference': - "Auxiliary commands": "reference-main-auxiliary-commands.md" - "Manual page": "manpage.md" diff --git a/docs/src/10min.md b/docs/src/10min.md index 33f7252da..5cadaa229 100644 --- a/docs/src/10min.md +++ b/docs/src/10min.md @@ -39,6 +39,9 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream But `mlr cat` can also do format conversion -- for example, you can pretty-print in tabular format: @@ -58,6 +61,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream `mlr head` and `mlr tail` count records rather than lines. 
Whether you're getting the first few records or the last few, the CSV header is included either way: @@ -71,6 +77,9 @@ yellow,triangle,true,1,11,43.6498,9.8870 red,square,true,2,15,79.2778,0.0130 red,circle,true,3,16,13.8103,2.9010 red,square,false,4,48,77.5542,7.4670 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -82,6 +91,9 @@ purple,triangle,false,7,65,80.1405,5.8240
 yellow,circle,true,8,73,63.9785,4.2370
 yellow,circle,true,9,87,63.5058,8.3350
 purple,square,false,10,91,72.3735,8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -108,6 +120,9 @@ purple,square,false,10,91,72.3735,8.2430
   "rate": 8.2430
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can sort on a single field: @@ -127,6 +142,9 @@ purple square false 10 91 72.3735 8.2430 yellow triangle true 1 11 43.6498 9.8870 purple triangle false 5 51 81.2290 8.5910 purple triangle false 7 65 80.1405 5.8240 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Or, you can sort primarily alphabetically on one field, then secondarily numerically descending on another field, and so on: @@ -146,6 +164,9 @@ red square true 2 15 79.2778 0.0130 purple triangle false 7 65 80.1405 5.8240 purple triangle false 5 51 81.2290 8.5910 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If there are fields you don't want to see in your data, you can use `cut` to keep only the ones you want, in the same order they appeared in the input data: @@ -165,6 +186,9 @@ triangle false circle true circle true square false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream You can also use `cut -o` to keep specified fields, but in your preferred order: @@ -184,6 +208,9 @@ false triangle true circle true circle false square +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream You can use `cut -x` to omit fields you don't care about: @@ -203,6 +230,9 @@ purple 7 65 80.1405 5.8240 yellow 8 73 63.9785 4.2370 yellow 9 87 63.5058 8.3350 purple 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Even though Miller's main selling point is name-indexing, sometimes you really want to refer to a field name by its positional index. Use `$[[3]]` to access the name of field 3 or `$[[[3]]]` to access the value of field 3: @@ -222,6 +252,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream
@@ -239,6 +272,9 @@ purple triangle NEW  7  65    80.1405  5.8240
 yellow circle   NEW  8  73    63.9785  4.2370
 yellow circle   NEW  9  87    63.5058  8.3350
 purple square   NEW  10 91    72.3735  8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can find the full list of verbs at the [Verbs Reference](reference-verbs.md) page. @@ -256,6 +292,9 @@ red square true 2 15 79.2778 0.0130 red circle true 3 16 13.8103 2.9010 red square false 4 48 77.5542 7.4670 red square false 6 64 77.1991 9.5310 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -265,6 +304,9 @@ red   square false 6 64    77.1991  9.5310
 color shape  flag k index quantity rate
 red   square true 2 15    79.2778  0.0130
 red   circle true 3 16    13.8103  2.9010
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Computing new fields @@ -289,6 +331,9 @@ purple triangle false 7 65 80.1405 5.8240 13.760388049450551 purple_triangl yellow circle true 8 73 63.9785 4.2370 15.09995279679018 yellow_circle yellow circle true 9 87 63.5058 8.3350 7.619172165566886 yellow_circle purple square false 10 91 72.3735 8.2430 8.779995147397793 purple_square +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream When you create a new field, it can immediately be used in subsequent statements: @@ -311,6 +356,9 @@ purple triangle false 7 65 80.1405 5.8240 66 4363 yellow circle true 8 73 63.9785 4.2370 74 5484 yellow circle true 9 87 63.5058 8.3350 88 7753 purple square false 10 91 72.3735 8.2430 92 8474 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream For `put` and `filter` we were able to type out expressions using a programming-language syntax. @@ -331,6 +379,9 @@ Zone,Total MWh 17,39.8 24,7.4 30,50.5 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -342,6 +393,9 @@ Zone Total MWh
 17   39.8
 14   27.2
 24   7.4
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
For `put` and `filter` expressions, use `${...}`: @@ -355,6 +409,9 @@ Zone Total MWh Total KWh 17 39.8 39800 24 7.4 7400 30 50.5 50500 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See also the [section on field names](reference-dsl-variables.md#field-names). @@ -401,6 +458,9 @@ a,b,c 1,2,3 4,5,6 7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Chaining verbs together @@ -415,6 +475,12 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This works fine -- but Miller also lets you chain verbs together using the word `then`. Think of this as a Miller-internal pipe that lets you use fewer keystrokes: @@ -427,6 +493,9 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream As another convenience, you can put the filename first using `--from`. When you're interacting with your data at the command line, this makes it easier to up-arrow and append to the previous command: @@ -439,6 +508,9 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -452,6 +524,9 @@ shape  quantity
 square 72.3735
 circle 63.5058
 circle 63.9785
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Sorts and stats @@ -468,6 +543,9 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Lots of Miller commands take a `-g` option for group-by: here, `head -n 1 -g shape` outputs the first record for each distinct value of the `shape` field. This means we're finding the record with highest `index` field for each distinct `shape` field: @@ -480,6 +558,9 @@ color shape flag k index quantity rate yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 purple triangle false 7 65 80.1405 5.8240 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Statistics can be computed with or without group-by field(s): @@ -493,6 +574,9 @@ shape quantity_count quantity_min quantity_mean quantity_max triangle 3 43.6498 68.33976666666666 81.229 square 4 72.3735 76.60114999999999 79.2778 circle 3 13.8103 47.0982 63.9785 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -507,6 +591,9 @@ circle   red    1              13.8103      13.8103            13.8103
 triangle purple 2              80.1405      80.68475000000001  81.229
 circle   yellow 2              63.5058      63.742149999999995 63.9785
 square   purple 1              72.3735      72.3735            72.3735
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If your output has a lot of columns, you can use XTAB format to line things up vertically for you instead: @@ -524,6 +611,9 @@ rate_p75 8.5910 rate_p90 9.8870 rate_p99 9.8870 rate_p100 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Unicode and internationalization @@ -556,6 +646,9 @@ UTF-8 data. For example: κόκκινο κύκλος αληθινό 3 16 13.8103 2.9010 κίτρινο κύκλος αληθινό 8 73 63.9785 4.2370 κίτρινο κύκλος αληθινό 9 87 63.5058 8.3350 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -573,6 +666,9 @@ UTF-8 data. For example:
 κόκκινο τετράγωνο ψευδές  6  64      77.1991  9.5310
 μοβ     τρίγωνο   ψευδές  7  65      80.1405  5.8240
 μοβ     τετράγωνο ψευδές  10 91      72.3735  8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -590,6 +686,9 @@ UTF-8 data. For example:
 желтый     КРУГ        истина 8  73     63.9785    4.2370   6
 желтый     КРУГ        истина 9  87     63.5058    8.3350   6
 фиолетовый КВАДРАТ     ложь   10 91     72.3735    8.2430   10
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## File formats and format conversion @@ -689,6 +788,9 @@ a matter of specifying input-format and output-format flags: "rate": 0.0130 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -698,6 +800,9 @@ a matter of specifying input-format and output-format flags:
 color,shape,flag,k,index,quantity,rate
 yellow,triangle,true,1,11,43.6498,9.8870
 red,square,true,2,15,79.2778,0.0130
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
However, if JSON data has map-valued or array-valued fields, Miller gives you choices on how to @@ -738,6 +843,9 @@ We can convert this to CSV, or other tabular formats:
 hostname,pid,req.id,req.method,req.path,req.host,req.headers.host,req.headers.user-agent,res.status_code,res.header.content-type,res.header.content-encoding
 localhost,12345,6789,GET,api/check,foo.bar,bar.baz,browser,200,text,plain
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -755,6 +863,9 @@ req.headers.user-agent      browser
 res.status_code             200
 res.header.content-type     text
 res.header.content-encoding plain
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
These transformations are reversible: @@ -786,6 +897,12 @@ These transformations are reversible: } } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See the [flatten/unflatten page](flatten-unflatten.md) for more information. @@ -875,9 +992,14 @@ If you like, you can first copy off your original data somewhere else, before do Lastly, using `tee` within `put`, you can split your input data into separate files per one or more field names: -
+
 mlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 cat circle.csv
diff --git a/docs/src/csv-with-and-without-headers.md b/docs/src/csv-with-and-without-headers.md
index 944255e55..7c02477b2 100644
--- a/docs/src/csv-with-and-without-headers.md
+++ b/docs/src/csv-with-and-without-headers.md
@@ -41,6 +41,9 @@ John,23,present
 Fred,34,present
 Alice,56,missing
 Carol,45,present
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Following that, you can rename the positionally indexed labels to names with meaning for your context. For example: @@ -54,6 +57,9 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
Likewise, if you need to produce CSV which is lacking its header, you can pipe Miller's output to the system command `sed 1d`, or you can use Miller's `--headerless-csv-output` option: @@ -68,6 +74,9 @@ red,square,1,80,0.219668,0.001257,0.792778,2.944117 red,circle,1,84,0.209017,0.290052,0.138103,5.065034 red,square,0,243,0.956274,0.746720,0.775542,7.117831 purple,triangle,0,257,0.435535,0.859129,0.812290,5.753095 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -79,6 +88,9 @@ red,square,1,80,0.219668,0.001257,0.792778,2.944117
 red,circle,1,84,0.209017,0.290052,0.138103,5.065034
 red,square,0,243,0.956274,0.746720,0.775542,7.117831
 purple,triangle,0,257,0.435535,0.859129,0.812290,5.753095
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Lastly, often we say "CSV" or "TSV" when we have positionally indexed data in columns which are separated by commas or tabs, respectively. In this case it's perhaps simpler to **just use NIDX format** which was designed for this purpose. (See also [File Formats](file-formats.md).) For example: @@ -98,6 +110,9 @@ Lastly, often we say "CSV" or "TSV" when we have positionally indexed data in co 1 Carol 3 present +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Headerless CSV with duplicate field values @@ -134,6 +149,9 @@ see something happened: -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream What happened? @@ -162,6 +180,9 @@ One solution is to use `--implicit-csv-header`, or its shorter alias `--hi`: -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Another solution is to use [NIDX format](file-formats.md#nidx-index-numbered-toolkit-style): @@ -178,6 +199,9 @@ Another solution is to use [NIDX format](file-formats.md#nidx-index-numbered-too -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Either way, since there is no explicit header, fields are named `1` through `9`. 
We can use the @@ -195,6 +219,9 @@ xsn,ysn,x,y,t,a,e29,e31,e32 -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -209,6 +236,9 @@ xsn,ysn,x,y,t,a,e29,e31,e32
 -331268.59231736,4537221.43295653,22,1,13.1,1,0.978,0.978,0.962
 -330341.96688431,4537221.43295653,23,1,13.1,1,0.978,0.978,0.962
 -326635.46515209,4537221.43295653,27,1,13.1,2,0.978,0.972,0.958
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Regularizing ragged CSV @@ -240,6 +270,9 @@ a,b,c 1,2,3 4,5, 6,7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream or, more simply, @@ -257,6 +290,9 @@ a,b,c 1,2,3 4,5, 6,7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See also the [record-heterogeneity page](record-heterogeneity.md). diff --git a/docs/src/data-cleaning-examples.md b/docs/src/data-cleaning-examples.md index 77c08e680..59906d5f9 100644 --- a/docs/src/data-cleaning-examples.md +++ b/docs/src/data-cleaning-examples.md @@ -40,6 +40,9 @@ barney false betty true fred true wilma true +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -51,6 +54,9 @@ barney 0
 betty  1
 fred   1
 wilma  1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
A second option is to flag badly formatted data within the output stream: @@ -64,6 +70,9 @@ barney false true betty true true fred true true wilma 1 false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Or perhaps to flag badly formatted data outside the output stream: @@ -80,6 +89,9 @@ betty true fred true wilma 1 Malformed at NR=4 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream A third way is to abort the process on first instance of bad data: diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 4a6275403..99c9f546b 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -70,6 +70,9 @@ point_longitude -81.707664 line Residential construction Masonry point_granularity 3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream A few simple queries: @@ -88,6 +91,9 @@ BAKER COUNTY 70 BRADFORD COUNTY 31 HAMILTON COUNTY 35 UNION COUNTY 15 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -97,6 +103,9 @@ UNION COUNTY        15
 line        count
 Residential 30838
 Commercial  5796
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Categorization of total insured value: @@ -108,6 +117,9 @@ Categorization of total insured value: tiv_2012_min 73.37 tiv_2012_mean 2571004.0973420837 tiv_2012_max 1701000000 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -121,6 +133,9 @@ Wood                Residential 73.37        113493.01704925536 649046.12
 Reinforced Concrete Commercial  6416016.01   20212428.681839883 60570000
 Reinforced Masonry  Commercial  1287817.34   4621372.981117158  16650000
 Steel Frame         Commercial  29790000     133492500          1701000000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -135,6 +150,9 @@ hu_site_deductible_p90  76.5
 hu_site_deductible_p95  6829.2
 hu_site_deductible_p99  126270
 hu_site_deductible_p100 7380000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -153,6 +171,7 @@ BROWARD COUNTY      0                      148500                 3258900
 CALHOUN COUNTY      0                      33339.6                33339.6
 CHARLOTTE COUNTY    5400                   52650                  250994.7
 CITRUS COUNTY       1332.9                 79974.9                483785.1
+Memory profile started.
 
@@ -165,6 +184,9 @@ tiv_2011_tiv_2012_ols_m 0.9835583980337723
 tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
 tiv_2011_tiv_2012_r2    0.9468258417320189
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -207,6 +229,9 @@ tiv_2011_tiv_2012_ols_m 1.2301
 tiv_2011_tiv_2012_ols_b -596.6239
 tiv_2011_tiv_2012_ols_n 657
 tiv_2011_tiv_2012_r2    0.9335
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Color/shape data @@ -241,6 +266,9 @@ red circle 1 84 0.209017 0.290052 0.138103 5.065034 red square 0 243 0.956274 0.746720 0.775542 7.117831 purple triangle 0 257 0.435535 0.859129 0.812290 5.753095 red square 0 322 0.201551 0.953110 0.771991 5.612050 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Look at uncategorized stats (using [creach](https://github.com/johnkerl/scripts/blob/master/fundam/creach) for spacing). @@ -263,6 +291,9 @@ v_min -0.092709 v_mean 0.49778696586624427 v_max 1.0725 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The histogram shows the different distribution of 0/1 flags: @@ -284,6 +315,9 @@ bin_lo bin_hi flag_count u_count v_count 0.8900000000000002 0.9900000000000002 0 995 993 0.9900000000000002 1.0900000000000003 4020 1013 939 1.0900000000000003 1.1900000000000002 0 0 25 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Look at univariate stats by color and shape. In particular, color-dependent flag probabilities pop out, aligning with their original Bernoulli probabilities from the data-generator script: @@ -301,6 +335,9 @@ orange 0 0.5214521452145214 1 0.001235 0.49053241584158375 0.9988 purple 0 0.09019264448336252 1 0.000266 0.49400496322241666 0.999647 0.000364 0.4970507127845888 0.999975 red 0 0.3031674208144796 1 0.000671 0.49255964641241273 0.999882 -0.092709 0.4965350941607402 1.0725 yellow 0 0.8924274593064402 1 0.0013 0.4971291160651098 0.999923 0.000711 0.5106265987261144 0.999919 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -313,6 +350,9 @@ shape    flag_min flag_mean           flag_max u_min    u_mean              u_ma
 circle   0        0.3998456194519491  1        0.000044 0.498554505982246   0.999923 -0.092709 0.49552416171362396 1.0725
 square   0        0.39611178614823817 1        0.000188 0.4993854558930749  0.999969 0.000089  0.49653825929526124 0.999975
 triangle 0        0.4015421115065243  1        0.000881 0.49685854240806604 0.999661 0.000717  0.5010495260972719  0.999995
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Look at bivariate stats by color and shape. In particular, `u,v` pairwise correlation for red circles pops out: @@ -323,6 +363,9 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
           u_v_corr              w_x_corr
 0.1334180491027861 -0.011319841199866178
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -350,4 +393,7 @@ orange triangle -0.030456661186085785    -0.1318699981926352
 yellow   circle  -0.06477331572781474    0.07369449819706045
   blue   circle  -0.10234761901929677  -0.030528539069837757
  green triangle  -0.10901825107358765   -0.04848782060162929
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/date-time-examples.md b/docs/src/date-time-examples.md index 5bcbdac01..3453f1a6e 100644 --- a/docs/src/date-time-examples.md +++ b/docs/src/date-time-examples.md @@ -40,6 +40,9 @@ we can use [strptime](reference-verbs.md#strptime) to parse the date field into
 date,event
 2018-03-07,discovery
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Caveat: localtime-handling in timezones with DST is still a work in progress; see [https://github.com/johnkerl/miller/issues/170](https://github.com/johnkerl/miller/issues/170) . See also [https://github.com/johnkerl/miller/issues/208](https://github.com/johnkerl/miller/issues/208) -- thanks @aborruso! @@ -105,6 +108,9 @@ Then, filter for adjacent difference not being 86400 (the number of seconds in a
 n=774,date=2014-04-19,qoh=130140,datestamp=1397865600,datestamp_delta=259200
 n=1119,date=2015-03-31,qoh=181625,datestamp=1427760000,datestamp_delta=172800
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Given this, it's now easy to see where the gaps are: @@ -124,6 +130,9 @@ n=777,1=2014-04-21,2=130368 n=778,1=2014-04-22,2=130368 n=779,1=2014-04-23,2=130849 n=780,1=2014-04-24,2=131026 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -141,4 +150,7 @@ n=1122,1=2015-04-02,2=181718
 n=1123,1=2015-04-03,2=181835
 n=1124,1=2015-04-04,2=182104
 n=1125,1=2015-04-05,2=182528
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/dkvp-examples.md b/docs/src/dkvp-examples.md index 2f3e3b510..e4e1c8238 100644 --- a/docs/src/dkvp-examples.md +++ b/docs/src/dkvp-examples.md @@ -147,6 +147,9 @@ eks pan 2 0.522151 ekspan 2.522151 str str int float str float wye wye 3 0.338318 wyewye 3.338318 str str int float str float eks wye 4 0.134188 ekswye 4.134188 str str int float str float wye pan 5 0.863624 wyepan 5.863624 str str int float str float +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## DKVP I/O in Ruby @@ -265,4 +268,7 @@ eks pan 2 0.522151 ekspan 2.522151 String String Integer Float String Float wye wye 3 0.338318 wyewye 3.338318 String String Integer Float String Float eks wye 4 0.134188 ekswye 4.134188 String String Integer Float String Float wye pan 5 0.863624 wyepan 5.863624 String String Integer Float String Float +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream diff --git a/docs/src/file-formats.md b/docs/src/file-formats.md index 8611a7a22..0fe20e71b 100644 --- a/docs/src/file-formats.md +++ b/docs/src/file-formats.md @@ -172,6 +172,9 @@ An **array of single-level objects** is, quite simply, **a table**: "shape": "square" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -190,6 +193,9 @@ An **array of single-level objects** is, quite simply, **a table**:
   "v": 0.001257
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Single-level JSON data goes back and forth between JSON and tabular formats @@ -202,6 +208,9 @@ in the direct way: color u v yellow 0.632170 0.988721 red 0.219668 0.001257 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -219,6 +228,9 @@ purple triangle 0    65 0.684281 0.582372 0.801405 5.805148
 yellow circle   1    73 0.603365 0.423708 0.639785 7.006414
 yellow circle   1    87 0.285656 0.833516 0.635058 6.350036
 purple square   0    91 0.259926 0.824322 0.723735 6.854221
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### Nested JSON objects @@ -260,6 +272,9 @@ input as well as output in JSON format, JSON structure is preserved throughout t } } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream But if the input format is JSON and the output format is not (or vice versa) then key-concatenation applies: @@ -273,6 +288,9 @@ flag i attributes.color attributes.shape values.u values.v values.w values.x 1 15 red square 0.219668 0.001257 0.792778 2.944117 1 16 red circle 0.209017 0.290052 0.138103 5.065034 0 48 red square 0.956274 0.746720 0.775542 7.117831 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This is discussed in more detail on the page [Flatten/unflatten: JSON vs. tabular formats](flatten-unflatten.md). @@ -319,6 +337,9 @@ Miller handles this: "rate": 0.0130 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -327,6 +348,9 @@ Miller handles this:
 
 {"color": "yellow", "shape": "triangle", "flag": "true", "k": 1, "index": 11, "quantity": 43.6498, "rate": 9.8870}
 {"color": "red", "shape": "square", "flag": "true", "k": 2, "index": 15, "quantity": 79.2778, "rate": 0.0130}
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that for _input_ data, either is acceptable: whether you use `--ijson` or `--ijsonl`, Miller @@ -348,6 +372,9 @@ eks,pan,2,0.758679,0.522151 wye,wye,3,0.204603,0.338318 eks,wye,4,0.381399,0.134188 wye,pan,5,0.573288,0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -360,6 +387,9 @@ eks pan 2 0.758679 0.522151
 wye wye 3 0.204603 0.338318
 eks wye 4 0.381399 0.134188
 wye pan 5 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that while Miller is a line-at-a-time processor and retains input lines in memory only where necessary (e.g. for sort), pretty-print output requires it to accumulate all input lines (so that it can compute maximum column widths) before producing any output. This has two consequences: (a) pretty-print output won't work on `tail -f` contexts, where Miller will be waiting for an end-of-file marker which never arrives; (b) pretty-print output for large files is constrained by available machine memory. @@ -381,6 +411,9 @@ For output only (this isn't supported in the input-scanner as of 5.0.0) you can | eks | wye | 4 | 0.381399 | 0.134188 | | wye | pan | 5 | 0.573288 | 0.863624 | +-----+-----+---+----------+----------+ +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Markdown tabular @@ -398,6 +431,9 @@ Markdown format looks like this: | wye | wye | 3 | 0.204603 | 0.338318 | | eks | wye | 4 | 0.381399 | 0.134188 | | wye | pan | 5 | 0.573288 | 0.863624 | +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream which renders like this when dropped into various web tools (e.g. github comments): @@ -486,6 +522,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Such data are easy to generate, e.g. in Ruby with @@ -551,6 +590,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Example with index-numbered input: @@ -571,6 +613,9 @@ early light 1=oh,2=say,3=can,4=you,5=see 1=by,2=the,3=dawn's 1=early,2=light +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream Example with index-numbered input and output: @@ -591,6 +636,9 @@ early light say can the dawn's light +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Data-conversion keystroke-savers @@ -681,6 +729,9 @@ type quantity green 678.12 purple 456.78 orange 123.45 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -692,4 +743,7 @@ type   quantity
 green  678.12
 purple 456.78
 orange 123.45
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/flatten-unflatten.md b/docs/src/flatten-unflatten.md index 7a3c138d2..7f12042df 100644 --- a/docs/src/flatten-unflatten.md +++ b/docs/src/flatten-unflatten.md @@ -103,6 +103,9 @@ Flattened to CSV format: a,b.x,b.y 1,2,3 4,5,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Flattened to pretty-print format: @@ -114,6 +117,9 @@ Flattened to pretty-print format: a b.x b.y 1 2 3 4 5 6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Using flatten-separator `:` instead of the default `.`: @@ -125,6 +131,9 @@ Using flatten-separator `:` instead of the default `.`: a b:x b:y 1 2 3 4 5 6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If the maps are more deeply nested, each level of map keys is joined in: @@ -150,6 +159,9 @@ If the maps are more deeply nested, each level of map keys is joined in: a b.s.w b.s.x b.t.y b.t.z 1 2 3 4 5 6 7 8 9 10 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream **Unflattening** is simply the reverse -- from non-JSON back to JSON: @@ -175,6 +187,9 @@ a b.s.w b.s.x b.t.y b.t.z a,b.x,b.y 1,2,3 4,5,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -197,6 +212,12 @@ a,b.x,b.y
   }
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Converting arrays between JSON and non-JSON @@ -226,6 +247,9 @@ If the input data contains arrays, these are also flattened similarly: the a b.1 b.2 1 2 3 4 5 6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If the arrays are more deeply nested, each level of arrays keys is joined in: @@ -251,6 +275,9 @@ If the arrays are more deeply nested, each level of arrays keys is joined in: a b.1.1 b.1.2 b.2.1 b.2.2 1 2 3 4 5 6 7 8 9 10 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream In the nested-data examples shown here, nested map values are shown containing @@ -280,6 +307,9 @@ though not shown here) nested map values can contain arrays, and vice versa. a,b.1,b.2 1,2,3 4,5,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -296,6 +326,12 @@ a,b.1,b.2
   "b": [5, 6]
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Auto-inferencing of arrays on unflatten @@ -323,6 +359,9 @@ a.1,a.2,a.3 "a": [4, 5, 6] } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -346,6 +385,9 @@ a.1,a.3,a.5
   }
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Manual control @@ -393,6 +435,9 @@ Using JSON output, we can see that `splita` has produced an array-valued field n "components": ["nadir", "west", "our", "org"] } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Using CSV output, with default auto-flatten, we get `components.1` through `components.4`: @@ -404,6 +449,9 @@ Using CSV output, with default auto-flatten, we get `components.1` through `comp host,status,components.1,components.2,components.3,components.4 apoapsis.east.our.org,up,apoapsis,east,our,org nadir.west.our.org,down,nadir,west,our,org +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Using CSV output, without default auto-flatten, we get a JSON-stringified encoding of the `components` field: @@ -415,6 +463,9 @@ Using CSV output, without default auto-flatten, we get a JSON-stringified encodi host,status,components apoapsis.east.our.org,up,"[""apoapsis"", ""east"", ""our"", ""org""]" nadir.west.our.org,down,"[""nadir"", ""west"", ""our"", ""org""]" +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Now suppose we ran this @@ -435,6 +486,9 @@ host nadir.west.our.org status down a ["nadir", "west", "our", "org"] b ["nadir", "west", "our", "org"] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream into a file [data/hostnames.xtab](./data/hostnames.xtab): @@ -476,6 +530,9 @@ leave `b` JSON-stringified: "b": "[\"nadir\", \"west\", \"our\", \"org\"]" } ] +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream See also the diff --git a/docs/src/installing-miller.md b/docs/src/installing-miller.md index b5ae44227..926fc94c6 100644 --- a/docs/src/installing-miller.md +++ b/docs/src/installing-miller.md @@ -70,6 +70,9 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -87,6 +90,9 @@ purple triangle false 7  65    80.1405  5.8240
 yellow circle   true  8  73    63.9785  4.2370
 yellow circle   true  9  87    63.5058  8.3350
 purple square   false 10 91    72.3735  8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If you run into issues on these checks, please check out the resources on the [community page](community.md) for help. diff --git a/docs/src/internationalization.md b/docs/src/internationalization.md index 5fadcab1e..520025c42 100644 --- a/docs/src/internationalization.md +++ b/docs/src/internationalization.md @@ -50,6 +50,9 @@ Support for internationalization includes: κόκκινο κύκλος αληθινό 3 16 13.8103 2.9010 κίτρινο κύκλος αληθινό 8 73 63.9785 4.2370 κίτρινο κύκλος αληθινό 9 87 63.5058 8.3350 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -67,6 +70,9 @@ Support for internationalization includes:
 κόκκινο τετράγωνο ψευδές  6  64      77.1991  9.5310
 μοβ     τρίγωνο   ψευδές  7  65      80.1405  5.8240
 μοβ     τετράγωνο ψευδές  10 91      72.3735  8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -84,4 +90,7 @@ Support for internationalization includes:
 желтый     КРУГ        истина 8  73     63.9785    4.2370   6
 желтый     КРУГ        истина 9  87     63.5058    8.3350   6
 фиолетовый КВАДРАТ     ложь   10 91     72.3735    8.2430   10
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/keystroke-savers.md b/docs/src/keystroke-savers.md index 1cc2485a1..709becf14 100644 --- a/docs/src/keystroke-savers.md +++ b/docs/src/keystroke-savers.md @@ -27,6 +27,9 @@ In our examples so far we've often made use of `mlr --icsv --opprint` or `mlr -- color shape flag k index quantity rate yellow triangle true 1 11 43.6498 9.8870 red square true 2 15 79.2778 0.0130 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -53,6 +56,9 @@ red    square   true 2 15    79.2778  0.0130
   "rate": 0.0130
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can get the full list [here](file-formats.md#data-conversion-keystroke-savers). @@ -69,6 +75,9 @@ color shape flag k index quantity rate purple square false 10 91 72.3735 8.2430 yellow circle true 9 87 63.5058 8.3350 yellow circle true 8 73 63.9785 4.2370 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -79,6 +88,9 @@ shape  quantity
 square 72.3735
 circle 63.5058
 circle 63.9785
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If there's more than one input file, you can use `--mfrom`, then however many file names, then `--` to indicate the end of your input-file-name list: diff --git a/docs/src/log-processing-examples.md b/docs/src/log-processing-examples.md index ad0b2a333..c835c735e 100644 --- a/docs/src/log-processing-examples.md +++ b/docs/src/log-processing-examples.md @@ -86,6 +86,9 @@ type hit_mean A1 0.8571428571428571 A4 0.7142857142857143 A9 0.09090909090909091 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -102,6 +105,9 @@ time                 batch_size num_filtered time_delta num_filtered_delta
 2016-09-02T12:35:20Z 100        554          7          61
 2016-09-02T12:35:36Z 100        612          16         58
 2016-09-02T12:35:42Z 100        728          6          116
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Alternatively, we can simply group the similar data for a better look: @@ -158,6 +164,9 @@ time batch_size num_filtered 1472819720 100 554 1472819736 100 612 1472819742 100 728 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -212,6 +221,9 @@ time                 batch_size num_filtered
 2016-09-02T12:35:20Z 100        554
 2016-09-02T12:35:36Z 100        612
 2016-09-02T12:35:42Z 100        728
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Parsing log-file output diff --git a/docs/src/miller-programming-language.md b/docs/src/miller-programming-language.md index ed41a4f6c..dd2ac8356 100644 --- a/docs/src/miller-programming-language.md +++ b/docs/src/miller-programming-language.md @@ -41,6 +41,9 @@ purple triangle false 7 65 80.1405 5.8240 466.738272 yellow circle true 8 73 63.9785 4.2370 271.0769045 yellow circle true 9 87 63.5058 8.3350 529.3208430000001 purple square false 10 91 72.3735 8.2430 596.5747605000001 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream When we type that, a few things are happening: @@ -69,6 +72,9 @@ purple triangle false 7 6500 80.1405 5.8240 466.738272 yellow circle true 8 7300 63.9785 4.2370 271.0769045 yellow circle true 9 8700 63.5058 8.3350 529.3208430000001 purple square false 10 9100 72.3735 8.2430 596.5747605000001 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -89,6 +95,9 @@ purple triangle false 7  6500  80.1405  5.8240 466.738272
 yellow circle   true  8  7300  63.9785  4.2370 271.0769045
 yellow circle   true  9  8700  63.5058  8.3350 529.3208430000001
 purple square   false 10 9100  72.3735  8.2430 596.5747605000001
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
One of Miller's key features is the ability to express data-transformation right there at the keyboard, interactively. But if you find yourself using expressions repeatedly, you can put everything between the single quotes into a file and refer to that using `put -f`: @@ -116,6 +125,9 @@ purple triangle false 7 6500 80.1405 5.8240 466.738272 yellow circle true 8 7300 63.9785 4.2370 271.0769045 yellow circle true 9 8700 63.5058 8.3350 529.3208430000001 purple square false 10 9100 72.3735 8.2430 596.5747605000001 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This becomes particularly important on Windows. Quite a bit of effort was put into making Miller on Windows be able to handle the kinds of single-quoted expressions we're showing here, but if you get syntax-error messages on Windows using examples in this documentation, you can put the parts between single quotes into a file and refer to that using `mlr put -f` -- or, use the triple-double-quote trick as described in the [Miller on Windows page](miller-on-windows.md). @@ -146,6 +158,9 @@ purple square false 10 91 72.3735 8.2430 sum 652.7185 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If you want the end-block output to be the only output, and not include the records from the input data, you can use `mlr put -q`: @@ -156,6 +171,9 @@ If you want the end-block output to be the only output, and not include the reco
 sum
 652.7185
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -167,6 +185,9 @@ sum
   "sum": 652.7185
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -184,6 +205,9 @@ sum
   "sum": 652.7185
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
We'll see in the documentation for [stats1](reference-verbs.md#stats1) that there's a lower-keystroking way to get counts and sums of things: @@ -198,6 +222,9 @@ We'll see in the documentation for [stats1](reference-verbs.md#stats1) that ther "quantity_count": 10 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream So, take this sum/count example as an indication of the kinds of things you can do using Miller's programming language. @@ -249,6 +276,9 @@ a b c nf nr fnr filename filenum newnf 1 2 3 3 1 1 data/a.csv 1 8 4 5 6 3 2 2 data/a.csv 1 8 7 8 9 3 3 1 data/b.csv 2 8 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Functions and local variables @@ -283,6 +313,9 @@ purple triangle false 7 65 80.1405 5.8240 5040 yellow circle true 8 73 63.9785 4.2370 40320 yellow circle true 9 87 63.5058 8.3350 362880 purple square false 10 91 72.3735 8.2430 3628800 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Note that here we used the `-f` flag to `put` to load our function @@ -320,6 +353,9 @@ end {
 count_of_red sum_of_red
 4            247.84139999999996
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Miller's else-if is spelled `elif`. @@ -350,6 +386,9 @@ print a,b,c 1,2,3 4,5,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -364,6 +403,9 @@ KEY IS a VALUE IS 4
 KEY IS b VALUE IS 5
 KEY IS c VALUE IS 6
 
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Here we used the local variables `k` and `v`. Now we've seen four kinds of variables: @@ -416,6 +458,9 @@ For example, you can sum up all the `$a` values across records without having to "b": 5 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -438,4 +483,7 @@ For example, you can sum up all the `$a` values across records without having to
   "sum_of_a": 5
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/misc-examples.md b/docs/src/misc-examples.md index 0a528d436..5720c6853 100644 --- a/docs/src/misc-examples.md +++ b/docs/src/misc-examples.md @@ -177,9 +177,14 @@ And, suppose you want to compute the differences in the counters between adjacen First, rename counter columns to make them distinct: -
+
 mlr --csv rename count,previous_count data/previous_counters.csv > data/prevtemp.csv
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 cat data/prevtemp.csv
@@ -192,9 +197,14 @@ orange,694
 purple,12
 
-
+
 mlr --csv rename count,current_count data/current_counters.csv > data/currtemp.csv
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 cat data/currtemp.csv
@@ -223,6 +233,9 @@ orange 694            670           -24
 yellow 0              27            (error)
 blue   6838           6944          106
 purple 12             0             (error)
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See also the [record-heterogeneity page](record-heterogeneity.md). diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md index 13e17a06f..17f6f63bb 100644 --- a/docs/src/new-in-miller-6.md +++ b/docs/src/new-in-miller-6.md @@ -165,6 +165,9 @@ purple,square,false,10,91,72.3735,8.2430 yellow,triangle,true,1,11,43.6498,9.8870 purple,triangle,false,5,51,81.2290,8.5910 purple,triangle,false,7,65,80.1405,5.8240 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
## Data processing @@ -226,6 +229,9 @@ For example (see [https://github.com/johnkerl/miller/issues/178](https://github. "a": "0123" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -238,6 +244,9 @@ For example (see [https://github.com/johnkerl/miller/issues/178](https://github.
   "y": 1.230000000
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### Deduping of repeated field names @@ -339,6 +348,9 @@ This works in Miller 6 (and worked in Miller 5 as well) and is supported:
 input=1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Please see the [section on emit statements](reference-dsl-output-statements.md#emit1-and-emitemitpemitf) diff --git a/docs/src/operating-on-all-fields.md b/docs/src/operating-on-all-fields.md index 452f4486d..0d9df3571 100644 --- a/docs/src/operating-on-all-fields.md +++ b/docs/src/operating-on-all-fields.md @@ -40,6 +40,9 @@ a_b_c,def,g_h_i 123,4567,890 2468,1357,3579 9987,3312,4543 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -50,6 +53,9 @@ a_b_c def  g_h_i
 123   4567 890
 2468  1357 3579
 9987  3312 4543
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can also do this with a for-loop: @@ -73,6 +79,9 @@ a_b_c def g_h_i 123 4567 890 2468 1357 3579 9987 3312 4543 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Bulk rename of fields with carriage returns @@ -106,6 +115,9 @@ field A,field B 1,2 3,3 6,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Search-and-replace over all fields @@ -137,6 +149,9 @@ for (k in $*) { a,b,c thX quick,brown fox,jumpXd ovXr,thX,lazy dogs +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Full field renames and reassigns @@ -177,4 +192,7 @@ z=0.758679,KEYFIELD=eks,i=3,b=pan,y=0.758679,x=0.522151 z=0.204603,KEYFIELD=wye,i=6,b=wye,y=0.204603,x=0.338318 z=0.381399,KEYFIELD=eks,i=10,b=wye,y=0.381399,x=0.134188 z=0.573288,KEYFIELD=wye,i=15,b=pan,y=0.573288,x=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream diff --git a/docs/src/operating-on-all-records.md b/docs/src/operating-on-all-records.md index 6663f1c18..483dba319 100644 --- a/docs/src/operating-on-all-records.md +++ b/docs/src/operating-on-all-records.md @@ -86,6 +86,9 @@ after all the input is read. "sum": 119 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream And if all we want is the final output and not the input data, we can use `put @@ -111,6 +114,9 @@ And if all we want is the final output and not the input data, we can use `put "sum": 119 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream As discussed a bit more on the page on [streaming processing and memory @@ -173,6 +179,9 @@ cat,54 "sum": 119 } ] +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream The downside to this, of course, is that this retains all records (plus data-structure overhead) in memory, so you're limited to processing files that fit in your computer's memory. The upside, though, is that you can do random access over the records using things like @@ -232,6 +241,9 @@ The third option is to retain records in an [array](reference-main-arrays.md), t "sum": 119 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Just as with the retain-as-map approach, the downside is the overhead of @@ -276,6 +288,9 @@ array will have [null-gaps](reference-main-arrays.md) in it: ] [ ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream You can index `@records` by `@count` rather than `NR` to get a contiguous array: @@ -319,6 +334,9 @@ You can index `@records` by `@count` rather than `NR` to get a contiguous array: "sum": 91 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If you use a map to retain records, then this is a non-issue: maps can retain whatever values you like: @@ -360,6 +378,9 @@ If you use a map to retain records, then this is a non-issue: maps can retain wh "sum": 91 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Do note that Miller [maps](reference-main-maps.md) preserve insertion order, so @@ -404,6 +425,9 @@ interested in: "sum": 91 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Sorting diff --git a/docs/src/parsing-and-formatting-fields.md b/docs/src/parsing-and-formatting-fields.md index 1f2d5426e..d9f13f1b2 100644 --- a/docs/src/parsing-and-formatting-fields.md +++ b/docs/src/parsing-and-formatting-fields.md @@ -53,6 +53,9 @@ Robert,"Bob,Bobby,Biker","2,4,6" "codes": "2,4,6" } ] +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream Then we can use the [`splita`](reference-dsl-builtin-functions.md#splita) function to split the @@ -74,6 +77,9 @@ Then we can use the [`splita`](reference-dsl-builtin-functions.md#splita) functi "codes": "2,4,6" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Likewise we can split the `codes` field. Since these look like numbers, we can again use `splita` @@ -97,6 +103,9 @@ substrings, with no type inference: "codes": [2, 4, 6] } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -115,6 +124,9 @@ substrings, with no type inference:
   "codes": ["2", "4", "6"]
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
We can do operations on the array, then use [joinv](reference-dsl-builtin-functions.md#joinv) to put them @@ -140,6 +152,9 @@ back together: "codes": "200,400,600" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -153,6 +168,9 @@ back together:
 name,nicknames,codes
 Alice,"Allie,Skater","100,300,500"
 Robert,"Bob,Bobby,Biker","200,400,600"
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The full list of split functions includes @@ -195,6 +213,9 @@ host,status xy01.east,up ab02.west,down ac91.west,up +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Flatten/unflatten: representing arrays in CSV @@ -219,6 +240,9 @@ _flatten/unflatten strategy_: array-valued fields are turned into multiple CSV c "codes": ["2", "4", "6"] } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -228,6 +252,9 @@ _flatten/unflatten strategy_: array-valued fields are turned into multiple CSV c
 name,nicknames,codes.1,codes.2,codes.3
 Alice,"Allie,Skater",1,3,5
 Robert,"Bob,Bobby,Biker",2,4,6
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See the [flatten/unflatten: converting between JSON and tabular formats](flatten-unflatten.md) @@ -279,6 +306,9 @@ stamp,event "pieces": [5, 19, "07", 56] } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -293,6 +323,9 @@ stamp      event description
 5-18:53:22 close 5 day(s) 18 hour(s) 53 minute(s) 22 seconds(s)
 5-19:07:34 open  5 day(s) 19 hour(s) 07 minute(s) 34 seconds(s)
 5-19:07:56 close 5 day(s) 19 hour(s) 07 minute(s) 56 seconds(s)
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Using regular expressions and capture groups @@ -312,6 +345,9 @@ stamp event description 5-18:53:22 close 5 day(s) 18 hour(s) 53 minute(s) 22 seconds(s) 5-19:07:34 open 5 day(s) 19 hour(s) 07 minute(s) 34 seconds(s) 5-19:07:56 close 5 day(s) 19 hour(s) 07 minute(s) 56 seconds(s) +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Special case: timestamps @@ -337,6 +373,9 @@ sec dhms 100 1m40s 10000 2h46m40s 1000000 11d13h46m40s +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Please see @@ -353,6 +392,9 @@ One way to handle currencies is to sub out the currency marker (like `$`) as wel
 d=1234.56
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Nesting and unnesting fields @@ -368,6 +410,9 @@ For example: name nicknames codes Alice Allie,Skater 1,3,5 Robert Bob,Bobby,Biker 2,4,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -380,6 +425,9 @@ Alice  Skater    1,3,5
 Robert Bob       2,4,6
 Robert Bobby     2,4,6
 Robert Biker     2,4,6
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See [documentation on the nest verb](reference-verbs.md#nest) for general information on how to do this. diff --git a/docs/src/performance.md b/docs/src/performance.md index a6de11989..cb03424f1 100644 --- a/docs/src/performance.md +++ b/docs/src/performance.md @@ -16,6 +16,8 @@ Quick links: # Performance +See also the [performance-benchmarks section](new-in-miller-6.md#performance-benchmarks). + ## Disclaimer In a previous version of this page, I compared Miller to some items in the Unix toolkit in terms of run time. But such comparisons are very much not apples-to-apples: diff --git a/docs/src/performance.md.in b/docs/src/performance.md.in index d1123e5da..8e0919d4d 100644 --- a/docs/src/performance.md.in +++ b/docs/src/performance.md.in @@ -1,5 +1,7 @@ # Performance +See also the [performance-benchmarks section](new-in-miller-6.md#performance-benchmarks). + ## Disclaimer In a previous version of this page, I compared Miller to some items in the Unix toolkit in terms of run time. But such comparisons are very much not apples-to-apples: diff --git a/docs/src/programming-examples.md b/docs/src/programming-examples.md index a8e42db58..e2f92944a 100644 --- a/docs/src/programming-examples.md +++ b/docs/src/programming-examples.md @@ -89,6 +89,9 @@ end { 83 89 97 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Mandelbrot-set generator @@ -228,6 +231,9 @@ CHARS = @X*o-. @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream But using a very small font size (as small as my Mac will let me go), and by choosing the coordinates to zoom in on a particular part of the complex plane, we can get a nice little picture: diff --git a/docs/src/questions-about-joins.md b/docs/src/questions-about-joins.md index b8bde2d46..32d1aec1d 100644 --- a/docs/src/questions-about-joins.md +++ b/docs/src/questions-about-joins.md @@ -30,6 +30,9 @@ hostname ipaddr nadir.east.our.org 10.3.1.18 zenith.west.our.org 10.3.1.27 apoapsis.east.our.org 10.4.5.94 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -46,6 +49,9 @@ ipaddr    timestamp  bytes
 10.3.1.27 1448762599 0
 10.3.1.18 1448762598 73425
 10.4.5.94 1448762599 12200
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -57,6 +63,9 @@ ipaddr    hostname              timestamp  bytes
 10.4.5.94 apoapsis.east.our.org 1448762579 17445
 10.4.5.94 apoapsis.east.our.org 1448762589 8899
 10.4.5.94 apoapsis.east.our.org 1448762599 12200
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The issue is that Miller's `join`, by default (before 5.1.0), took input sorted (lexically ascending) by the sort keys on both the left and right files. This design decision was made intentionally to parallel the Unix/Linux system `join` command, which has the same semantics. The benefit of this default is that the joiner program can stream through the left and right files, needing to load neither entirely into memory. The drawback, of course, is that it requires sorted input. @@ -77,6 +86,9 @@ ipaddr hostname timestamp bytes 10.3.1.27 zenith.west.our.org 1448762599 0 10.3.1.18 nadir.east.our.org 1448762598 73425 10.4.5.94 apoapsis.east.our.org 1448762599 12200 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream General advice is to make sure the left-file is relatively small, e.g. containing name-to-number mappings, while saving large amounts of data for the right file. @@ -107,6 +119,9 @@ Joining on color the results are as expected: id,code,color 4,ff0000,red 2,00ff00,green +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream However, if we ask for left-unpaireds, since there's no `color` column, we get a row not having the same column names as the other: @@ -121,6 +136,9 @@ id,code,color id,code 3,0000ff +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream To fix this, we can use **unsparsify**: @@ -135,6 +153,9 @@ id,code,color 4,ff0000,red 2,00ff00,green 3,0000ff, +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Thanks to @aborruso for the tip! @@ -199,4 +220,7 @@ id status name task 20 idle Carol mix 10 idle Bob knead 30 occupied Alice clean +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream diff --git a/docs/src/questions-about-then-chaining.md b/docs/src/questions-about-then-chaining.md index 7f558ee8a..b1c937051 100644 --- a/docs/src/questions-about-then-chaining.md +++ b/docs/src/questions-about-then-chaining.md @@ -45,6 +45,9 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream After that, run it with the next `then` step included: @@ -59,6 +62,9 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Now if you use `then` to include another verb after that, the columns `Status`, `Payment_Type`, and `count` will be the input to that verb. @@ -75,6 +81,12 @@ paid cash 2 pending debit 1 pending credit 1 paid debit 1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## NR is not consecutive after then-chaining @@ -100,6 +112,9 @@ why don't I see `NR=1` and `NR=2` here??
 a=eks,b=pan,i=2,x=0.758679,y=0.522151,NR=2
 a=wye,b=pan,i=5,x=0.573288,y=0.863624,NR=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The reason is that `NR` is computed for the original input records and isn't dynamically updated. By contrast, `NF` is dynamically updated: it's the number of fields in the current record, and if you add/remove a field, the value of `NF` will change: @@ -109,6 +124,9 @@ The reason is that `NR` is computed for the original input records and isn't dyn
 nf1=3,u=4,nf2=5,nf3=3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
`NR`, by contrast (and `FNR` as well), retains the value from the original input stream, and records may be dropped by a `filter` within a `then`-chain. To recover consecutive record numbers, you can use out-of-stream variables as follows: @@ -130,6 +148,9 @@ nf1=3,u=4,nf2=5,nf3=3 a b i x y nr1 nr2 eks pan 2 0.758679 0.522151 2 1 wye pan 5 0.573288 0.863624 5 2 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Or, simply use `mlr cat -n`: @@ -140,4 +161,7 @@ Or, simply use `mlr cat -n`:
 n=1,a=eks,b=pan,i=2,x=0.758679,y=0.522151
 n=2,a=wye,b=pan,i=5,x=0.573288,y=0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/randomizing-examples.md b/docs/src/randomizing-examples.md index 3a389e9a7..e30e5a189 100644 --- a/docs/src/randomizing-examples.md +++ b/docs/src/randomizing-examples.md @@ -117,6 +117,9 @@ bin_lo bin_hi u_count s_count 1.88 1.92 [64]#...................[9554] [326]#...................[3703] 1.92 1.96 [64]#...................[9554] [326]#...................[3703] 1.96 2 [64]#...................[9554] [326]#...................[3703] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Randomly selecting words from a list diff --git a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index ba80fc3c0..ec3540480 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -41,6 +41,9 @@ a,b,c 1,2,3 4,5,6 7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream It has three records (written here using JSON Lines formatting): @@ -52,6 +55,9 @@ It has three records (written here using JSON Lines formatting): {"a": 1, "b": 2, "c": 3} {"a": 4, "b": 5, "c": 6} {"a": 7, "b": 8, "c": 9} +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Here every row has the same keys, in the same order: `a,b,c`. @@ -66,6 +72,9 @@ a b c 1 2 3 4 5 6 7 8 9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ### Fillable data @@ -80,6 +89,9 @@ a,b,c 1,2,3 4,,6 ,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -89,6 +101,9 @@ a,b,c
 {"a": 1, "b": 2, "c": 3}
 {"a": 4, "b": "", "c": 6}
 {"a": "", "b": 8, "c": 9}
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
This example is still homogeneous, though: every row has the same keys, in the same order: `a,b,c`. @@ -105,6 +120,9 @@ a b c 1 2 3 4 filler 6 filler 8 9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ### Ragged data @@ -162,6 +180,9 @@ with 1) for too-long rows: "4": 10 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ### Irregular data @@ -199,6 +220,9 @@ the keys: {"a": 1, "b": 2, "c": 3} {"a": 4, "b": 5, "c": 6} {"a": 7, "b": 8, "c": 9} +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The `regularize` verb tries to re-order subsequent rows to look like the first @@ -232,6 +256,9 @@ data for items which are present, but won't log data for items which aren't. "reimaged": true } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This data is called **sparse** (from the [data-storage term](https://en.wikipedia.org/wiki/Sparse_matrix)). @@ -266,6 +293,9 @@ every record has the same keys: "reimaged": true } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Since this data is now homogeneous (rectangular), it pretty-prints nicely: @@ -278,6 +308,9 @@ host status volume purpose reimaged xy01.east running /dev/sda1 - - xy92.west running - - - xy55.east - /dev/sda1 failover true +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Reading and writing heterogeneous data @@ -317,6 +350,9 @@ For these formats, record-heterogeneity comes naturally: xy01.east running /dev/sda1 xy92.west running failover xy55.east /dev/sda1 true +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -334,6 +370,9 @@ purpose  failover
 host     xy55.east
 volume   /dev/sda1
 reimaged true
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -343,6 +382,9 @@ reimaged true
 host=xy01.east,status=running,volume=/dev/sda1
 host=xy92.west,status=running
 purpose=failover,host=xy55.east,volume=/dev/sda1,reimaged=true
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Even then, we may wish to put like with like, using the [`group-like`](reference-verbs.md#group-like) verb: @@ -356,6 +398,9 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -367,6 +412,9 @@ resource=/path/to/second/file,loadsec=0.32,ok=true
 resource=/some/other/path,loadsec=0.97,ok=false
 record_count=100,resource=/path/to/file
 record_count=150,resource=/path/to/second/file
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### Rectangular file formats: CSV and pretty-print @@ -429,6 +477,9 @@ record_count resource resource loadsec ok /some/other/path 0.97 false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -443,6 +494,9 @@ resource             loadsec ok
 record_count resource
 100          /path/to/file
 150          /path/to/second/file
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Miller handles explicit header changes as just shown. If your CSV input contains ragged data -- if there are implicit header changes (no intervening blank line and new header line) as seen above -- you can use `--allow-ragged-csv-input` (or keystroke-saver `--ragged`). @@ -457,6 +511,9 @@ a,b,c a,b,c,4 7,8,9,10 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Processing heterogeneous data @@ -493,4 +550,7 @@ count=300,color=blue count=450 count=500,color=green count=600 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index bbb65a5eb..6d803f1b8 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -37,6 +37,9 @@ PURPLE tr**ngl* false 7 65 80.1405 5.8240 YELLOW c*rcl* true 8 73 63.9785 4.2370 YELLOW c*rcl* true 9 87 63.5058 8.3350 PURPLE sq**r* false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream the `toupper` and `gsub` bits are _functions_. diff --git a/docs/src/reference-dsl-control-structures.md b/docs/src/reference-dsl-control-structures.md index 16de01613..032f2336b 100644 --- a/docs/src/reference-dsl-control-structures.md +++ b/docs/src/reference-dsl-control-structures.md @@ -29,6 +29,9 @@ x=0 x=1 x=2 x=3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -40,6 +43,9 @@ x=0
 x=1,y=0,z=0
 x=2,y=0.3010299956639812,z=0.5486620049392715
 x=3,y=0.4771212547196624,z=0.6907396432228734
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -49,6 +55,9 @@ x=3,y=0.4771212547196624,z=0.6907396432228734
 a=abc_123
 a=some other name
 a=xyz_789
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -62,6 +71,9 @@ a=xyz_789
 a=abc_123,b=left_abc,c=right_123
 a=some other name
 a=xyz_789,b=left_xyz,c=right_789
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
This produces heterogeneous output which Miller, of course, has no problems with (see [Record Heterogeneity](record-heterogeneity.md)). But if you want homogeneous output, the curly braces can be replaced with a semicolon between the expression and the body statements. This causes `put` to evaluate the boolean expression (along with any side effects, namely, regex-captures `\1`, `\2`, etc.) but doesn't use it as a criterion for whether subsequent assignments should be executed. Instead, subsequent assignments are done unconditionally: @@ -78,6 +90,9 @@ a b c abc_123 left_abc right_123 some other name left_ right_ xyz_789 left_xyz right_789 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Note that pattern-action blocks are just a syntactic variation of if-statements. The following do the same thing: @@ -136,6 +151,9 @@ Miller's `while` and `do-while` are unsurprising in comparison to various langua
 x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -151,6 +169,9 @@ x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
 
 x=1,y=2,3=,4=,5=,foo=bar
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
A `break` or `continue` within nested conditional blocks or if-statements will, @@ -219,6 +240,9 @@ NR = 5 key: i value: 5 key: x value: 0.573288 key: y value: 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -234,6 +258,9 @@ NR = 5
 
 key: a valuetype: int
 key: b valuetype: map
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that the value corresponding to a given key may be gotten through a **computed field name** using square brackets as in `$[e]` for stream records, or by indexing the looped-over variable using square brackets. @@ -256,6 +283,9 @@ value: 20 valuetype: string value: {} valuetype: map value: four valuetype: string value: true valuetype: bool +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
### Key-value for-loops @@ -294,6 +324,9 @@ label1 label2 f1 f2 f3 sum1 sum2 sum3 blue green 100 240 350 690 690 690 red green 120 11 195 326 326 326 yellow blue 140 0 240 380 380 380 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -306,6 +339,9 @@ eks pan 2 0.758679 0.522151 string string int    float  float
 wye wye 3 0.204603 0.338318 string string int    float  float
 eks wye 4 0.381399 0.134188 string string int    float  float
 wye pan 5 0.573288 0.863624 string string int    float  float
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that the value of the current field in the for-loop can be gotten either using the bound variable `value`, or through a **computed field name** using square brackets as in `$[key]`. @@ -331,6 +367,9 @@ eks pan 2 0.758679 0.522151 3.28083 13.12332 wye wye 3 0.204603 0.338318 3.542921 14.171684 eks wye 4 0.381399 0.134188 4.515587 18.062348 wye pan 5 0.573288 0.863624 6.4369119999999995 25.747647999999998 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream It can be confusing to modify the stream record while iterating over a copy of it, so instead you might find it simpler to use a local variable in the loop and only update the stream record after the loop: @@ -353,6 +392,9 @@ eks pan 2 0.758679 0.522151 3.28083 wye wye 3 0.204603 0.338318 3.542921 eks wye 4 0.381399 0.134188 4.515587 wye pan 5 0.573288 0.863624 6.4369119999999995 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream You can also start iterating on sub-maps of an out-of-stream or local variable; you can loop over nested keys; you can loop over all out-of-stream variables. The bound variables are bound to a copy of the sub-map as it was before the loop started. The sub-map is specified by square-bracketed indices after `in`, and additional deeper indices are bound to loop key-variables. The terminal values are bound to the loop value-variable whenever the keys are not too shallow. The value-variable may refer to a terminal (string, number) or it may be map-valued if the map goes deeper. Example indexing is as follows: @@ -396,6 +438,9 @@ That's confusing in the abstract, so a concrete example is in order. Suppose the } } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Then we can get at various values as follows: @@ -422,6 +467,9 @@ Then we can get at various values as follows: key=1,valuetype=int key=3,valuetype=map key=6,valuetype=map +Memory profile started. 
+Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -446,6 +494,9 @@ key=6,valuetype=map
 
 key1=3,key2=4,valuetype=int
 key1=6,key2=7,valuetype=map
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -469,6 +520,9 @@ key1=6,key2=7,valuetype=map
 
 key1=7,key2=8,valuetype=int
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### C-style triple-for loops @@ -491,6 +545,9 @@ eks pan 2 0.758679 0.522151 3 wye wye 3 0.204603 0.338318 6 eks wye 4 0.381399 0.134188 10 wye pan 5 0.573288 0.863624 15 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -512,6 +569,9 @@ eks pan 2 0.758679 0.522151 3    3
 wye wye 3 0.204603 0.338318 6    7
 eks wye 4 0.381399 0.134188 10   15
 wye pan 5 0.573288 0.863624 15   31
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Notes: @@ -544,6 +604,9 @@ a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 x_sum=2.26476 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Since uninitialized out-of-stream variables default to 0 for addition/subtraction and 1 for multiplication when they appear on expression right-hand sides (not quite as in `awk`, where they'd default to 0 either way), the above can be written more succinctly as @@ -561,6 +624,9 @@ a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 x_sum=2.26476 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The **put -q** option suppresses printing of each output record, with only `emit` statements being output. So to get only summary outputs, you could write @@ -573,6 +639,9 @@ The **put -q** option suppresses printing of each output record, with only `emit
 x_sum=2.26476
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
We can do similarly with multiple out-of-stream variables: @@ -590,6 +659,9 @@ We can do similarly with multiple out-of-stream variables:
 x_count=5
 x_sum=2.26476
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
This is of course (see also [here](reference-dsl.md#verbs-compared-to-dsl)) not much different than @@ -599,6 +671,9 @@ This is of course (see also [here](reference-dsl.md#verbs-compared-to-dsl)) not
 x_count=5,x_sum=2.26476
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that it's a syntax error for begin/end blocks to refer to field names (beginning with `$`), since begin/end blocks execute outside the context of input records. diff --git a/docs/src/reference-dsl-differences.md b/docs/src/reference-dsl-differences.md index 5a9ef3015..430d2dec2 100644 --- a/docs/src/reference-dsl-differences.md +++ b/docs/src/reference-dsl-differences.md @@ -44,7 +44,7 @@ semicolon where one is needed . The parser tries to remind you about semicolons whenever there's a chance a missing semicolon might be involved in a parse error. -
+
 mlr --csv --from example.csv put -q '
   begin {
     @count = 0 # No semicolon required -- before closing curly brace
@@ -52,6 +52,11 @@ error.
   $x=1         # No semicolon required -- at end of expression
 '
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 mlr --csv --from example.csv put -q '
@@ -171,6 +176,9 @@ avoid this, use the dot operator for string-concatenation instead.
 
 [ a b c ]
 [abc]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Similarly, a final newline is printed for you; use [`printn`](reference-dsl-output-statements.md#print-statements) to avoid this. @@ -222,6 +230,9 @@ word,value apple,37 ball,28 cat,54 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -238,6 +249,9 @@ cat,54
 Record 1 has word apple
 Record 2 has word ball
 Record 3 has word cat
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Also, slices for arrays and strings are _doubly inclusive_: `x[3:5]` gets you diff --git a/docs/src/reference-dsl-filter-statements.md b/docs/src/reference-dsl-filter-statements.md index 0a2de3dd3..d8d289e90 100644 --- a/docs/src/reference-dsl-filter-statements.md +++ b/docs/src/reference-dsl-filter-statements.md @@ -25,6 +25,9 @@ You can use the `filter` DSL keyword within the `put` verb. In fact, the followi color,shape,flag,k,index,quantity,rate red,square,true,2,15,79.2778,0.0130 red,circle,true,3,16,13.8103,2.9010 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -34,6 +37,9 @@ red,circle,true,3,16,13.8103,2.9010
 color,shape,flag,k,index,quantity,rate
 red,square,true,2,15,79.2778,0.0130
 red,circle,true,3,16,13.8103,2.9010
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The former, of course, is a little easier to type. For another example: @@ -46,6 +52,9 @@ color,shape,flag,k,index,quantity,rate yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -56,4 +65,7 @@ color,shape,flag,k,index,quantity,rate
 yellow,circle,true,8,73,63.9785,4.2370
 yellow,circle,true,9,87,63.5058,8.3350
 purple,square,false,10,91,72.3735,8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-dsl-higher-order-functions.md b/docs/src/reference-dsl-higher-order-functions.md index 7861b646b..6eafa832a 100644 --- a/docs/src/reference-dsl-higher-order-functions.md +++ b/docs/src/reference-dsl-higher-order-functions.md @@ -78,6 +78,9 @@ Evens: Odds: [9, 3, 1, 5, 7] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Map examples: @@ -119,6 +122,9 @@ Values with last digit >= 5: "apple": 199, "bottle": 107 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## apply @@ -169,6 +175,9 @@ Cubes: Sorted cubes: [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -228,6 +237,9 @@ Same, with upcased keys:
   "DALE": 2197,
   "EMBER": 6967871
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## reduce @@ -292,6 +304,9 @@ Product of values: Concatenation of values: 2,9,10,3,1,4,5,8,7,6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -366,6 +381,9 @@ String-join of values:
 {
   "joined": "823,13,199,191,107"
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## fold @@ -409,6 +427,9 @@ Sum with fold and 0 initial value: Sum with fold and 1000000 initial value: 1000055 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -465,6 +486,9 @@ Sum of values with fold and 1000000 initial value:
 {
   "sum": 1001333
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## sort @@ -519,6 +543,9 @@ Ascending: Descending: [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Map examples: @@ -610,6 +637,9 @@ Descending by value: "bottle": 107, "dale": 13 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Please see the [sorting page](sorting.md) for more examples. @@ -633,6 +663,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -645,6 +678,9 @@ red    circle true  3  16    13.8103  2.9010
 red    square false 4  48    77.5542  7.4670
 red    square false 6  64    77.1991  9.5310
 purple square false 10 91    72.3735  8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -655,6 +691,9 @@ color shape  flag  k index quantity rate
 red   square true  2 15    79.2778  0.0130
 red   square false 4 48    77.5542  7.4670
 red   square false 6 64    77.1991  9.5310
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -672,6 +711,9 @@ purple triangle false 7  65    80.1405  5.8240 false
 yellow circle   true  8  73    63.9785  4.2370 false
 yellow circle   true  9  87    63.5058  8.3350 false
 purple square   false 10 91    72.3735  8.2430 false
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -682,6 +724,9 @@ color  shape    flag  k index quantity rate
 red    circle   true  3 16    13.8103  2.9010
 purple triangle false 5 51    81.2290  8.5910
 red    square   false 6 64    77.1991  9.5310
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
This last example could also be done using a map: @@ -699,6 +744,9 @@ color shape flag k index quantity rate red circle true 3 16 13.8103 2.9010 purple triangle false 5 51 81.2290 8.5910 red square false 6 64 77.1991 9.5310 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Combined examples @@ -722,6 +770,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -770,6 +821,9 @@ Sorted, then cubed:
 
 Sorted, then cubed, then summed:
 2589905
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Caveats @@ -792,6 +846,9 @@ instead of
 [3, 4, 5]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### No IIFEs @@ -831,6 +888,9 @@ but this does:
 2187
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
### Built-in functions currently unsupported as arguments @@ -871,4 +931,7 @@ but this does:
 [1, 0.9238795325112867, 0.7071067811865476, 0.38268343236508984]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-dsl-operators.md b/docs/src/reference-dsl-operators.md index ba14512e7..ea17d5238 100644 --- a/docs/src/reference-dsl-operators.md +++ b/docs/src/reference-dsl-operators.md @@ -111,6 +111,9 @@ bar.baz bar.baz [ ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This also works on the left-hand sides of assignment statements: @@ -144,6 +147,9 @@ This also works on the left-hand sides of assignment statements: } } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream A few caveats: @@ -159,6 +165,9 @@ A few caveats: 6989 [ ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream * However (awkwardly), if you want to use `.` for map-traversal as well as string-concatenation in the same statement, you'll need to insert parentheses, as the default associativity is left-to-right: @@ -172,6 +181,9 @@ A few caveats: (error) [ ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -183,4 +195,7 @@ A few caveats:
 GET -- api/check
 [
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-dsl-output-statements.md b/docs/src/reference-dsl-output-statements.md index 0984b1fd5..845bc3949 100644 --- a/docs/src/reference-dsl-output-statements.md +++ b/docs/src/reference-dsl-output-statements.md @@ -102,11 +102,19 @@ purple,triangle,false,7,65,80.1405,5.8240 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 purple,square,false,10,91,72.3735,8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream -
+
 mlr --csv --from example.csv put -q 'tee > $shape.".csv", $*'
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 mlr --csv cat circle.csv
@@ -116,6 +124,9 @@ color,shape,flag,k,index,quantity,rate
 red,circle,true,3,16,13.8103,2.9010
 yellow,circle,true,8,73,63.9785,4.2370
 yellow,circle,true,9,87,63.5058,8.3350
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -127,6 +138,9 @@ red,square,true,2,15,79.2778,0.0130
 red,square,false,4,48,77.5542,7.4670
 red,square,false,6,64,77.1991,9.5310
 purple,square,false,10,91,72.3735,8.2430
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -137,6 +151,9 @@ color,shape,flag,k,index,quantity,rate
 yellow,triangle,true,1,11,43.6498,9.8870
 purple,triangle,false,5,51,81.2290,8.5910
 purple,triangle,false,7,65,80.1405,5.8240
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See also [Redirected-output statements](reference-dsl-output-statements.md#redirected-output-statements) for examples. @@ -384,6 +401,9 @@ id color shape flag k index quantity rate 8 yellow circle true 8 73 63.9785 4.2370 9 yellow circle true 9 87 63.5058 8.3350 10 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
And if you want indexing, redirects, etc., just assign to a temporary variable and use one of the other emit variants: @@ -406,6 +426,9 @@ id color shape flag k index quantity rate 8 yellow circle true 8 73 63.9785 4.2370 9 yellow circle true 9 87 63.5058 8.3350 10 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Emitf statements @@ -422,6 +445,9 @@ Use **emitf** to output several out-of-stream variables side-by-side in the same
 count=5,x_sum=2.26476,y_sum=2.585083
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Emit statements @@ -446,6 +472,9 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624 { "sum": 2.26476 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -453,6 +482,9 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624
 
 sum=2.26476
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If it's indexed then use as many names after `emit` as there are indices: @@ -468,6 +500,9 @@ If it's indexed then use as many names after `emit` as there are indices: "wye": 0.777891 } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -477,6 +512,9 @@ If it's indexed then use as many names after `emit` as there are indices:
 a=pan,sum=0.346791
 a=eks,sum=1.140078
 a=wye,sum=0.777891
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -498,6 +536,9 @@ a=wye,sum=0.777891
     }
   }
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -509,6 +550,9 @@ a=eks,b=pan,sum=0.758679
 a=eks,b=wye,sum=0.381399
 a=wye,b=wye,sum=0.204603
 a=wye,b=pan,sum=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -540,6 +584,9 @@ a=wye,b=pan,sum=0.573288
     }
   }
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -554,6 +601,9 @@ a=eks,b=pan,i=2,sum=0.758679
 a=eks,b=wye,i=4,sum=0.381399
 a=wye,b=wye,i=3,sum=0.204603
 a=wye,b=pan,i=5,sum=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Now for **emitp**: if you have as many names following `emit` as there are levels in the out-of-stream variable's map, then `emit` and `emitp` do the same thing. Where they differ is when you don't specify as many names as there are map levels. In this case, Miller needs to flatten multiple map indices down to output-record keys: `emitp` includes full prefixing (hence the `p` in `emitp`) while `emit` takes the deepest map key as the output-record key: @@ -577,6 +627,9 @@ Now for **emitp**: if you have as many names following `emit` as there are level } } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -586,6 +639,9 @@ Now for **emitp**: if you have as many names following `emit` as there are level
 a=pan,pan=0.346791
 a=eks,pan=0.758679,wye=0.381399
 a=wye,wye=0.204603,pan=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -595,6 +651,9 @@ a=wye,wye=0.204603,pan=0.573288
 pan=0.346791
 pan=0.758679,wye=0.381399
 wye=0.204603,pan=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -604,6 +663,9 @@ wye=0.204603,pan=0.573288
 a=pan,sum.pan=0.346791
 a=eks,sum.pan=0.758679,sum.wye=0.381399
 a=wye,sum.wye=0.204603,sum.pan=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -611,6 +673,9 @@ a=wye,sum.wye=0.204603,sum.pan=0.573288
 
 sum.pan.pan=0.346791,sum.eks.pan=0.758679,sum.eks.wye=0.381399,sum.wye.wye=0.204603,sum.wye.pan=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -622,6 +687,9 @@ sum.eks.pan 0.758679
 sum.eks.wye 0.381399
 sum.wye.wye 0.204603
 sum.wye.pan 0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Use **--flatsep** to specify the character which joins multilevel @@ -634,6 +702,9 @@ keys for `emitp` (it defaults to a period): a=pan,sum/pan=0.346791 a=eks,sum/pan=0.758679,sum/wye=0.381399 a=wye,sum/wye=0.204603,sum/pan=0.573288 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -641,6 +712,9 @@ a=wye,sum/wye=0.204603,sum/pan=0.573288
 
 sum/pan/pan=0.346791,sum/eks/pan=0.758679,sum/eks/wye=0.381399,sum/wye/wye=0.204603,sum/wye/pan=0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -655,6 +729,9 @@ sum/eks/pan 0.758679
 sum/eks/wye 0.381399
 sum/wye/wye 0.204603
 sum/wye/pan 0.573288
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Multi-emit statements @@ -701,6 +778,9 @@ hat zee 196.3494502965293 385 0.5099985721987774 hat eks 189.0067933716193 389 0.48587864619953547 hat hat 182.8535323148762 381 0.47993053101017374 hat pan 168.5538067327806 363 0.4643355557376876 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream What this does is walk through the first out-of-stream variable (`@x_sum` in this example) as usual, then for each keylist found (e.g. `pan,wye`), include the values for the remaining out-of-stream variables (here, `@x_count` and `@x_mean`). You should use this when all out-of-stream variables in the emit statement have **the same shape and the same keylists**. @@ -723,6 +803,9 @@ eks pan 0.758679 1 eks wye 0.381399 1 wye wye 0.204603 1 wye pan 0.573288 1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -746,6 +829,9 @@ eks pan 1
 eks wye 1
 wye wye 1
 wye pan 1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -762,4 +848,7 @@ eks pan 0.758679 1
 eks wye 0.381399 1
 wye wye 0.204603 1
 wye pan 0.573288 1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-dsl-syntax.md b/docs/src/reference-dsl-syntax.md index 33ed3f4d7..970906b0c 100644 --- a/docs/src/reference-dsl-syntax.md +++ b/docs/src/reference-dsl-syntax.md @@ -35,6 +35,9 @@ i j k 7 8 15 8 9 17 9 10 19 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Newlines within the expression are ignored, which can help increase legibility of complex expressions: @@ -60,6 +63,9 @@ wye eks 10000 0.734806020620654365 0.884788571337605134 5 7 2 2 data/s pan wye 10001 0.870530722602517626 0.009854780514656930 5 8 3 2 data/small2 hat wye 10002 0.321507044286237609 0.568893318795083758 5 9 4 2 data/small2 pan zee 10003 0.272054845593895200 0.425789896597056627 5 10 5 2 data/small2 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -70,6 +76,9 @@ pan zee 10003 0.272054845593895200 0.425789896597056627 5  10 5   2       data/s
 
 x_y_corr
 -0.7479940285189345
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Expressions from files @@ -85,6 +94,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -96,6 +108,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562
 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352
 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655
 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You may, though, find it convenient to put expressions into files for reuse, and read them @@ -120,6 +135,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream If you have some of the logic in a file and you want to write the rest on the command line, you can **use the -f and -e options together**: @@ -142,6 +160,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,xy=0.9209970096813562 a=wye,b=wye,i=3,x=0.204603,y=0.338318,xy=0.3953750836016352 a=eks,b=wye,i=4,x=0.381399,y=0.134188,xy=0.40431623334340655 a=wye,b=pan,i=5,x=0.573288,y=0.863624,xy=1.036583592538489 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream A suggested use-case here is defining functions in files, and calling them from command-line expressions. @@ -168,6 +189,9 @@ Semicolons are optional after closing curly braces (which close conditionals and
 x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -175,6 +199,9 @@ x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
 
 x=1,y=2,3=,4=,5=,6=,7=,8=,9=,10=,foo=bar
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Semicolons are required between statements even if those statements are on separate lines. **Newlines** are for your convenience but have no syntactic meaning: line endings do not terminate statements. For example, adjacent assignment statements must be separated by semicolons even if those statements are on separate lines: @@ -216,6 +243,9 @@ mlr put ' s,t,u,v 3,-1,5,1 9,-1,41,2 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Bodies for all compound statements must be enclosed in **curly braces**, even if the body is a single statement: diff --git a/docs/src/reference-dsl-time.md b/docs/src/reference-dsl-time.md index cd6210ec5..7c6ba69c9 100644 --- a/docs/src/reference-dsl-time.md +++ b/docs/src/reference-dsl-time.md @@ -56,6 +56,9 @@ treating epoch-milliseconds as epoch-seconds.
 2017-07-14T02:40:00Z
 49503-02-10T02:40:00Z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can get the current system time, as epoch-seconds, using the @@ -113,6 +116,9 @@ We also have [sec2gmtdate](reference-dsl-builtin-functions.md#sec2gmtdate) DSL f 1970-01-01 2009-02-13 1930-11-18 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Local times with standard format; specifying timezones @@ -145,6 +151,9 @@ mlr : unknown time zone This/Is/A/Typo
 1970-01-01 02:00:00
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -152,6 +161,9 @@ mlr :  unknown time zone This/Is/A/Typo
 
 1969-12-31 21:00:00
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -175,6 +187,9 @@ mlr :  unknown time zone This/Is/A/Typo
 1969-12-31 21:00:00
 1969-12-31
 946789445
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -196,6 +211,9 @@ mlr :  unknown time zone This/Is/A/Typo
 1969-12-31 21:00:00
 1969-12-31
 946789445
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that for local times, Miller omits the `T` and the `Z` you see in GMT times. @@ -214,6 +232,9 @@ We also have the
 1970-01-01 02:00:00
 1969-12-31T22:00:00Z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -229,6 +250,9 @@ We also have the
 1970-01-01 02:00:00
 1970-01-01T03:00:00Z
 1969-12-31T22:00:00Z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Custom formats: strptime and strftime @@ -322,6 +346,9 @@ Examples:
 1970-01-01T00:00:00Z
 1970-01-01T00:00:00Z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -340,6 +367,9 @@ Examples:
 1970-01-01 00:00:00 +0000
 Thursday, January  1, 1970
 09:33 PM
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Unfortunately, names from `%A` and `%B` are only available in English, as an artifact of a design @@ -376,6 +406,9 @@ For historical reasons, Miller's `strftime` and `strptime` use different format 1970-01-02 10:17:36.789000 (error) 123456.789 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## strptime_local and strftime_local @@ -409,6 +442,9 @@ Wednesday, December 31, 1969 1970-01-01 08:00:00 +0800 Thursday, January 1, 1970 1582992000 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -434,6 +470,9 @@ Wednesday, December 31, 1969
 1970-01-01 08:00:00 +0800
 Thursday, January  1, 1970
 1582992000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Relative times diff --git a/docs/src/reference-dsl-unset-statements.md b/docs/src/reference-dsl-unset-statements.md index d7ced4177..bc0852804 100644 --- a/docs/src/reference-dsl-unset-statements.md +++ b/docs/src/reference-dsl-unset-statements.md @@ -38,6 +38,9 @@ b=pan,i=2,y=0.522151 b=wye,i=3,y=0.338318 b=wye,i=4,y=0.134188 b=pan,i=5,y=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This can also be done, of course, using `mlr cut -x`. You can also clear out-of-stream or local variables, at the base name level, or at an indexed sublevel: @@ -62,6 +65,9 @@ This can also be done, of course, using `mlr cut -x`. You can also clear out-of- } } {} +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -94,6 +100,9 @@ This can also be done, of course, using `mlr cut -x`. You can also clear out-of-
     }
   }
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If you use `unset all` (or `unset @*` which is synonymous), that will unset all out-of-stream variables which have been assigned up to that point. diff --git a/docs/src/reference-dsl-user-defined-functions.md b/docs/src/reference-dsl-user-defined-functions.md index d2be5a162..0a1ac5be6 100644 --- a/docs/src/reference-dsl-user-defined-functions.md +++ b/docs/src/reference-dsl-user-defined-functions.md @@ -45,6 +45,9 @@ eks pan 2 0.758679 0.522151 3.6808304227112796 2 wye wye 3 0.204603 0.338318 1.7412477437471126 6 eks wye 4 0.381399 0.134188 18.588317372151177 24 wye pan 5 0.573288 0.863624 211.38663947090302 120 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Properties of user-defined functions: @@ -99,6 +102,9 @@ NR=4 numcalls=10 NR=5 numcalls=15 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Properties of user-defined subroutines: @@ -179,6 +185,9 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle yellow circle true 8 73 63.9785 4.2370 yellow:circle yellow circle true 9 87 63.5058 8.3350 yellow:circle purple square false 10 91 72.3735 8.2430 purple:square +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -205,6 +214,9 @@ purple triangle false 7  65    80.1405  5.8240 purple:triangle above
 yellow circle   true  8  73    63.9785  4.2370 yellow:circle above
 yellow circle   true  9  87    63.5058  8.3350 yellow:circle above
 purple square   false 10 91    72.3735  8.2430 purple:square above
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that you need a semicolon after the closing curly brace of the function literal. @@ -238,6 +250,9 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle above yellow circle true 8 73 63.9785 4.2370 yellow:circle above yellow circle true 9 87 63.5058 8.3350 yellow:circle above purple square false 10 91 72.3735 8.2430 purple:square above +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See the [page on higher-order functions](reference-dsl-higher-order-functions.md) for more. diff --git a/docs/src/reference-dsl-variables.md b/docs/src/reference-dsl-variables.md index 40c63d8a2..e2144864c 100644 --- a/docs/src/reference-dsl-variables.md +++ b/docs/src/reference-dsl-variables.md @@ -36,15 +36,23 @@ If field names have **special characters** such as `.` then you can use braces, You may also use a **computed field name** in square brackets, e.g. -
+
 echo a=3,b=4 | mlr filter '$["x"] < 0.5'
 
+
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
+
 echo s=green,t=blue,a=3,b=4 | mlr put '$[$s."_".$t] = $a * $b'
 
 s=green,t=blue,a=3,b=4,green_blue=12
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Notes: @@ -74,6 +82,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -85,6 +96,9 @@ a=eks,b=pan,NEW=2,x=0.758679,y=0.522151
 a=wye,b=wye,NEW=3,x=0.204603,y=0.338318
 a=eks,b=wye,NEW=4,x=0.381399,y=0.134188
 a=wye,b=pan,NEW=5,x=0.573288,y=0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -96,6 +110,9 @@ a=eks,b=pan,i=NEW,x=0.758679,y=0.522151
 a=wye,b=wye,i=NEW,x=0.204603,y=0.338318
 a=eks,b=wye,i=NEW,x=0.381399,y=0.134188
 a=wye,b=pan,i=NEW,x=0.573288,y=0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -107,6 +124,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,NEW=b
 a=wye,b=wye,i=3,x=0.204603,y=0.338318,NEW=i
 a=eks,b=wye,i=4,x=0.381399,y=0.134188,NEW=x
 a=wye,b=pan,i=5,x=0.573288,y=0.863624,NEW=y
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -118,6 +138,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,NEW=pan
 a=wye,b=wye,i=3,x=0.204603,y=0.338318,NEW=3
 a=eks,b=wye,i=4,x=0.381399,y=0.134188,NEW=0.381399
 a=wye,b=pan,i=5,x=0.573288,y=0.863624,NEW=0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -129,6 +152,9 @@ a=eks,b=NEW,i=2,x=0.758679,y=0.522151
 a=wye,b=wye,i=NEW,x=0.204603,y=0.338318
 a=eks,b=wye,i=4,x=NEW,y=0.134188
 a=wye,b=pan,i=5,x=0.573288,y=NEW
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Right-hand side accesses to non-existent fields -- i.e. with index less than 1 or greater than `NF` -- return an absent value. Likewise, left-hand side accesses only refer to fields which already exist. For example, if a record has 5 fields then assigning the name or value of the 6th (or 600th) field results in a no-op. @@ -142,6 +168,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=wye,i=4,x=0.381399,y=0.134188 a=wye,b=pan,i=5,x=0.573288,y=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -153,6 +182,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151
 a=wye,b=wye,i=3,x=0.204603,y=0.338318
 a=eks,b=wye,i=4,x=0.381399,y=0.134188
 a=wye,b=pan,i=5,x=0.573288,y=0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Out-of-stream variables @@ -170,6 +202,9 @@ You may use a **computed key** in square brackets, e.g.
 green_blue=12
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Out-of-stream variables are **scoped** to the `put` command in which they appear. In particular, if you have two or more `put` commands separated by `then`, each put will have its own set of out-of-stream variables: @@ -192,6 +227,9 @@ a=10,b=2,c=3 a=40,b=5,c=6 sum=5 sum=50 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Out-of-stream variables' **extent** is from the start to the end of the record stream, i.e. every time the `put` or `filter` statement referring to them is executed. @@ -219,6 +257,9 @@ a=wye,x_count=2 a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -228,6 +269,9 @@ a=wye,x_sum=0.777891
 a=pan,x_count=1,x_sum=0.346791
 a=eks,x_count=2,x_sum=1.140078
 a=wye,x_count=2,x_sum=0.777891
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Indices can be arbitrarily deep -- here there are two or more of them: @@ -267,6 +311,9 @@ a=hat,b=zee,x_count=385,x_sum=196.3494502965293 a=hat,b=eks,x_count=389,x_sum=189.0067933716193 a=hat,b=hat,x_count=381,x_sum=182.8535323148762 a=hat,b=pan,x_count=363,x_sum=168.5538067327806 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The idea is that `stats1`, and other Miller verbs, encapsulate frequently-used patterns with a minimum of keystroking (and run a little faster), whereas using out-of-stream variables you have more flexibility and control in what you do. @@ -296,6 +343,9 @@ x=1,y=0,z=0 x=2,y=0.3010299956639812,z=0.5486620049392715 x=3,y=0.4771212547196624,z=0.6907396432228734 num_total=5,num_positive=3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Local variables @@ -333,6 +383,9 @@ i=7,o=13.966128063060479 i=8,o=13.99248245928659 i=9,o=15.784270485515197 i=10,o=15.37686787628025 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Things which are completely unsurprising, resembling many other languages: @@ -424,6 +477,9 @@ inner_d 70 outer_a 10 outer_b 50 outer_c 60 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream And this example demonstrates the type-declaration rules: @@ -494,6 +550,9 @@ a i y 3 wye 3.3831800000000003 4 eks 1.34188 5 wye 8.636239999999999 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Likewise, you can assign map literals to out-of-stream variables or local variables; pass them as arguments to user-defined functions, return them from functions, and so on: @@ -513,6 +572,9 @@ a=eks,x=151.7358 a=wye,x=40.9206 a=eks,x=76.2798 a=wye,x=114.6576 +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream Like out-of-stream and local variables, map literals can be multi-level: @@ -546,6 +608,9 @@ Like out-of-stream and local variables, map literals can be multi-level: "non-numeric": 10 } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See also the [Maps page](reference-main-maps.md). @@ -573,6 +638,9 @@ read/write access to environment variables, e.g. `ENV["HOME"]` or a=eks,b=pan,i=2,x=0.758679,y=0.522151 1=pan,2=pan,3=1,4=0.3467901443380824,5=0.7268028627434533 a=wye,b=eks,i=10000,x=0.734806020620654365,y=0.884788571337605134 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -595,6 +663,9 @@ a=wye,b=eks,i=10000,x=0.734806020620654365,y=0.884788571337605134,fnr=2
 a=pan,b=wye,i=10001,x=0.870530722602517626,y=0.009854780514656930,fnr=3
 a=hat,b=wye,i=10002,x=0.321507044286237609,y=0.568893318795083758,fnr=4
 a=pan,b=zee,i=10003,x=0.272054845593895200,y=0.425789896597056627,fnr=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Their values of `NF`, `NR`, `FNR`, `FILENUM`, and `FILENAME` change from one @@ -613,6 +684,9 @@ Their **scope is global**: you can refer to them in any `filter` or `put` statem a,b,c,nr 1,2,3,1 4,5,6,2 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -626,6 +700,9 @@ a,b,c,nr
 4,5,6,2
 4,5,6,2
 4,5,6,2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The **extent** is for the duration of the put/filter: in a `begin` statement (which executes before the first input record is consumed) you will find `NR=1` and in an `end` statement (which is executed after the last input record is consumed) you will find `NR` to be the total number of records ingested. @@ -839,6 +916,9 @@ Example recursive copy of out-of-stream variables: "count": 5 } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Example of out-of-stream variable assigned to full stream record, where the 2nd record is stashed, and the 4th record is overwritten with that: @@ -852,6 +932,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=wye,i=3,x=0.204603,y=0.338318 a=eks,b=pan,i=2,x=0.758679,y=0.522151 a=wye,b=pan,i=5,x=0.573288,y=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Example of full stream record assigned to an out-of-stream variable, finding the record for which the `x` field has the largest value in the input stream: @@ -876,6 +959,9 @@ a=wye,b=pan,i=5,x=0.573288,y=0.863624
 a   b   i x        y
 eks pan 2 0.758679 0.522151
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Keywords for filter and put diff --git a/docs/src/reference-dsl.md b/docs/src/reference-dsl.md index 46651921a..f9cd47032 100644 --- a/docs/src/reference-dsl.md +++ b/docs/src/reference-dsl.md @@ -39,6 +39,9 @@ Example: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream * Verbs are coded in Go @@ -56,6 +59,9 @@ Example: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream * You get to write your own DSL expressions @@ -120,6 +126,9 @@ apple,37,1 ball,28,2 cat,54,3 end +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The `print` statements for `begin` and `end` went out before the first record @@ -159,6 +168,9 @@ you might retain only the records whose `a` field has value `eks`:
 a=eks,b=pan,i=2,x=0.758679,y=0.522151
 a=eks,b=wye,i=4,x=0.381399,y=0.134188
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
or you might add a new field which is a function of existing fields: @@ -172,6 +184,9 @@ a=eks,b=pan,i=2,x=0.758679,y=0.522151,ab=eks_pan a=wye,b=wye,i=3,x=0.204603,y=0.338318,ab=wye_wye a=eks,b=wye,i=4,x=0.381399,y=0.134188,ab=eks_wye a=wye,b=pan,i=5,x=0.573288,y=0.863624,ab=wye_pan +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Differences between put and filter @@ -206,6 +221,9 @@ purple triangle false 5 51 81.2290 8.5910 high rate red square false 6 64 77.1991 9.5310 high rate purple triangle false 7 65 80.1405 5.8240 low rate purple square false 10 91 72.3735 8.2430 high rate +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -227,6 +245,9 @@ red    square false 6  64    77.1991  9.5310 squ  are
 yellow circle true  8  73    63.9785  4.2370 cir  cle
 yellow circle true  9  87    63.5058  8.3350 cir  cle
 purple square false 10 91    72.3735  8.2430 squ  are
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-main-arrays.md b/docs/src/reference-main-arrays.md index abdc3bb63..840b75a78 100644 --- a/docs/src/reference-main-arrays.md +++ b/docs/src/reference-main-arrays.md @@ -46,6 +46,9 @@ Array literals are written in square brackets braces with integer indices. Array 99, true ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream As with maps and argument-lists, trailing commas are supported: @@ -64,6 +67,9 @@ As with maps and argument-lists, trailing commas are supported:
 ["a", "b", "c"]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Also note that several [built-in functions](reference-dsl-builtin-functions.md) operate on arrays and/or return arrays. @@ -108,6 +114,9 @@ while positive indices read forward from the start. If an array has length `n` t 50 [10, 20] [40, 50] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Slicing @@ -135,6 +144,9 @@ x[4], x[5]]`. [30, 40, 50] [10, 20, 30, 40, 50] [20, 30, 40] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Out-of-bounds indexing @@ -157,6 +169,9 @@ behavior intentionally imitates Python.) 10 50 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -173,6 +188,9 @@ behavior intentionally imitates Python.)
 [10, 20]
 [10, 20, 30, 40, 50]
 []
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Auto-create results in maps @@ -197,6 +215,9 @@ as-yet-assigned local variable or out-of-stream variable results in "square": 8.2430, "circle": 8.3350 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream *This also means that auto-create results in maps, not arrays, even if keys are integers.* @@ -224,6 +245,9 @@ If you want to auto-extend an [array](reference-main-arrays.md), initialize it e "4": 7.4670 } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Auto-extend and null-gaps @@ -262,6 +286,9 @@ are called **null-gaps**.
 ["a", "b"]
 ["a", null, null, null, "e"]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Unset as shift @@ -281,6 +308,9 @@ Unsetting an array index results in shifting all higher-index elements down by o
 ["a", "b", "c", "d", "e"]
 ["a", "c", "d", "e"]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
More generally, you can get shift and pop operations by unsetting indices 1 and -1: diff --git a/docs/src/reference-main-compressed-data.md b/docs/src/reference-main-compressed-data.md index a54ed8026..f188cd942 100644 --- a/docs/src/reference-main-compressed-data.md +++ b/docs/src/reference-main-compressed-data.md @@ -46,6 +46,9 @@ red,square,false,6,64,77.1991,9.5310 yellow,triangle,true,1,11,43.6498,9.8870 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream This will decompress the input data on the fly, while leaving the disk file unmodified. This helps you save disk space, at the cost of some additional runtime CPU usage to decompress the data. @@ -81,6 +84,9 @@ red,square,false,6,64,77.1991,9.5310 yellow,triangle,true,1,11,43.6498,9.8870 yellow,circle,true,8,73,63.9785,4.2370 yellow,circle,true,9,87,63.5058,8.3350 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The benefit of `--prepipe` is that Miller will run the specified program once per diff --git a/docs/src/reference-main-data-types.md b/docs/src/reference-main-data-types.md index 7505acb81..280b1c63b 100644 --- a/docs/src/reference-main-data-types.md +++ b/docs/src/reference-main-data-types.md @@ -76,6 +76,9 @@ Examples: a,b,c 1.2,3,true 4,5.6,buongiorno +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -123,6 +126,9 @@ f  8.9
 tf float
 g  15.9
 tg float
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
On input, string values representable as boolean (e.g. `"true"`, `"false"`) @@ -153,6 +159,9 @@ or the id,blob 100,"{""a"":1,""b"":[2,3,4]}" 105,"{""a"":6,""b"":[7,8,9]}" +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -169,6 +178,9 @@ id,blob
   "blob": "{\"a\":6,\"b\":[7,8,9]}"
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -191,6 +203,9 @@ id,blob
   }
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -213,6 +228,9 @@ id,blob
   }
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
These have their respective operations to convert back to string: the diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md index a71e0aa8e..5a89323a2 100644 --- a/docs/src/reference-main-flag-list.md +++ b/docs/src/reference-main-flag-list.md @@ -33,6 +33,9 @@ Here are flags you can use when invoking Miller. For example, when you type "rate": 9.8870 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream the `--icsv` and `--ojson` bits are _flags_. See the [Miller command @@ -373,6 +376,7 @@ These are flags for profiling Miller performance. **Flags:** * `--cpuprofile {CPU-profile file name}`: Create a CPU-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. +* `--memprofile {Memory-profile file name}`: Create a memory-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. * `--time`: Print elapsed execution time in seconds to stderr at the end of the execution of the program. * `--traceprofile`: Create a trace-profile file for performance analysis. Instructions will be printed to stderr. This flag must be the very first thing after 'mlr' on the command line. diff --git a/docs/src/reference-main-maps.md b/docs/src/reference-main-maps.md index 4d8942d8d..53f0f2b3c 100644 --- a/docs/src/reference-main-maps.md +++ b/docs/src/reference-main-maps.md @@ -48,6 +48,9 @@ _Map literals_ are written in curly braces with string keys any [Miller data typ } true true +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream As with arrays and argument-lists, trailing commas are supported: @@ -70,6 +73,9 @@ As with arrays and argument-lists, trailing commas are supported: "b": 2, "c": 3 } +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream The current record, accessible using `$*`, is a map. @@ -101,6 +107,9 @@ Color is yellow "rate": 0.0130 } Color is red +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The collection of all [out-of-stream variables](reference-dsl-variables.md#out-of-stream-variables), `@*`, is a map. @@ -126,6 +135,9 @@ The collection of all [out-of-stream variables](reference-dsl-variables.md#out-o }, "last_color": "purple" } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Also note that several [built-in functions](reference-dsl-builtin-functions.md) operate on maps and/or return maps. @@ -165,6 +177,9 @@ in **auto-create** of that variable as a map variable: "square": 8.2430, "circle": 8.3350 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream *This also means that auto-create results in maps, not arrays, even if keys are integers.* @@ -192,6 +207,9 @@ If you want to auto-extend an [array](reference-main-arrays.md), initialize it e "4": 7.4670 } } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Auto-deepen @@ -217,6 +235,9 @@ red square 17.011 red circle 2.9010 purple triangle 14.415 purple square 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Looping diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md index 6ddd2518f..32a400081 100644 --- a/docs/src/reference-main-null-data.md +++ b/docs/src/reference-main-null-data.md @@ -69,6 +69,9 @@ a=1,b=8 a=,b=4 x=9,b=10 a=5,b=7 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -80,6 +83,9 @@ a=3,b=2
 a=5,b=7
 a=,b=4
 x=9,b=10
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -91,6 +97,9 @@ a=5,b=7
 a=3,b=2
 a=1,b=8
 x=9,b=10
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
* Functions/operators which have one or more *empty* arguments produce empty output: e.g. @@ -100,6 +109,9 @@ x=9,b=10
 x=2,y=3,a=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -107,6 +119,9 @@ x=2,y=3,a=5
 
 x=,y=3,a=
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -114,6 +129,9 @@ x=,y=3,a=
 
 x=,y=3,a=,b=1.0986122886681096
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
with the exception that the `min` and `max` functions are special: if one argument is non-null, it wins: @@ -123,6 +141,9 @@ with the exception that the `min` and `max` functions are special: if one argume
 x=,y=3,a=3,b=
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
* Functions of *absent* variables (e.g. `mlr put '$y = log10($nonesuch)'`) evaluate to absent, and arithmetic/bitwise/boolean operators with both operands being absent evaluate to absent. Arithmetic operators with one absent operand return the other operand. More specifically, absent values act like zero for addition/subtraction, and one for multiplication. Furthermore, **any expression which evaluates to absent is not stored in the left-hand side of an assignment statement**: @@ -132,6 +153,9 @@ x=,y=3,a=3,b=
 x=2,y=3,b=3,c=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -139,6 +163,9 @@ x=2,y=3,b=3,c=5
 
 x=2,y=3,a=2,b=3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
* Likewise, for assignment to maps, **absent-valued keys or values result in a skipped assignment**. @@ -166,6 +193,9 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -177,6 +207,9 @@ record_count=100,resource=/path/to/file
 resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320
 record_count=150,resource=/path/to/second/file
 resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -188,6 +221,9 @@ record_count=100,resource=/path/to/file,loadmillis=0
 resource=/path/to/second/file,loadsec=0.32,ok=true,loadmillis=320
 record_count=150,resource=/path/to/second/file,loadmillis=0
 resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Arithmetic rules diff --git a/docs/src/reference-main-number-formatting.md b/docs/src/reference-main-number-formatting.md index 627cb1748..b2ef0d381 100644 --- a/docs/src/reference-main-number-formatting.md +++ b/docs/src/reference-main-number-formatting.md @@ -35,6 +35,9 @@ pipe the output to something else, particularly CSV. I use Miller's pretty-print
 x=   3.100,y=   4.300
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -42,6 +45,9 @@ x=   3.100,y=   4.300
 
 x=3.10000000e+00,y=4.30000000e+00
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## The format-values verb @@ -60,6 +66,9 @@ put`. For example:
 x=3.1,y=4.3,z=13.330000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -67,6 +76,9 @@ x=3.1,y=4.3,z=13.330000
 
 x=0xffff,y=0xff,z=00feff01
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Input conversion from hexadecimal is done automatically on fields handled by `mlr put` and `mlr filter` as long as the field value begins with `0x`. To apply output conversion to hexadecimal on a single column, you may use `fmtnum`, or the keystroke-saving [`hexfmt`](reference-dsl-builtin-functions.md#hexfmt) function. Example: @@ -76,6 +88,9 @@ Input conversion from hexadecimal is done automatically on fields handled by `ml
 x=0xffff,y=0xff,z=16711425
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -83,4 +98,7 @@ x=0xffff,y=0xff,z=16711425
 
 x=0xffff,y=0xff,z=0xfeff01
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/reference-main-overview.md b/docs/src/reference-main-overview.md index cc9c3a0b3..2f5b66220 100644 --- a/docs/src/reference-main-overview.md +++ b/docs/src/reference-main-overview.md @@ -34,6 +34,9 @@ For example, reading from a file: color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -43,6 +46,9 @@ yellow triangle true 1 11    43.6498  9.8870
 color  shape    flag k index quantity rate
 red    square   true 2 15    79.2778  0.0130
 yellow triangle true 1 11    43.6498  9.8870
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Reading from standard input: @@ -54,6 +60,9 @@ Reading from standard input: color shape flag k index quantity rate red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The rest of this reference section gives you full information on each of these parts of the command line. @@ -79,6 +88,9 @@ Example of using a verb for data processing: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream * Verbs are coded in Go @@ -96,6 +108,9 @@ Example of doing the same thing using a DSL expression: a=pan,x_sum=0.346791 a=eks,x_sum=1.140078 a=wye,x_sum=0.777891 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream * You get to write your own expressions in Miller's programming language diff --git a/docs/src/reference-main-regular-expressions.md b/docs/src/reference-main-regular-expressions.md index 0df53e059..c4d2efca0 100644 --- a/docs/src/reference-main-regular-expressions.md +++ b/docs/src/reference-main-regular-expressions.md @@ -59,6 +59,9 @@ name=bull,regex=^b[ou]ll$
 name=jane,regex=^j.*e$
 name=bull,regex=^b[ou]ll$
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Regex captures @@ -95,13 +98,11 @@ Regular expressions are those supported by the [Go regexp package](https://pkg.g
 package syntax // import "regexp/syntax"
 
-Package syntax parses regular expressions into parse trees and compiles
-parse trees into programs. Most clients of regular expressions will use the
-facilities of package regexp (such as Compile and Match) instead of this
-package.
+Package syntax parses regular expressions into parse trees and compiles parse
+trees into programs. Most clients of regular expressions will use the facilities
+of package regexp (such as Compile and Match) instead of this package.
 
-
-Syntax
+# Syntax
 
 The regular expression syntax understood by this package when parsing with
 the Perl flag is as follows. Parts of the syntax can be disabled by passing
@@ -141,9 +142,9 @@ Repetitions:
     x{n,}?         n or more x, prefer fewer
     x{n}?          exactly n x
 
-Implementation restriction: The counting forms x{n,m}, x{n,}, and x{n}
-reject forms that create a minimum or maximum repetition count above 1000.
-Unlimited repetitions are not subject to this restriction.
+Implementation restriction: The counting forms x{n,m}, x{n,}, and x{n} reject
+forms that create a minimum or maximum repetition count above 1000. Unlimited
+repetitions are not subject to this restriction.
 
 Grouping:
 
@@ -229,8 +230,7 @@ ASCII character classes:
     [[:word:]]     word characters (== [0-9A-Za-z_])
     [[:xdigit:]]   hex digit (== [0-9A-Fa-f])
 
-Unicode character classes are those in unicode.Categories and
-unicode.Scripts.
+Unicode character classes are those in unicode.Categories and unicode.Scripts.
 
 func IsWordChar(r rune) bool
 type EmptyOp uint8
diff --git a/docs/src/reference-main-separators.md b/docs/src/reference-main-separators.md
index c13241e65..3f63a2f7f 100644
--- a/docs/src/reference-main-separators.md
+++ b/docs/src/reference-main-separators.md
@@ -74,6 +74,9 @@ a=4,b=5,c=6
 
 c:3;a:1;b:2
 c:6;a:4;b:5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -83,6 +86,9 @@ c:6;a:4;b:5
 color,shape,flag,k,index,quantity,rate
 yellow,triangle,true,1,11,43.6498,9.8870
 red,square,true,2,15,79.2778,0.0130
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -92,6 +98,9 @@ red,square,true,2,15,79.2778,0.0130
 color|shape|flag|k|index|quantity|rate
 yellow|triangle|true|1|11|43.6498|9.8870
 red|square|true|2|15|79.2778|0.0130
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If your data has non-default separators and you don't want to change those @@ -112,6 +121,9 @@ a:4;b:5;c:6
 c:3;a:1;b:2
 c:6;a:4;b:5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Multi-character separators @@ -126,6 +138,9 @@ restrictions), IRS must be `\n` and IFS must be a single character.
 c:=3;;;a:=1;;;b:=2
 c:=6;;;a:=4;;;b:=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
If your data has field separators which are one or more consecutive spaces, you @@ -166,6 +181,9 @@ early light what so 2 light 3 what 4 so +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
## Regular-expression separators @@ -255,6 +273,9 @@ their values indicate what you specified at the command line -- so their use is
 a:1;b:2;c:3;d:>>>,|||;<<<
 a:4;b:5;c:6;d:>>>,|||;<<<
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Which separators apply to which file formats diff --git a/docs/src/reference-main-strings.md b/docs/src/reference-main-strings.md index df35284f4..0ca67ad33 100644 --- a/docs/src/reference-main-strings.md +++ b/docs/src/reference-main-strings.md @@ -41,6 +41,9 @@ purple triangle false 7 65 80.1405 5.8240 purple:triangle yellow circle true 8 73 63.9785 4.2370 yellow:circle yellow circle true 9 87 63.5058 8.3350 yellow:circle purple square false 10 91 72.3735 8.2430 purple:square +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Also see the [list of string-related built-in functions](reference-dsl-builtin-functions.md#string-functions). @@ -92,6 +95,9 @@ a e ab de +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Slicing @@ -118,6 +124,9 @@ ab cde abcde bcd +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Out-of-bounds indexing @@ -140,6 +149,9 @@ accesses result in trimming the indices, resulting in a short string or even the a e (error) +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -156,6 +168,9 @@ e
 "ab"
 "abcde"
 ""
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Escape sequences for string literals diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 96953fd8a..2a9b53349 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -28,6 +28,9 @@ yellow triangle true 1 11 43.6498 9.8870 red square true 2 15 79.2778 0.0130 red circle true 3 16 13.8103 2.9010 red square false 4 48 77.5542 7.4670 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream the `sort` and `head` bits are _verbs_. See the [Miller command @@ -79,6 +82,9 @@ Options:
 a=b,c=d,e=f
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -86,6 +92,9 @@ a=b,c=d,e=f
 
 a=b,c=d,e=f,4=g
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## bar @@ -124,6 +133,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -136,6 +148,9 @@ eks pan 2 ******************************.......... ********************.........
 wye wye 3 ********................................ *************...........................
 eks wye 4 ***************......................... *****...................................
 wye pan 5 **********************.................. **********************************......
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -148,6 +163,9 @@ eks pan 2 ***************************************# ************************.....
 wye wye 3 #....................................... #.......................................
 eks wye 4 #....................................... #.......................................
 wye pan 5 **********************************...... ***************************************#
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -160,6 +178,9 @@ eks pan 2 [0.204603]*******************#[0.758679] [0.134188]**********.........
 wye wye 3 [0.204603]#...................[0.758679] [0.134188]*****...............[0.863624]
 eks wye 4 [0.204603]******..............[0.758679] [0.134188]#...................[0.863624]
 wye pan 5 [0.204603]*************.......[0.758679] [0.134188]*******************#[0.863624]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## bootstrap @@ -277,6 +298,9 @@ a,b,c 1,2,3 4,5,6 7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -294,6 +318,9 @@ c 6
 a 7
 b 8
 c 9
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -304,6 +331,9 @@ n,a,b,c
 1,1,2,3
 2,4,5,6
 3,7,8,9
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -316,6 +346,9 @@ eks pan 2 0.758679 0.522151
 wye wye 3 0.204603 0.338318
 eks wye 4 0.381399 0.134188
 wye pan 5 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -328,6 +361,9 @@ n a   b   i x        y
 1 wye wye 3 0.204603 0.338318
 2 eks wye 4 0.381399 0.134188
 2 wye pan 5 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## check @@ -382,6 +418,9 @@ leave off -k as well as -v. " Preference ": " yellow" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -402,6 +441,9 @@ leave off -k as well as -v.
   "Preference": "    yellow"
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -422,6 +464,9 @@ leave off -k as well as -v.
   " Preference  ": "yellow"
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -442,6 +487,9 @@ leave off -k as well as -v.
   "Preference": "yellow"
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Function links: @@ -472,6 +520,9 @@ Options:
 count=10000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -483,6 +534,9 @@ a=eks,count=1965
 a=wye,count=1966
 a=zee,count=2047
 a=hat,count=1941
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -490,6 +544,9 @@ a=hat,count=1941
 
 count=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -501,6 +558,9 @@ b=wye,count=2057
 b=zee,count=1943
 b=eks,count=2008
 b=hat,count=2050
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -508,6 +568,9 @@ b=hat,count=2050
 
 count=5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -539,6 +602,9 @@ a=zee,b=hat,count=409
 a=wye,b=zee,count=385
 a=eks,b=hat,count=417
 a=wye,b=eks,count=386
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## count-distinct @@ -592,6 +658,9 @@ a=wye,b=wye,count=377 a=eks,b=pan,count=371 a=hat,b=pan,count=363 a=eks,b=zee,count=357 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -608,6 +677,9 @@ field=b,value=wye,count=2057
 field=b,value=zee,count=1943
 field=b,value=eks,count=2008
 field=b,value=hat,count=2050
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -639,6 +711,9 @@ a=wye,b=wye,someothername=377
 a=eks,b=pan,someothername=371
 a=hat,b=pan,someothername=363
 a=eks,b=zee,someothername=357
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -646,6 +721,9 @@ a=eks,b=zee,someothername=357
 
 count=25
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## count-similar @@ -688,6 +766,9 @@ zee eks 17 0.29081949506712723 0.054478717073354166 hat zee 18 0.05727869223575699 0.13343527626645157 zee pan 19 0.43144132839222604 0.8442204830496998 eks wye 20 0.38245149780530685 0.4730652428100751 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -715,6 +796,9 @@ zee eks 17 0.29081949506712723 0.054478717073354166 5
 zee pan 19 0.43144132839222604 0.8442204830496998   5
 hat wye 9  0.03144187646093577 0.7495507603507059   2
 hat zee 18 0.05727869223575699 0.13343527626645157  2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -742,6 +826,9 @@ zee wye 8  0.5985540091064224  0.976181385699006    5
 zee pan 12 0.3676141320555616  0.23614420670296965  5
 zee eks 17 0.29081949506712723 0.054478717073354166 5
 zee pan 19 0.43144132839222604 0.8442204830496998   5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## cut @@ -780,6 +867,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -792,6 +882,9 @@ i x        y
 3 0.204603 0.338318
 4 0.381399 0.134188
 5 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -799,6 +892,9 @@ i x        y
 
 a=1,b=2,c=3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -806,6 +902,9 @@ a=1,b=2,c=3
 
 b=2,c=3,a=1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## decimate @@ -864,6 +963,9 @@ a,b,c 1,,3 4,5,6 7,5,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -874,6 +976,9 @@ a,b,c
 1,,3
 4,5,6
 7,,9
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## fill-empty @@ -907,6 +1012,9 @@ a,b,c 1,N/A,3 4,5,6 7,N/A,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -917,6 +1025,9 @@ a,b,c
 1,something,3
 4,5,6
 7,something,9
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## filter @@ -1076,6 +1187,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1088,6 +1202,9 @@ eks pan 2.000000 0.758679 0.522151
 wye wye 3.000000 0.204603 0.338318
 eks wye 4.000000 0.381399 0.134188
 wye pan 5.000000 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1100,6 +1217,9 @@ XeksX XpanX 00000002 7.586790e-01 5.221510e-01
 XwyeX XwyeX 00000003 2.046030e-01 3.383180e-01
 XeksX XwyeX 00000004 3.813990e-01 1.341880e-01
 XwyeX XpanX 00000005 5.732880e-01 8.636240e-01
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1112,6 +1232,9 @@ XeksX XpanX 2.000000e+00 7.586790e-01 5.221510e-01
 XwyeX XwyeX 3.000000e+00 2.046030e-01 3.383180e-01
 XeksX XwyeX 4.000000e+00 3.813990e-01 1.341880e-01
 XwyeX XpanX 5.000000e+00 5.732880e-01 8.636240e-01
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## fraction @@ -1178,6 +1301,9 @@ male blue 2034 0.270083654229186 male purple 12 0.0015934138892577346 male yellow 1192 0.15827911299960165 male orange 448 0.0594874518656221 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Using `-g` we can split those out by gender, or by color: @@ -1199,6 +1325,9 @@ male blue 2034 0.5014792899408284 male purple 12 0.0029585798816568047 male yellow 1192 0.2938856015779093 male orange 448 0.11045364891518737 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1218,6 +1347,9 @@ male   blue   2034 0.8578658793757908
 male   purple 12   0.025
 male   yellow 1192 0.9974895397489539
 male   orange 448  0.9634408602150538
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
We can see, for example, that 70.9% of females have red (on the left) while 94.5% of reds are for females. @@ -1241,6 +1373,9 @@ male blue 2034 27.0083654229186 male purple 12 0.15934138892577346 male yellow 1192 15.827911299960165 male orange 448 5.94874518656221 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Another often-used idiom is to convert from a point distribution to a cumulative distribution, also known as "running sums". Here, you can use `-c`: @@ -1262,6 +1397,9 @@ male blue 2034 78.06400212455186 male purple 12 78.22334351347763 male yellow 1192 94.0512548134378 male orange 448 100 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1281,6 +1419,9 @@ male   blue   2034 59.27021696252466
 male   purple 12   59.56607495069034
 male   yellow 1192 88.95463510848126
 male   orange 448  100
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## gap @@ -1348,6 +1489,9 @@ eks wye 4 0.381399 0.134188 pan pan 1 0.346791 0.726802 wye wye 3 0.204603 0.338318 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1360,6 +1504,9 @@ eks pan 2 0.758679 0.522151
 eks wye 4 0.381399 0.134188
 wye wye 3 0.204603 0.338318
 wye pan 5 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
In this example, since the sort is on field `a`, the first step is to group together all records having the same value for field `a`; the second step is to sort the distinct `a`-field values `pan`, `eks`, and `wye` into `eks`, `pan`, and `wye`; the third step is to print out the record-list for `a=eks`, then the record-list for `a=pan`, then the record-list for `a=wye`. The group-by operation omits the middle sort and just puts like records together, for those times when a sort isn't desired. In particular, the ordering of group-by fields for group-by is the order in which they were encountered in the data stream, which in some cases may be more interesting to you. @@ -1387,6 +1534,9 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1401,6 +1551,9 @@ resource             loadsec ok
 record_count resource
 100          /path/to/file
 150          /path/to/second/file
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## having-fields @@ -1436,6 +1589,9 @@ record_count=100,resource=/path/to/file resource=/path/to/second/file,loadsec=0.32,ok=true record_count=150,resource=/path/to/second/file resource=/some/other/path,loadsec=0.97,ok=false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1447,6 +1603,9 @@ record_count=100,resource=/path/to/file
 resource=/path/to/second/file,loadsec=0.32,ok=true
 record_count=150,resource=/path/to/second/file
 resource=/some/other/path,loadsec=0.97,ok=false
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1456,6 +1615,9 @@ resource=/some/other/path,loadsec=0.97,ok=false
 resource=/path/to/file,loadsec=0.45,ok=true
 resource=/path/to/second/file,loadsec=0.32,ok=true
 resource=/some/other/path,loadsec=0.97,ok=false
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## head @@ -1484,6 +1646,9 @@ pan pan 1 0.3467901443380824 0.7268028627434533 eks pan 2 0.7586799647899636 0.5221511083334797 wye wye 3 0.20460330576630303 0.33831852551664776 eks wye 4 0.38139939387114097 0.13418874328430463 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1496,6 +1661,9 @@ wye wye 3  0.20460330576630303 0.33831852551664776
 eks zee 7  0.6117840605678454  0.1878849191181694
 zee eks 17 0.29081949506712723 0.054478717073354166
 wye hat 24 0.7286126830627567  0.19441962592638418
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## histogram @@ -1535,6 +1703,9 @@ bin_lo bin_hi x_count x2_count x3_count 0.7 0.8 1007 560 420 0.8 0.9 986 571 383 0.9 1 1013 507 341 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1554,6 +1725,9 @@ my_bin_lo my_bin_hi my_x_count my_x2_count my_x3_count
 0.7       0.8       1007       560         420
 0.8       0.9       986        571         383
 0.9       1         1013       507         341
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## join @@ -1629,6 +1803,9 @@ id name 300 carol 400 david 500 edgar +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1656,6 +1833,9 @@ present 200
 present 200
 present 400
 present 300
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1684,6 +1864,9 @@ id  name  status
 200 bob   present
 400 david present
 300 carol present
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Same, but with sorting the input first: @@ -1714,6 +1897,9 @@ id name status 400 david present 400 david missing 400 david present +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Same, but showing only unpaired records: @@ -1729,6 +1915,9 @@ missing 600 id name 500 edgar +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Use prefixing options to disambiguate between otherwise identical non-join field names: @@ -1742,6 +1931,9 @@ a b c 1 4 5 1 2 3 1 4 5 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1753,6 +1945,9 @@ a left_b left_c right_b right_c
 1 4      5      2       3
 1 2      3      4       5
 1 4      5      4       5
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Use zero join columns: @@ -1766,6 +1961,9 @@ left_a left_b left_c right_a right_b right_c 1 4 5 1 2 3 1 2 3 1 4 5 1 4 5 1 4 5 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## json-parse @@ -1851,6 +2049,9 @@ John,23,present Fred,34,present Alice,56,missing Carol,45,present +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1862,6 +2063,9 @@ John,23,present
 Fred,34,present
 Alice,56,missing
 Carol,45,present
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1873,6 +2077,9 @@ John  23  present
 Fred  34  present
 Alice 56  missing
 Carol 45  present
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## latin1-to-utf8 @@ -1933,6 +2140,9 @@ shape count circle 2591 triangle 3372 square 4115 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -1945,6 +2155,9 @@ triangle orange 107
 square   orange 128
 circle   green  287
 circle   purple 289
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1957,6 +2170,9 @@ triangle orange 107
 square   orange 128
 circle   green  287
 circle   purple 289
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -1969,6 +2185,9 @@ triangle orange
 square   orange
 circle   green
 circle   purple
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See also [most-frequent](reference-verbs.md#most-frequent). @@ -2040,6 +2259,9 @@ a_in a_out b_in b_out 436 490 446 195 526 320 963 780 220 888 705 831 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2050,6 +2272,9 @@ a_min a_max a_sum b_min b_max b_sum
 436   490   926   195   446   641
 320   526   846   780   963   1743
 220   888   1108  705   831   1536
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -2060,6 +2285,9 @@ a_in a_out b_in b_out a_sum b_sum
 436  490   446  195   926   641
 526  320   963  780   846   1743
 220  888   705  831   1108  1536
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## most-frequent @@ -2087,6 +2315,9 @@ shape count square 4115 triangle 3372 circle 2591 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2097,8 +2328,11 @@ shape    color  count
 square   red    1874
 triangle red    1560
 circle   red    1207
-square   yellow 589
 square   blue   589
+square   yellow 589
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -2109,8 +2343,11 @@ shape    color  someothername
 square   red    1874
 triangle red    1560
 circle   red    1207
-square   yellow 589
 square   blue   589
+square   yellow 589
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -2121,8 +2358,11 @@ shape    color
 square   red
 triangle red
 circle   red
-square   yellow
 square   blue
+square   yellow
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
See also [least-frequent](reference-verbs.md#least-frequent). @@ -2331,6 +2571,12 @@ a,c,e 1,3,5 2,4,5 3,5,7 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-remove-empty-columns +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Since this verb needs to read all records to see if any of them has a non-empty value for a given field name, it is non-streaming: it will ingest all records before writing any. @@ -2374,6 +2620,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2386,6 +2635,9 @@ eks pan     2     0.758679 0.522151
 wye wye     3     0.204603 0.338318
 eks wye     4     0.381399 0.134188
 wye pan     5     0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
As discussed in [Performance](performance.md), `sed` is significantly faster than Miller at doing this. However, Miller is format-aware, so it knows to do renames only within specified field keys and not any others, nor in field values which may happen to contain the same pattern. Example: @@ -2410,6 +2662,9 @@ a=eks,b=pan,i=2,x=0.758679,COLUMN5=0.522151 a=wye,b=wye,i=3,x=0.204603,COLUMN5=0.338318 a=eks,b=wye,i=4,x=0.381399,COLUMN5=0.134188 a=wye,b=pan,i=5,x=0.573288,COLUMN5=0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream See also [label](reference-verbs.md#label). @@ -2452,6 +2707,9 @@ eks pan 2 0.758679 0.522151 wye wye 3 0.204603 0.338318 eks wye 4 0.381399 0.134188 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2464,6 +2722,9 @@ i b   a   x        y
 3 wye wye 0.204603 0.338318
 4 wye eks 0.381399 0.134188
 5 pan wye 0.573288 0.863624
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -2476,6 +2737,9 @@ eks 0.758679 0.522151 2 pan
 wye 0.204603 0.338318 3 wye
 eks 0.381399 0.134188 4 wye
 wye 0.573288 0.863624 5 pan
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## repeat @@ -2540,6 +2804,9 @@ color=red color=green color=green color=green +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream After expansion with `repeat`, such data can then be sent on to @@ -2755,6 +3022,9 @@ i=7 i=8 i=9 i=10 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2767,6 +3037,9 @@ i=28
 i=32
 i=36
 i=40
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -2779,6 +3052,9 @@ i=32
 i=28
 i=24
 i=20
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## shuffle @@ -2826,6 +3102,9 @@ a,b,c 1,2,3 4,,6 ,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## sort @@ -2871,6 +3150,9 @@ eks wye 4 0.381399 0.134188 pan pan 1 0.346791 0.726802 wye pan 5 0.573288 0.863624 wye wye 3 0.204603 0.338318 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Here's an example filtering log data: suppose multiple threads (labeled here by color) are all logging progress counts to a single log file. The log file is (by nature) chronological, so the progress of various threads is interleaved: @@ -2920,6 +3202,9 @@ upsec color count 1.587 red 3782 1.601 red 3755 1.832 red 3717 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Any records not having all specified sort keys will appear at the end of the output, in the order they @@ -2933,6 +3218,9 @@ x=1 x=2 x=4 a=3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -2943,6 +3231,9 @@ x=4
 x=2
 x=1
 a=3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## sort-within-records @@ -2991,6 +3282,9 @@ b a c c b a 7 8 9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3014,6 +3308,9 @@ c b a
   "c": 7
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3024,6 +3321,9 @@ a b c
 1 2 3
 5 4 6
 9 8 7
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## split @@ -3165,6 +3465,9 @@ y_p50 0.5060212582772865 y_mean 0.5062057444929905 y_p90 0.9053657573378745 y_max 0.9999648102177897 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3177,6 +3480,9 @@ hat 0.4878988625336502 0.5131176341556505
 pan 0.4973036405471583 0.49959885012092725
 wye 0.4975928392133964 0.5045964890907357
 zee 0.5042419022900586 0.5029967546798116
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3192,6 +3498,9 @@ purple 0.501319 0.988893 0.504571 0.988287 1.9725823278192132 1.9586678584381585
 green  0.502015 0.990764 0.505359 0.990175 1.9735744947860123 1.9593496900223406
 blue   0.525226 0.992655 0.485170 0.993873 1.8899578467174132 2.048504647855391
 orange 0.483548 0.993635 0.480913 0.989102 2.054883899840347  2.056717119312641
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3202,6 +3511,9 @@ shape    count
 square   4115
 triangle 3372
 circle   2591
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3212,6 +3524,9 @@ shape    color_mode
 triangle red
 square   red
 circle   red
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## stats2 @@ -3265,6 +3580,9 @@ x2_xy_cov 0.04188382281779374 x2_xy_corr 0.630174342037994 x2_y2_cov -0.00030953725962542085 x2_y2_corr -0.0034249088761121966 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3279,6 +3597,9 @@ eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587
 wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
 zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
 hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Here's an example simple line-fit. The `x` and `y` @@ -3369,6 +3690,9 @@ upsec_count_pca_b 979.9883413064914 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 donesec 25.10852919630297 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## step @@ -3557,7 +3881,7 @@ All summarizers: uof upper outer fence: p75 + 3.0 * iqr Default summarizers: - field_type count mean min median max null_count distinct_count + field_type count mean min max null_count distinct_count Notes: * min, p25, median, p75, and max work for strings as well as numbers @@ -3575,12 +3899,18 @@ Options: mlr --ofmt %.3f --from data/medium --opprint summary
-field_name field_type count null_count distinct_count mean     min   median max
-a          string     10000 0          5              -        eks   pan    zee
-b          string     10000 0          5              -        eks   pan    zee
-i          int        10000 0          10000          5000.500 1     5001   10000
-x          float      10000 0          10000          0.499    0.000 0.501  1.000
-y          float      10000 0          10000          0.506    0.000 0.506  1.000
+field_name field_type count null_count distinct_count mean     min   max
+a          string     10000 0          5              -        eks   zee
+b          string     10000 0          5              -        eks   zee
+i          int        10000 0          10000          5000.500 1     10000
+x          float      10000 0          10000          0.499    0.000 1.000
+y          float      10000 0          10000          0.506    0.000 1.000
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-summary
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3600,7 +3930,7 @@ var            -      -      8334166.666666667  0.08426974433144456    0.0846112
 skewness       -      -      0                  -0.0006899591185521965 -0.017849760120133784
 minlen         3      3      1                  15                     13
 maxlen         3      3      5                  22                     22
-min            eks    eks    1                  4.509679127584487e-05  8.818962627266114e-05
+min            eks    eks    1                  0.00004509679127584487 0.00008818962627266114
 p25            hat    hat    2501               0.24667037823231752    0.25213670524015686
 median         pan    pan    5001               0.5011592202840128     0.5060212582772865
 p75            wye    wye    7501               0.7481860062358446     0.7640028449996572
@@ -3610,6 +3940,12 @@ lof            -      -      -12499             -1.2578765057782637    -1.283461
 lif            -      -      -4999              -0.5056030637729731    -0.5156625043990937
 uif            -      -      10001              0.9989438202376082     1.0199359148794074
 uof            -      -      17501              1.751217262242899      1.787735124518658
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-summary
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3620,6 +3956,12 @@ field_name a   b   i      x                   y
 mode       pan wye 1      0.3467901443380824  0.7268028627434533
 mean       -   -   5000.5 0.49860196816795804 0.5062057444929905
 median     pan pan 5001   0.5011592202840128  0.5060212582772865
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-summary
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## tac @@ -3643,6 +3985,9 @@ Prints the records in the input stream in reverse order. Note: this requires Mil a b c 1 2 3 4 5 6 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3651,6 +3996,9 @@ a b c
 
 a b c
 7 8 9
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3661,6 +4009,9 @@ a b c
 7 8 9
 4 5 6
 1 2 3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3671,6 +4022,9 @@ a b c filename
 7 8 9 data/b.csv
 4 5 6 data/a.csv
 1 2 3 data/a.csv
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## tail @@ -3698,6 +4052,9 @@ blue square 1 499872 0.618906 0.263796 0.531147 6.210738 blue triangle 0 499880 0.008111 0.826727 0.473296 6.146957 yellow triangle 0 499955 0.383942 0.559529 0.511376 4.307974 yellow circle 1 499974 0.764951 0.252842 0.499699 5.013810 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3708,6 +4065,9 @@ color  shape    flag i      u        v        w        x
 yellow triangle 0    499955 0.383942 0.559529 0.511376 4.307974
 blue   square   1    499872 0.618906 0.263796 0.531147 6.210738
 yellow circle   1    499974 0.764951 0.252842 0.499699 5.013810
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## tee @@ -3790,6 +4150,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3798,6 +4161,9 @@ purple square   false 10 91    72.3735  8.2430
 
 top_idx quantity_top
 1       81.2290
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3808,6 +4174,9 @@ shape    top_idx quantity_top
 triangle 1       81.2290
 square   1       79.2778
 circle   1       63.9785
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3818,6 +4187,9 @@ shape    someothername quantity_top
 triangle 1             81.2290
 square   1             79.2778
 circle   1             63.9785
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3828,6 +4200,9 @@ color  shape    flag  k index quantity rate
 purple triangle false 5 51    81.2290  8.5910
 red    square   true  2 15    79.2778  0.0130
 yellow circle   true  8 73    63.9785  4.2370
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3838,6 +4213,9 @@ color  shape    flag  k index quantity rate
 yellow circle   true  8 73    63.9785  4.2370
 red    square   true  2 15    79.2778  0.0130
 purple triangle false 5 51    81.2290  8.5910
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## unflatten @@ -3908,6 +4286,9 @@ green,circle orange,triangle orange,square orange,circle +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -3933,6 +4314,9 @@ red    triangle 1560
 yellow circle   356
 yellow square   589
 yellow triangle 468
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3960,6 +4344,9 @@ green  circle   287
 orange square   128
 orange triangle 107
 orange circle   68
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -3968,6 +4355,9 @@ orange circle   68
 
 count
 18
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The second main way to use `mlr uniq` is without group-by columns, using `-a` instead: @@ -4054,6 +4444,9 @@ red circle 1 purple square 0 red square 1 yellow triangle 1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -4062,6 +4455,9 @@ yellow triangle 1
 
 count
 7
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -4076,6 +4472,9 @@ count color  shape    flag
 7     purple square   0
 3     red    square   1
 2     yellow triangle 1
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## unsparsify @@ -4149,6 +4548,9 @@ Examples: "w": 2 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -4160,6 +4562,9 @@ a b v u x w
 - 2 - 1 - -
 1 - 2 - 3 -
 - - 1 - - 2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -4171,6 +4576,9 @@ a       b       v       u       x       w
 missing 2       missing 1       missing missing
 1       missing 2       missing 3       missing
 missing missing 1       missing missing 2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -4188,6 +4596,9 @@ a v x b u
 
 v w a b u
 1 2 - - -
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -4199,5 +4610,8 @@ a b v u w x
 - 2 - 1 - -
 1 - 2 - - 3
 - - 1 - 2 -
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/repl.md b/docs/src/repl.md index 71de28b33..4301af343 100644 --- a/docs/src/repl.md +++ b/docs/src/repl.md @@ -91,6 +91,9 @@ HELLO }GOODBYE ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Using Miller with the REPL diff --git a/docs/src/scripting.md b/docs/src/scripting.md index 29cac3fb7..cdb019313 100644 --- a/docs/src/scripting.md +++ b/docs/src/scripting.md @@ -30,6 +30,9 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Typing this out can get a bit old, if the only thing that changes for you is the filename. Some options include: @@ -72,6 +75,9 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -82,6 +88,9 @@ shape    count count_fraction
 triangle 3     0.3
 square   4     0.4
 circle   3     0.3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -105,6 +114,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -123,6 +135,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
etc. @@ -160,6 +175,9 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -170,6 +188,9 @@ shape    count count_fraction
 triangle 3     0.3
 square   4     0.4
 circle   3     0.3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -193,6 +214,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -211,6 +235,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Miller scripts on Windows @@ -247,6 +274,9 @@ shape count count_fraction triangle 3 0.3 square 4 0.4 circle 3 0.3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -270,6 +300,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -288,6 +321,9 @@ circle   3     0.3
   "count_fraction": 0.3
 }
 ]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
and so on. See also [Miller on Windows](miller-on-windows.md). diff --git a/docs/src/shapes-of-data.md b/docs/src/shapes-of-data.md index bab58b7f0..eac46a7c2 100644 --- a/docs/src/shapes-of-data.md +++ b/docs/src/shapes-of-data.md @@ -118,6 +118,9 @@ Miller records are ordered lists of key-value pairs. For NIDX format, DKVP forma
 1=x,2=y,3=z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -125,6 +128,9 @@ Miller records are ordered lists of key-value pairs. For NIDX format, DKVP forma
 
 1=x,2=y,3=z,6=a,4=b,55=cde
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -132,6 +138,9 @@ Miller records are ordered lists of key-value pairs. For NIDX format, DKVP forma
 
 x,y,z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -140,6 +149,9 @@ x,y,z
 
 1,2,3
 x,y,z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -147,6 +159,9 @@ x,y,z
 
 1=x,999=y,3=z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -154,6 +169,9 @@ x,y,z
 
 1=x,newname=y,3=z
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -162,6 +180,9 @@ x,y,z
 
 3,1,2
 z,x,y
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Why doesn't mlr cut put fields in the order I want? @@ -200,6 +221,9 @@ triangle,false,5.8240 circle,true,4.2370 circle,true,8.3350 square,false,8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
The issue is that Miller's `cut`, by default, outputs cut fields in the order they appear in the input data. This design decision was made intentionally to parallel the Unix/Linux system `cut` command, which has the same semantics. @@ -221,6 +245,9 @@ rate,shape,flag 4.2370,circle,true 8.3350,circle,true 8.2430,square,false +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
## Numbering and renumbering records @@ -259,6 +286,9 @@ purple,triangle,false,7,65,80.1405,5.8240,7 yellow,circle,true,8,73,63.9785,4.2370,8 yellow,circle,true,9,87,63.5058,8.3350,9 purple,square,false,10,91,72.3735,8.2430,10 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream However, this is the record number within the original input stream -- not after any filtering you may have done: @@ -271,6 +301,9 @@ color,shape,flag,k,index,quantity,rate,nr yellow,triangle,true,1,11,43.6498,9.8870,1 yellow,circle,true,8,73,63.9785,4.2370,8 yellow,circle,true,9,87,63.5058,8.3350,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream There are two good options here. One is to use the `cat` verb with `-n`: @@ -283,6 +316,9 @@ n,color,shape,flag,k,index,quantity,rate 1,yellow,triangle,true,1,11,43.6498,9.8870 2,yellow,circle,true,8,73,63.9785,4.2370 3,yellow,circle,true,9,87,63.5058,8.3350 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The other is to keep your own counter within the `put` DSL: @@ -295,6 +331,9 @@ color,shape,flag,k,index,quantity,rate,n yellow,triangle,true,1,11,43.6498,9.8870,1 yellow,circle,true,8,73,63.9785,4.2370,2 yellow,circle,true,9,87,63.5058,8.3350,3 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The difference is a matter of taste (although `mlr cat -n` puts the counter first). @@ -383,6 +422,9 @@ outer=2,middle=21,inner1=210,inner2=211 outer=3,middle=30,inner1=300,inner2=301 outer=3,middle=31,inner1=312,inner2=301 outer=3,middle=31,inner1=313,inner2=314 +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream See also the [record-heterogeneity page](record-heterogeneity.md); see in diff --git a/docs/src/shell-commands.md b/docs/src/shell-commands.md index e22713a90..a15ebe13f 100644 --- a/docs/src/shell-commands.md +++ b/docs/src/shell-commands.md @@ -30,6 +30,9 @@ eks pan 2 0.758679 0.522151 hello world wye wye 3 0.204603 0.338318 hello world eks wye 4 0.381399 0.134188 hello world wye pan 5 0.573288 0.863624 hello world +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -42,6 +45,9 @@ eks pan 2 0.758679 0.522151 {2}
 wye wye 3 0.204603 0.338318 {3}
 eks wye 4 0.381399 0.134188 {4}
 wye pan 5 0.573288 0.863624 {5}
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -54,6 +60,9 @@ eks pan 2 0.758679 0.522151 585d25a8ff04840f77779eeff61167dc
 wye wye 3 0.204603 0.338318 fb6361a373147c163e65ada94719fa16
 eks wye 4 0.381399 0.134188 585d25a8ff04840f77779eeff61167dc
 wye pan 5 0.573288 0.863624 fb6361a373147c163e65ada94719fa16
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Note that running a subprocess on every record takes a non-trivial amount of time. Comparing asking the system `date` command for the current time in nanoseconds versus computing it in process: diff --git a/docs/src/sorting.md b/docs/src/sorting.md index 68e1f4a02..c5e8537e7 100644 --- a/docs/src/sorting.md +++ b/docs/src/sorting.md @@ -49,6 +49,9 @@ purple triangle false 7 65 80.1405 5.8240 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 purple square false 10 91 72.3735 8.2430 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Sorted numerically ascending by rate: @@ -68,6 +71,9 @@ yellow circle true 9 87 63.5058 8.3350 purple triangle false 5 51 81.2290 8.5910 red square false 6 64 77.1991 9.5310 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Sorted lexically ascending by color; then, within each color, numerically descending by quantity: @@ -87,6 +93,9 @@ red circle true 3 16 13.8103 2.9010 yellow circle true 8 73 63.9785 4.2370 yellow circle true 9 87 63.5058 8.3350 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Example of natural sort, adapted from [https://github.com/facette/natsort](https://github.com/facette/natsort): @@ -123,6 +132,9 @@ n name 25 Xiph Xlater 40 26 Allegia 6R Clasteron 27 Callisto Morphamax 5000 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -157,6 +169,9 @@ n  name
 3  Xiph Xlater 58
 21 Xiph Xlater 300
 14 Xiph Xlater 500
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Sorting fields within records: the sort-within-records verb @@ -200,6 +215,9 @@ b a c c b a 7 8 9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -210,6 +228,9 @@ a b c
 1 2 3
 5 4 6
 9 8 7
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## The sort function by example @@ -228,6 +249,9 @@ a b c
 [1, 2, 3, 4, 5]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -240,6 +264,9 @@ a b c
 
 [5, 4, 3, 2, 1]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -252,6 +279,9 @@ a b c
 
 [1, 2, 3, 4, 5]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -264,6 +294,9 @@ a b c
 
 [5, 4, 3, 2, 1]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -280,6 +313,9 @@ a b c
   "b": 1,
   "c": 2
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -296,6 +332,9 @@ a b c
   "b": 1,
   "a": 3
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -319,6 +358,9 @@ a b c
   "c": 2,
   "a": 3
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -342,6 +384,9 @@ a b c
   "c": 2,
   "b": 1
 }
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -354,6 +399,9 @@ a b c
 
 ["a1", "a2", "a10", "a20", "a100", "a200"]
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
In the rest of this page we'll look more closely at these variants. @@ -397,6 +445,9 @@ key values alpha 1;4;5;6 beta 7;8;9;9 gamma 1;2;11;12 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Use the `"r"` flag for reverse, which is numerical descending: @@ -413,6 +464,9 @@ key values alpha 6;5;4;1 beta 9;9;8;7 gamma 12;11;2;1 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Use the `"f"` flag for lexical ascending sort (and `"fr"` would lexical descending): @@ -429,6 +483,9 @@ key values alpha 1;4;5;6 beta 7;8;9;9 gamma 1;11;12;2 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Without and with case-folding: @@ -457,6 +514,9 @@ alpha,cat;bat;Australia;Bavaria;apple;Colombia key values alpha Australia;Bavaria;Colombia;apple;bat;cat alpha apple;Australia;bat;Bavaria;cat;Colombia +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Simple sorting of maps within records @@ -528,6 +588,9 @@ Also note that, unlike the `sort-within-record` verb with its `-r` flag, } } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Simple sorting of maps across records @@ -570,6 +633,9 @@ red square false 6 64 77.1991 9.5310 6 purple triangle false 7 65 80.1405 5.8240 7 yellow circle true 8 73 63.9785 4.2370 8 yellow circle true 9 87 63.5058 8.3350 9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Custom sorting of arrays within records @@ -638,6 +704,9 @@ recapitulate (for reference) what `sort` with default flags already does; the th "even_then_odd": [2, 4, 6, 8, 10, 1, 3, 5, 7, 9] } ] +Memory profile started. +Memory profile finished. 
+go tool pprof -http=:8080 foo-stream ## Custom sorting of arrays across records @@ -691,6 +760,9 @@ red square true 2 15 79.2778 0.0130 purple triangle false 7 65 80.1405 5.8240 purple triangle false 5 51 81.2290 8.5910 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Custom sorting of maps within records @@ -754,6 +826,9 @@ For example, we can sort ascending or descending by map key or map value: "b": 2, "c": 1 } +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Custom sorting of maps across records @@ -796,4 +871,7 @@ red square false 4 48 77.5542 7.4670 red circle true 3 16 13.8103 2.9010 red square true 2 15 79.2778 0.0130 yellow triangle true 1 11 43.6498 9.8870 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream diff --git a/docs/src/special-symbols-and-formatting.md b/docs/src/special-symbols-and-formatting.md index c12fc01bf..97cd771ca 100644 --- a/docs/src/special-symbols-and-formatting.md +++ b/docs/src/special-symbols-and-formatting.md @@ -45,6 +45,9 @@ Likewise [JSON](file-formats.md#json): "Role": "tester" } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream For Miller's [XTAB](file-formats.md#xtab-vertical-tabular) there is no escaping for carriage returns, but commas work fine: @@ -58,6 +61,9 @@ Role administrator Name Khavari, Darius Role tester +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream But for [key-value-pairs](file-formats.md#dkvp-key-value-pairs) and [index-numbered](file-formats.md#nidx-index-numbered-toolkit-style) formats, commas are the default field separator. And -- as of Miller 5.4.0 anyway -- there is no CSV-style double-quote-handling like there is for CSV. 
So commas within the data look like delimiters: @@ -68,6 +74,9 @@ But for [key-value-pairs](file-formats.md#dkvp-key-value-pairs) and [index-numbe
 Name=Xiao, Lin,Role=administrator
 Name=Khavari, Darius,Role=tester
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
One solution is to use a different delimiter, such as a pipe character: @@ -78,6 +87,9 @@ One solution is to use a different delimiter, such as a pipe character:
 Name=Xiao, Lin|Role=administrator
 Name=Khavari, Darius|Role=tester
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
To be extra-sure to avoid data/delimiter clashes, you can also use control @@ -89,6 +101,9 @@ characters as delimiters -- here, control-A:
 Name=Xiao, Lin^ARole=administrator
 Name=Khavari, Darius^ARole=tester
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## How can I handle field names with special symbols in them? @@ -100,6 +115,9 @@ Simply surround the field names with curly braces:
 x.a=3,y:b=4,z/c=5,product.all=60
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## How can I put single quotes into strings? @@ -115,6 +133,9 @@ $a = "It's OK, I said, then 'for now'."
 a=It's OK, I said, then 'for now'.
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
So: Miller's DSL uses double quotes for strings, and you can put single quotes (or backslash-escaped double-quotes) inside strings, no problem. @@ -126,6 +147,9 @@ Without putting the update expression in a file, it's messier:
 a=It's OK, I said, 'for now'.
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The idea is that the outermost single-quotes are to protect the `put` expression from the shell, and the double quotes within them are for Miller. To get a single quote in the middle there, you need to actually put it *outside* the single-quoting for the shell. The pieces are the following, all concatenated together: @@ -155,6 +179,9 @@ a=is it?,b=it is! a is it? b it is! c is it ... +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
 mlr --oxtab put '$c = ssub($a, "?"," ...")' data/question.dat
@@ -163,6 +190,9 @@ c is it ...
 a is it?
 b it is!
 c is it ...
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
The @@ -186,6 +216,9 @@ The `ssub` and `gssub` functions are also handy for dealing with non-UTF-8 strin
 Kaðlín og Þormundr
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
More generally, though, we have the DSL functions @@ -219,4 +252,7 @@ See also the [page on regular expressions](reference-main-regular-expressions.md
 a=14°45',degrees=14.75
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/statistics-examples.md b/docs/src/statistics-examples.md index b1b7ea7b3..77252f9e9 100644 --- a/docs/src/statistics-examples.md +++ b/docs/src/statistics-examples.md @@ -29,6 +29,9 @@ For one or more specified field names, simply compute p25 and p75, then write th x_p25 0.24667037823231752 x_p75 0.7481860062358446 x_iqr 0.5015156280035271 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream For wildcarded field names, first compute p25 and p75, then loop over field names with `p25` in them: @@ -52,6 +55,9 @@ y_p75 0.7640028449996572 i_iqr 5000 x_iqr 0.5015156280035271 y_iqr 0.5118661397595003 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Computing weighted means @@ -90,4 +96,7 @@ a=eks,wmean=4890.3815931472145,mean=4956.2900763358775 a=wye,wmean=4946.987746229947,mean=4920.001017293998 a=zee,wmean=5164.719684856538,mean=5123.092330239375 a=hat,wmean=4925.533162478552,mean=4967.743946419371 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e..4c80d25c0 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -50,6 +50,9 @@ you can simply do
 x_sum 4986.019681679581
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
or @@ -64,6 +67,9 @@ wye 1023.5484702619565 zee 979.7420161495838 eks 1016.7728571314786 hat 1000.192668193983 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream rather than the more tedious @@ -78,6 +84,9 @@ rather than the more tedious
 x_sum 4986.019681679581
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
or @@ -97,6 +106,9 @@ wye 1023.5484702619565 zee 979.7420161495838 eks 1016.7728571314786 hat 1000.192668193983 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream The former (`mlr stats1` et al.) has the advantages of being easier to type, being less error-prone to type, and running faster. @@ -143,6 +155,9 @@ NR x x_pct 3 0.204603 0 4 0.381399 31.90825807289974 5 0.573288 66.54051068806446 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Line-number ratios @@ -170,6 +185,9 @@ I N PCT a b i x y 3 5 60 wye wye 3 0.204603 0.338318 4 5 80 eks wye 4 0.381399 0.134188 5 5 100 wye pan 5 0.573288 0.863624 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Records having max value @@ -212,6 +230,9 @@ blue purple 2 0.208785 purple purple 1 0.455077 red purple 4 0.477187 blue red 4 0.007487 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream Of course, the largest value of `n` isn't known until after all data have been read. Using an [out-of-stream variable](reference-dsl-variables.md#out-of-stream-variables) we can [retain all records as they are read](operating-on-all-records.md), then filter them at the end: @@ -251,6 +272,9 @@ purple red 5 0.454779 orange blue 5 0.705700 purple red 5 0.072936 green purple 5 0.203577 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream ## Feature-counting @@ -349,6 +373,9 @@ Then "key_fraction": 0.08333333333333333 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -373,6 +400,9 @@ latency 0.5833333333333334
 name    0.3333333333333333
 uid     0.25
 uid2    0.08333333333333333
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Unsparsing @@ -465,6 +495,9 @@ end { "w": 2 } ] +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -476,6 +509,9 @@ a,b,v,u,x,w
 ,2,,1,,
 1,,2,,3,
 ,,1,,,2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -487,6 +523,9 @@ a b v u x w
 - 2 - 1 - -
 1 - 2 - 3 -
 - - 1 - - 2
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Mean without/with oosvars @@ -497,6 +536,9 @@ a b v u x w
 x_mean
 0.49860196816795804
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -512,6 +554,9 @@ x_mean
 
 x_mean
 0.49860196816795804
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Keyed mean without/with oosvars @@ -546,6 +591,9 @@ zee hat 0.46772617655014515 wye zee 0.5059066170573692 eks hat 0.5006790659966355 wye eks 0.5306035254809106 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -587,6 +635,9 @@ hat zee 0.5099985721987774
 hat eks 0.48587864619953547
 hat hat 0.47993053101017374
 hat pan 0.4643355557376876
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Variance and standard deviation without/with oosvars @@ -600,6 +651,9 @@ x_sum 4986.019681679581 x_mean 0.49860196816795804 x_var 0.08426974433144456 x_stddev 0.2902925151144007 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -627,6 +681,9 @@ sumx2  3328.652400179729
 mean   0.49860196816795804
 var    0.08426974433144456
 stddev 0.2902925151144007
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
You can also do this keyed, of course, imitating the keyed-mean example above. @@ -639,6 +696,9 @@ You can also do this keyed, of course, imitating the keyed-mean example above.
 x_min 0.00004509679127584487
 x_max 0.999952670371898
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
@@ -651,6 +711,9 @@ x_max 0.999952670371898
 
 x_min 0.00004509679127584487
 x_max 0.999952670371898
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Keyed min/max without/with oosvars @@ -665,6 +728,9 @@ eks 0.0006917972627396018 0.9988110946859143 wye 0.0001874794831505655 0.9998228522652893 zee 0.0005486114815762555 0.9994904324789629 hat 0.00004509679127584487 0.999952670371898 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -683,6 +749,9 @@ eks 0.0006917972627396018  0.9988110946859143
 wye 0.0001874794831505655  0.9998228522652893
 zee 0.0005486114815762555  0.9994904324789629
 hat 0.00004509679127584487 0.999952670371898
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Delta without/with oosvars @@ -697,6 +766,9 @@ eks pan 2 0.758679 0.522151 0.411888 wye wye 3 0.204603 0.338318 -0.554076 eks wye 4 0.381399 0.134188 0.17679599999999998 wye pan 5 0.573288 0.863624 0.19188900000000003 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -712,6 +784,9 @@ eks pan 2 0.758679 0.522151 0.411888
 wye wye 3 0.204603 0.338318 -0.554076
 eks wye 4 0.381399 0.134188 0.17679599999999998
 wye pan 5 0.573288 0.863624 0.19188900000000003
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Keyed delta without/with oosvars @@ -726,6 +801,9 @@ eks pan 2 0.758679 0.522151 0 wye wye 3 0.204603 0.338318 0 eks wye 4 0.381399 0.134188 -0.37728 wye pan 5 0.573288 0.863624 0.36868500000000004 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -741,6 +819,9 @@ eks pan 2 0.758679 0.522151 0
 wye wye 3 0.204603 0.338318 0
 eks wye 4 0.381399 0.134188 -0.37728
 wye pan 5 0.573288 0.863624 0.36868500000000004
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
## Exponentially weighted moving averages without/with oosvars @@ -755,6 +836,9 @@ eks pan 2 0.758679 0.522151 0.3879798 wye wye 3 0.204603 0.338318 0.36964211999999996 eks wye 4 0.381399 0.134188 0.37081780799999997 wye pan 5 0.573288 0.863624 0.3910648272 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -771,4 +855,7 @@ eks pan 2 0.758679 0.522151 0.3879798
 wye wye 3 0.204603 0.338318 0.36964211999999996
 eks wye 4 0.381399 0.134188 0.37081780799999997
 wye pan 5 0.573288 0.863624 0.3910648272
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
diff --git a/docs/src/unix-toolkit-context.md b/docs/src/unix-toolkit-context.md index 1687f4868..59591d6d8 100644 --- a/docs/src/unix-toolkit-context.md +++ b/docs/src/unix-toolkit-context.md @@ -47,6 +47,9 @@ a,b,c 1,2,3 4,5,6 7,8,9 +Memory profile started. +Memory profile finished. +go tool pprof -http=:8080 foo-stream
@@ -57,6 +60,9 @@ a,b,c
 7,8,9
 4,5,6
 1,2,3
+Memory profile started.
+Memory profile finished.
+go tool pprof -http=:8080 foo-stream
 
Likewise with `mlr sort`, `mlr tac`, and so on.