From 7fa862fbdcecce6fc31cfbf30415ddcdff510387 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 17 Jan 2021 22:55:40 -0500 Subject: [PATCH] output-handler iterate --- ...-user-defined-functions-and-subroutines.sh | 5 +- .../expected/case-dsl-split-join-more.sh.out | 628 +++++++++--------- ...r-defined-functions-and-subroutines.sh.out | 121 ++++ go/src/miller/dsl/ast_types.go | 24 +- go/src/miller/dsl/cst/dump.go | 235 +++++-- go/src/miller/dsl/cst/leaves.go | 8 +- go/src/miller/dsl/cst/output-handlers.go | 25 +- go/src/miller/dsl/cst/print.go | 345 ++++++---- go/src/miller/dsl/cst/tee.go | 176 ++++- go/src/miller/lib/halfpipe.go | 43 ++ go/src/miller/parsing/mlr.bnf | 2 + go/todo.txt | 50 +- 12 files changed, 1098 insertions(+), 564 deletions(-) create mode 100644 go/src/miller/lib/halfpipe.go diff --git a/go/reg-test/cases/case-dsl-user-defined-functions-and-subroutines.sh b/go/reg-test/cases/case-dsl-user-defined-functions-and-subroutines.sh index 60dfa11ff..c32ac85e2 100644 --- a/go/reg-test/cases/case-dsl-user-defined-functions-and-subroutines.sh +++ b/go/reg-test/cases/case-dsl-user-defined-functions-and-subroutines.sh @@ -128,9 +128,8 @@ run_mlr --from $indir/abixy --opprint put ' } ' -# print/dump from subr/func; no tee/emit from func run_mlr --from $indir/abixy --opprint put 'subr log(text) { print "TEXT IS ".text } call log("NR is ".NR)' run_mlr --from $indir/abixy --opprint put 'func f(text) { print "TEXT IS ".text; return text.text } $o = f($a)' run_mlr --from $indir/abixy --opprint put 'begin{@x=1} func f(x) { dump; print "hello" } $o=f($i)' -mlr_expect_fail --from $indir/abixy put 'begin{@x=1} func f(x) { dump; print "hello"; tee > "x", $* } $o=f($i)' -mlr_expect_fail --from $indir/abixy put 'begin{@x=1} func f(x) { dump; print "hello"; emit > "x", @* } $o=f($i)' +run_mlr --from $indir/abixy put 'begin{@x=1} func f(x) { dump; print "hello"; tee > "x", $* } $o=f($i)' +run_mlr --from $indir/abixy put 'begin{@x=1} func f(x) { dump; print "hello"; emit > 
"x", @* } $o=f($i)' diff --git a/go/reg-test/expected/case-dsl-split-join-more.sh.out b/go/reg-test/expected/case-dsl-split-join-more.sh.out index 31c7ee90d..6369f826b 100644 --- a/go/reg-test/expected/case-dsl-split-join-more.sh.out +++ b/go/reg-test/expected/case-dsl-split-join-more.sh.out @@ -79,18 +79,6 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnv("a,b,c" , ","); for (k,v 1:string a:string 2:string b:string 3:string c:string -1:string a:string -2:string b:string -3:string c:string -1:string a:string -2:string b:string -3:string c:string -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c -1=a,2=b,3=c -1=a,2=b,3=c 1=a,2=b,3=c 1:string a:string 2:string b:string @@ -107,8 +95,20 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnv("a,b,c" , ","); for (k,v 1:string a:string 2:string b:string 3:string c:string -1:string a:string 1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c @@ -121,18 +121,6 @@ a=1,b=2,3=c a:string 1:int b:string 2:int 3:string c:string -a:string 1:int -b:string 2:int -3:string c:string -a=1,b=2,3=c -a:string 1:int -b:string 2:int -a=1,b=2,3=c -3:string c:string -a:string 1:int -b:string 2:int -3:string c:string -a=1,b=2,3=c a=1,b=2,3=c a:string 1:int b:string 2:int @@ -145,10 +133,22 @@ a=1,b=2,3=c a:string 1:int b:string 2:int 3:string c:string +a=1,b=2,3=c a:string 1:int b:string 2:int 3:string c:string a=1,b=2,3=c +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +a:string 1:int +b:string 2:int +3:string c:string a=1,b=2,3=c a:string 1:int b:string 2:int @@ -161,58 +161,58 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnv("a,b,c", IFS); print ">>" 1:string a:string 2:string b:string 
3:string c:string ->> -<<1=a,2=b,3=c - -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c 1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string ->> -<< -1:string a:string1=a,2=b,3=c - -2:string b:string -3:string c:string ->> -<< -1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -2:string b:string -1=a,2=b,3=c -3:string c:string 1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string -1=a,2=b,3=c 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string +2:string b:string +3:string c:string 1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c @@ -223,42 +223,42 @@ mlr --from ./reg-test/input/abixy-het put $* = splitkv("a=1,b=2,c", IPS, IFS); p a:string 1:int b:string 2:int 3:string c:string ->> -<< -a=1,b=2,3=c -a:string 1:int -b:string 2:int -3:string c:string ->> -<< -a:string 1:int -b:string 2:int -3:string c:stringa=1,b=2,3=c - ->> -<< -a:string 1:int -b:string 2:int -3:string c:string -a=1,b=2,3=c a=1,b=2,3=c >> << a:string 1:int b:string 2:int 3:string c:string ->> -<< -a:string 1:int -b:string 2:int a=1,b=2,3=c -3:string c:string >> << a:string 1:int b:string 2:int 3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string a=1,b=2,3=c >> << @@ -285,11 +285,11 @@ 
mlr --from ./reg-test/input/abixy-het put $* = splitnv("a,b,c", OFS); print ">>" 1:string a:string 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string -2:string b:string1=a,2=b,3=c - +2:string b:string 3:string c:string 1=a,2=b,3=c >> @@ -297,42 +297,42 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnv("a,b,c", OFS); print ">>" 1:string a:string 2:string b:string 3:string c:string ->> -<< -1:string a:string -2:string b:string -1=a,2=b,3=c -3:string c:string ->> -<< -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c 1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string ->> -<< -1:string a:string -2:string b:string -1=a,2=b,3=c -3:string c:string 1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string 1=a,2=b,3=c >> << @@ -347,30 +347,6 @@ mlr --from ./reg-test/input/abixy-het put $* = splitkv("a=1,b=2,c", OPS, OFS); p a:string 1:int b:string 2:int 3:string c:string ->> -<< -a=1,b=2,3=c -a:string 1:int -b:string 2:int -3:string c:string ->> -<< -a=1,b=2,3=c -a:string 1:int -b:string 2:int -3:string c:string ->> -<< -a=1,b=2,3=c -a:string 1:int -b:string 2:int -3:string c:string ->> -<< -a:string 1:int -b:string 2:int -3:string c:string -a=1,b=2,3=c a=1,b=2,3=c >> << @@ -383,16 +359,40 @@ a=1,b=2,3=c a:string 1:int b:string 2:int 3:string c:string +a=1,b=2,3=c >> << a:string 1:int b:string 2:int -3:string c:stringa=1,b=2,3=c - +3:string c:string +a=1,b=2,3=c >> << a:string 1:int +b:string 2:int +3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 
1:int +b:string 2:int +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:int b:string 2:int 3:string c:string a=1,b=2,3=c @@ -411,10 +411,6 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c" , ","); for (k, 1:string a:string 2:string b:string 3:string c:string -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c 1=a,2=b,3=c 1:string a:string 2:string b:string @@ -423,22 +419,26 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c" , ","); for (k, 1:string a:string 2:string b:string 3:string c:string -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c 1=a,2=b,3=c 1:string a:string 2:string b:string 3:string c:string -1:string a:string -2:string b:string1=a,2=b,3=c - -3:string c:string +1=a,2=b,3=c 1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +1:string a:string +2:string b:string +3:string c:string 1=a,2=b,3=c 1:string a:string 2:string b:string @@ -457,22 +457,6 @@ a=1,b=2,3=c a:string 1:string b:string 2:string 3:string c:string -a:string 1:string -b:string 2:string -3:string c:string -a:string 1:string -b:string 2:string -a=1,b=2,3=c -a=1,b=2,3=c -3:string c:string -a=1,b=2,3=c -a:string 1:string -b:string 2:string -3:string c:string -a:string 1:string -b:string 2:string -3:string c:string -a=1,b=2,3=c a=1,b=2,3=c a:string 1:string b:string 2:string @@ -481,10 +465,26 @@ a=1,b=2,3=c a:string 1:string b:string 2:string 3:string c:string +a=1,b=2,3=c a:string 1:string b:string 2:string 3:string c:string a=1,b=2,3=c +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c mlr --from ./reg-test/input/abixy-het put $* = 
splitnvx("a,b,c", IFS); print ">>".IRS."<<"; for (k, v in $*) {print k.":".typeof(k)." ".v.":".typeof(v)} @@ -493,42 +493,6 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c", IFS); print ">> 1:string a:string 2:string b:string 3:string c:string ->> -<< -1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string ->> -<< -1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string ->> -<< -1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c ->> -<< -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -1=a,2=b,3=c -2:string b:string -3:string c:string ->> -<< -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c 1=a,2=b,3=c >> << @@ -541,12 +505,48 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c", IFS); print ">> 1:string a:string 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string 1=a,2=b,3=c mlr --from ./reg-test/input/abixy-het put $* = splitkvx("a=1,b=2,c", IPS, IFS); print ">>".IRS."<<"; for (k, v in $*) {print k.":".typeof(k)." 
".v.":".typeof(v)} @@ -555,15 +555,9 @@ mlr --from ./reg-test/input/abixy-het put $* = splitkvx("a=1,b=2,c", IPS, IFS); a:string 1:string b:string 2:string 3:string c:string ->> -<> << -a=1,b=2,3=c a:string 1:string b:string 2:string 3:string c:string @@ -573,42 +567,48 @@ a=1,b=2,3=c a:string 1:string b:string 2:string 3:string c:string ->> -<< -a:string 1:string -b:string 2:string -a=1,b=2,3=c -3:string c:string ->> -<< -a:string 1:string -b:string 2:string -3:string c:string ->> -<< -a:string 1:string -b:string 2:string -3:string c:string -a=1,b=2,3=c -a=1,b=2,3=c a=1,b=2,3=c >> << a:string 1:string b:string 2:string 3:string c:string ->> -<< a=1,b=2,3=c -a:string 1:string -b:string 2:string -3:string c:string >> << a:string 1:string b:string 2:string 3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c", OFS); print ">>".ORS."<<"; for (k, v in $*) {print k.":".typeof(k)." 
".v.":".typeof(v)} @@ -617,42 +617,7 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c", OFS); print ">> 1:string a:string 2:string b:string 3:string c:string ->> -<< 1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string ->> -<< -1=a,2=b,3=c -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -2:string b:string -3:string c:string -1=a,2=b,3=c -1=a,2=b,3=c ->> -<< -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -2:string b:string -3:string c:string ->> -<< -1:string a:string -1=a,2=b,3=c -1=a,2=b,3=c -2:string b:string -3:string c:string >> << 1:string a:string @@ -660,17 +625,52 @@ mlr --from ./reg-test/input/abixy-het put $* = splitnvx("a,b,c", OFS); print ">> 3:string c:string 1=a,2=b,3=c >> -<<1=a,2=b,3=c - +<< 1:string a:string 2:string b:string 3:string c:string +1=a,2=b,3=c >> << 1:string a:string 2:string b:string 3:string c:string 1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string +1=a,2=b,3=c +>> +<< +1:string a:string +2:string b:string +3:string c:string 1=a,2=b,3=c mlr --from ./reg-test/input/abixy-het put $* = splitkvx("a=1,b=2,c", OPS, OFS); print ">>".ORS."<<"; for (k, v in $*) {print k.":".typeof(k)." 
".v.":".typeof(v)} @@ -679,48 +679,48 @@ mlr --from ./reg-test/input/abixy-het put $* = splitkvx("a=1,b=2,c", OPS, OFS); a:string 1:string b:string 2:string 3:string c:string ->> -<> -<< -a:string 1:string -b:string 2:string -3:string c:string -a=1,b=2,3=c a=1,b=2,3=c >> << a:string 1:string b:string 2:string 3:string c:string ->> -<< -a:string 1:string -b:string 2:string -3:string c:string ->> -<< -a:string 1:string -b:string 2:string -3:string c:string ->> -<< -a:string 1:string -b:string 2:string -3:string c:string +a=1,b=2,3=c >> << a:string 1:string b:string 2:string 3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string +a=1,b=2,3=c +>> +<< +a:string 1:string +b:string 2:string +3:string c:string a=1,b=2,3=c >> << @@ -884,27 +884,27 @@ b:string wye:string i:string 3:int x:string 0.20460330576630303:float y:string 0.33831852551664776:float -a:string eks:string -bbb:string wye:string aaa wye b wye i 3 x 0.20460330576630303 y 0.33831852551664776 +a:string eks:string +bbb:string wye:string i:string 4:int x:string 0.38139939387114097:float y:string 0.13418874328430463:float -a:string wye:string -b:string pan:string -i:string 5:int -xxx:string 0.5732889198020006:float a eks bbb wye i 4 x 0.38139939387114097 y 0.13418874328430463 +a:string wye:string +b:string pan:string +i:string 5:int +xxx:string 0.5732889198020006:float y:string 0.8636244699032729:float a wye @@ -974,16 +974,16 @@ b:string pan:string i:string 1:string x:string 0.3467901443380824:string y:string 0.7268028627434533:string -a:string eks:string -b:string pan:string -i:string 2:string -x:string 0.7586799647899636:string -y:string 0.5221511083334797:string a pan b pan i 1 x 0.3467901443380824 y 0.7268028627434533 +a:string eks:string 
+b:string pan:string +i:string 2:string +x:string 0.7586799647899636:string +y:string 0.5221511083334797:string a eks b pan @@ -995,16 +995,16 @@ b:string wye:string i:string 3:string x:string 0.20460330576630303:string y:string 0.33831852551664776:string -a:string eks:string -bbb:string wye:string -i:string 4:string -x:string 0.38139939387114097:string aaa wye b wye i 3 x 0.20460330576630303 y 0.33831852551664776 +a:string eks:string +bbb:string wye:string +i:string 4:string +x:string 0.38139939387114097:string y:string 0.13418874328430463:string a eks @@ -1017,13 +1017,13 @@ b:string pan:string i:string 5:string xxx:string 0.5732889198020006:string y:string 0.8636244699032729:string -a:string zee:string a wye b pan i 5 xxx 0.5732889198020006 y 0.8636244699032729 +a:string zee:string b:string pan:string i:string 6:string x:string 0.5271261600918548:string @@ -1050,15 +1050,15 @@ b:string wye:string i:string 8:string x:string 0.5985540091064224:string yyy:string 0.976181385699006:string -aaa:string hat:string -bbb:string wye:string -i:string 9:string a zee b wye i 8 x 0.5985540091064224 yyy 0.976181385699006 +aaa:string hat:string +bbb:string wye:string +i:string 9:string x:string 0.03144187646093577:string y:string 0.7495507603507059:string @@ -1171,11 +1171,11 @@ abc 4 1:string 2:int abc:string 4:int 5:string xyz:string -1:string 2:int 1 2 abc 4 5 xyz +1:string 2:int abc:string 4:int 5:string xyz:string @@ -1185,13 +1185,13 @@ abc 4 1:string 2:int abc:string 4:int 5:string xyz:string -1:string 2:int -abc:string 4:int -5:string xyz:string 1 2 abc 4 5 xyz +1:string 2:int +abc:string 4:int +5:string xyz:string 1 2 abc 4 @@ -1242,13 +1242,6 @@ abc 4 1:string 2:string abc:string 4:string 5:string xyz:string -1:string 2:string -abc:string 4:string -5:string xyz:string - -1 2 -abc 4 -5 xyz 1 2 abc 4 @@ -1291,11 +1284,18 @@ abc 4 1:string 2:string abc:string 4:string 5:string xyz:string -1:string 2:string 1 2 abc 4 5 xyz +1:string 2:string +abc:string 4:string +5:string 
xyz:string + +1 2 +abc 4 +5 xyz +1:string 2:string abc:string 4:string 5:string xyz:string diff --git a/go/reg-test/expected/case-dsl-user-defined-functions-and-subroutines.sh.out b/go/reg-test/expected/case-dsl-user-defined-functions-and-subroutines.sh.out index d50676ee6..92ccd3ae2 100644 --- a/go/reg-test/expected/case-dsl-user-defined-functions-and-subroutines.sh.out +++ b/go/reg-test/expected/case-dsl-user-defined-functions-and-subroutines.sh.out @@ -353,6 +353,127 @@ hat wye 9 0.03144187646093577 0.7495507603507059 pan wye 10 0.5026260055412137 0.9526183602969864 mlr --from ./reg-test/input/abixy put begin{@x=1} func f(x) { dump; print "hello"; tee > "x", $* } $o=f($i) +{ + "a": "pan", + "b": "pan", + "i": 1, + "x": 0.3467901443380824, + "y": 0.7268028627434533 +} +{ + "x": 1 +} +hello +a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 +{ + "x": 1 +} +hello +{ + "a": "eks", + "b": "pan", + "i": 2, + "x": 0.7586799647899636, + "y": 0.5221511083334797 +} +a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 +{ + "x": 1 +} +hello +{ + "a": "wye", + "b": "wye", + "i": 3, + "x": 0.20460330576630303, + "y": 0.33831852551664776 +} +a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 +{ + "x": 1 +} +hello +{ + "a": "eks", + "b": "wye", + "i": 4, + "x": 0.38139939387114097, + "y": 0.13418874328430463 +} +a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 +{ + "x": 1 +} +hello +{ + "a": "wye", + "b": "pan", + "i": 5, + "x": 0.5732889198020006, + "y": 0.8636244699032729 +} +{ + "a": "zee", + "b": "pan", + "i": 6, + "x": 0.5271261600918548, + "y": 0.49322128674835697 +} +a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 +{ + "x": 1 +} +hello +a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 +{ + "x": 1 +} +hello +{ + "a": "eks", + "b": "zee", + "i": 7, + "x": 0.6117840605678454, + "y": 0.1878849191181694 +} +{ + "a": "zee", + "b": "wye", + "i": 8, + "x": 0.5985540091064224, + "y": 0.976181385699006 +} 
+a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 +{ + "x": 1 +} +hello +{ + "a": "hat", + "b": "wye", + "i": 9, + "x": 0.03144187646093577, + "y": 0.7495507603507059 +} +a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 +{ + "x": 1 +} +hello +a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 +{ + "x": 1 +} +hello +{ + "a": "pan", + "b": "wye", + "i": 10, + "x": 0.5026260055412137, + "y": 0.9526183602969864 +} +a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 +Exit status was 0; expected 1. mlr --from ./reg-test/input/abixy put begin{@x=1} func f(x) { dump; print "hello"; emit > "x", @* } $o=f($i) diff --git a/go/src/miller/dsl/ast_types.go b/go/src/miller/dsl/ast_types.go index 5d4855b11..ab31a3e2a 100644 --- a/go/src/miller/dsl/ast_types.go +++ b/go/src/miller/dsl/ast_types.go @@ -24,17 +24,17 @@ type ASTNode struct { type TNodeType string const ( - NodeTypeStringLiteral = "string literal" - NodeTypeIntLiteral = "int literal" - NodeTypeFloatLiteral = "float literal" - NodeTypeBoolLiteral = "bool literal" - NodeTypeArrayLiteral = "array literal" - NodeTypeMapLiteral = "map literal" - NodeTypeMapLiteralKeyValuePair = "map-literal key-value pair" - NodeTypeArrayOrMapIndexAccess = "array or map index access" - NodeTypeArraySliceAccess = "array-slice access" - NodeTypeArraySliceEmptyLowerIndex = "array-slice empty lower index" - NodeTypeArraySliceEmptyUpperIndex = "array-slice empty upper index" + NodeTypeStringLiteral TNodeType = "string literal" + NodeTypeIntLiteral = "int literal" + NodeTypeFloatLiteral = "float literal" + NodeTypeBoolLiteral = "bool literal" + NodeTypeArrayLiteral = "array literal" + NodeTypeMapLiteral = "map literal" + NodeTypeMapLiteralKeyValuePair = "map-literal key-value pair" + NodeTypeArrayOrMapIndexAccess = "array or map index access" + NodeTypeArraySliceAccess = "array-slice access" + NodeTypeArraySliceEmptyLowerIndex = "array-slice empty lower index" + NodeTypeArraySliceEmptyUpperIndex = "array-slice 
empty upper index" NodeTypePositionalFieldName = "positionally-indexed field name" NodeTypePositionalFieldValue = "positionally-indexed field value" @@ -85,7 +85,7 @@ const ( // This helps various emit-variant sub-ASTs have the same shape. For // example, in 'emit > "foo.txt", @v' and 'emit @v', the latter has a no-op // for its redirect target. - NodeTypeNoOp TNodeType = "no-op" + NodeTypeNoOp = "no-op" NodeTypeOperator = "operator" NodeTypeFunctionCallsite = "function callsite" diff --git a/go/src/miller/dsl/cst/dump.go b/go/src/miller/dsl/cst/dump.go index b99ee49b5..01c3dedd8 100644 --- a/go/src/miller/dsl/cst/dump.go +++ b/go/src/miller/dsl/cst/dump.go @@ -1,72 +1,231 @@ // ================================================================ -// This handles print and dump statements. +// This handles dump and edump statements. +// See print.go for comments; this is similar. +// +// Differences between print and dump: +// +// * 'print $x' and 'dump $x' are the same. +// +// * 'print' and 'dump' with no specific value: print outputs a newline; dump +// outputs a JSON representation of all out-of-stream variables. +// +// * 'print $x,$y,$z' prints all items on one line; 'dump $x,$y,$z' prints each on +// its own line. 
// ================================================================ package cst import ( + "bytes" + "errors" "fmt" "os" + "strings" "miller/dsl" "miller/lib" + "miller/types" ) // ================================================================ +type tDumpToRedirectFunc func( + outputString string, + state *State, +) error + type DumpStatementNode struct { - // TODO: redirect options - ostream *os.File - expressions []IEvaluable - // xxx redirect + expressionEvaluables []IEvaluable + dumpToRedirectFunc tDumpToRedirectFunc + redirectorTargetEvaluable IEvaluable // for file/pipe targets + outputHandlerManager OutputHandlerManager // for file/pipe targets } // ---------------------------------------------------------------- func (this *RootNode) BuildDumpStatementNode(astNode *dsl.ASTNode) (IExecutable, error) { lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeDumpStatement) - return this.BuildDumpxStatementNode(astNode, os.Stdout) + return this.buildDumpxStatementNode( + astNode, + os.Stdout, + ) } func (this *RootNode) BuildEdumpStatementNode(astNode *dsl.ASTNode) (IExecutable, error) { lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeEdumpStatement) - return this.BuildDumpxStatementNode(astNode, os.Stderr) -} - -// Common code for building dump/edump nodes -func (this *RootNode) BuildDumpxStatementNode( - astNode *dsl.ASTNode, - ostream *os.File, -) (IExecutable, error) { - lib.InternalCodingErrorIf(len(astNode.Children) != 2) - expressionsNode := astNode.Children[0] - - expressions := make([]IEvaluable, len(expressionsNode.Children)) - for i, childNode := range expressionsNode.Children { - expression, err := this.BuildEvaluableNode(childNode) - if err != nil { - return nil, err - } - expressions[i] = expression - } - - return &DumpStatementNode{ - ostream, - expressions, - }, nil + return this.buildDumpxStatementNode( + astNode, + os.Stderr, + ) } // ---------------------------------------------------------------- -func (this *DumpStatementNode) 
Execute(state *State) (*BlockExitPayload, error) { - if len(this.expressions) == 0 { // 'dump' without argument means 'dump @*' - // Not Fprintln since JSON output is LF-terminated already - fmt.Fprint(this.ostream, state.Oosvars.String()) +// Common code for building dump/edump nodes + +func (this *RootNode) buildDumpxStatementNode( + astNode *dsl.ASTNode, + defaultOutputStream *os.File, +) (IExecutable, error) { + lib.InternalCodingErrorIf(len(astNode.Children) != 2) + expressionsNode := astNode.Children[0] + redirectorNode := astNode.Children[1] + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Things to be dumped, e.g. $a and $b in 'dump > "foo.dat", $a, $b'. + + var expressionEvaluables []IEvaluable = nil + + if expressionsNode.Type == dsl.NodeTypeNoOp { + // Just 'dump' without 'dump $something' + expressionEvaluables = make([]IEvaluable, 1) + expressionEvaluable := this.BuildFullOosvarRvalueNode() + expressionEvaluables[0] = expressionEvaluable + } else if expressionsNode.Type == dsl.NodeTypeFunctionCallsite { + expressionEvaluables = make([]IEvaluable, len(expressionsNode.Children)) + for i, childNode := range expressionsNode.Children { + expressionEvaluable, err := this.BuildEvaluableNode(childNode) + if err != nil { + return nil, err + } + expressionEvaluables[i] = expressionEvaluable + } } else { - for _, expression := range this.expressions { - evaluation := expression.Evaluate(state) - if !evaluation.IsAbsent() { - fmt.Fprintln(this.ostream, evaluation.String()) + lib.InternalCodingErrorIf(true) + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Redirection targets (the thing after > >> |, if any). + + retval := &DumpStatementNode{ + expressionEvaluables: expressionEvaluables, + dumpToRedirectFunc: nil, + redirectorTargetEvaluable: nil, + outputHandlerManager: nil, + } + + if redirectorNode.Type == dsl.NodeTypeNoOp { + // No > >> or | was provided. 
+ if defaultOutputStream == os.Stdout { + retval.dumpToRedirectFunc = retval.dumpToStdout + } else if defaultOutputStream == os.Stderr { + retval.dumpToRedirectFunc = retval.dumpToStderr + } else { + lib.InternalCodingErrorIf(true) + } + } else { + // There is > >> or | provided. + lib.InternalCodingErrorIf(redirectorNode.Children == nil) + lib.InternalCodingErrorIf(len(redirectorNode.Children) != 1) + redirectorTargetNode := redirectorNode.Children[0] + var err error = nil + + if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStdout { + retval.dumpToRedirectFunc = retval.dumpToStdout + } else if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStderr { + retval.dumpToRedirectFunc = retval.dumpToStderr + } else { + retval.dumpToRedirectFunc = retval.dumpToFileOrPipe + + retval.redirectorTargetEvaluable, err = this.BuildEvaluableNode(redirectorTargetNode) + if err != nil { + return nil, err + } + + if redirectorNode.Type == dsl.NodeTypeRedirectWrite { + retval.outputHandlerManager = NewFileWritetHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectAppend { + retval.outputHandlerManager = NewFileAppendHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { + retval.outputHandlerManager = NewPipeWriteHandlerManager() + } else { + return nil, errors.New( + fmt.Sprintf( + "%s: unhandled redirector node type %s.", + os.Args[0], string(redirectorNode.Type), + ), + ) } } } + // TODO: root node register outputHandlerManager to add to close-handles list + + return retval, nil +} + +// ---------------------------------------------------------------- +func (this *DumpStatementNode) Execute(state *State) (*BlockExitPayload, error) { + // 5x faster than fmt.Dump() separately: note that os.Stdout is + // non-buffered in Go whereas stdout is buffered in C. + // + // Minus: we need to do our own buffering for performance. + // + // Plus: we never have to worry about forgetting to do fflush(). 
:) + var buffer bytes.Buffer + + for _, expressionEvaluable := range this.expressionEvaluables { + evaluation := expressionEvaluable.Evaluate(state) + if !evaluation.IsAbsent() { + s := evaluation.String() + buffer.WriteString(s) + // Dump of 1 is "1", needs newline; similar for other atomics. + // Dump of JSON objects already ends in newline and doesn't need + // another. + if !strings.HasSuffix(s, "\n") { + buffer.WriteString("\n") + } + } + } + outputString := buffer.String() + this.dumpToRedirectFunc(outputString, state) return nil, nil } + +// ---------------------------------------------------------------- +type FullOosvarDumpNode struct { +} + +func (this *RootNode) BuildFullOosvarDumpNode() *FullOosvarDumpNode { + return &FullOosvarDumpNode{} +} +func (this *FullOosvarDumpNode) Evaluate(state *State) types.Mlrval { + return types.MlrvalFromString(state.Oosvars.String()) +} + +// ---------------------------------------------------------------- +func (this *DumpStatementNode) dumpToStdout( + outputString string, + state *State, +) error { + // Insert the string into the record-output stream, so that goroutine can + // print it, resulting in deterministic output-ordering. 
+ state.OutputChannel <- types.NewOutputString(outputString, state.Context) + return nil +} + +// ---------------------------------------------------------------- +func (this *DumpStatementNode) dumpToStderr( + outputString string, + state *State, +) error { + fmt.Fprintf(os.Stderr, outputString) + return nil +} + +// ---------------------------------------------------------------- +func (this *DumpStatementNode) dumpToFileOrPipe( + outputString string, + state *State, +) error { + redirectorTarget := this.redirectorTargetEvaluable.Evaluate(state) + if !redirectorTarget.IsString() { + return errors.New( + fmt.Sprintf( + "%s: output redirection yielded %s, not string.", + os.Args[0], redirectorTarget.GetTypeName(), + ), + ) + } + outputFileName := redirectorTarget.String() + + this.outputHandlerManager.Print(outputString, outputFileName) + return nil +} diff --git a/go/src/miller/dsl/cst/leaves.go b/go/src/miller/dsl/cst/leaves.go index 8fe3cc6f9..8e441a811 100644 --- a/go/src/miller/dsl/cst/leaves.go +++ b/go/src/miller/dsl/cst/leaves.go @@ -26,14 +26,14 @@ func (this *RootNode) BuildLeafNode( return this.BuildDirectFieldRvalueNode(sval), nil break case dsl.NodeTypeFullSrec: - return this.BuildFullSrecRvalueNode(sval), nil + return this.BuildFullSrecRvalueNode(), nil break case dsl.NodeTypeDirectOosvarValue: return this.BuildDirectOosvarRvalueNode(sval), nil break case dsl.NodeTypeFullOosvar: - return this.BuildFullOosvarRvalueNode(sval), nil + return this.BuildFullOosvarRvalueNode(), nil break case dsl.NodeTypeLocalVariable: @@ -99,7 +99,7 @@ func (this *DirectFieldRvalueNode) Evaluate(state *State) types.Mlrval { type FullSrecRvalueNode struct { } -func (this *RootNode) BuildFullSrecRvalueNode(fieldName string) *FullSrecRvalueNode { +func (this *RootNode) BuildFullSrecRvalueNode() *FullSrecRvalueNode { return &FullSrecRvalueNode{} } func (this *FullSrecRvalueNode) Evaluate(state *State) types.Mlrval { @@ -129,7 +129,7 @@ func (this *DirectOosvarRvalueNode) 
Evaluate(state *State) types.Mlrval { type FullOosvarRvalueNode struct { } -func (this *RootNode) BuildFullOosvarRvalueNode(fieldName string) *FullOosvarRvalueNode { +func (this *RootNode) BuildFullOosvarRvalueNode() *FullOosvarRvalueNode { return &FullOosvarRvalueNode{} } func (this *FullOosvarRvalueNode) Evaluate(state *State) types.Mlrval { diff --git a/go/src/miller/dsl/cst/output-handlers.go b/go/src/miller/dsl/cst/output-handlers.go index a76938c9a..d5b27dbd4 100644 --- a/go/src/miller/dsl/cst/output-handlers.go +++ b/go/src/miller/dsl/cst/output-handlers.go @@ -21,7 +21,8 @@ import ( "fmt" "io" "os" - "os/exec" + + "miller/lib" ) // ================================================================ @@ -167,12 +168,8 @@ func NewFileAppendOutputHandler( func NewPipeWriteOutputHandler( commandString string, ) (*FileOutputHandler, error) { - commandHandle := exec.Command( - "bash", - "-c", - commandString, - ) - if commandHandle == nil { + writePipe, err := lib.OpenOutboundHalfPipe(commandString) + if err != nil { return nil, errors.New( fmt.Sprintf( "%s: could not launch command \"%s\" for pipe-to.", @@ -182,21 +179,9 @@ func NewPipeWriteOutputHandler( ) } - commandWriteHandle, err := commandHandle.StdinPipe() - if err != nil { - return nil, err - } - - // TODO: make the Stdout/Stderr pipes and spawn a goroutine to print them - - err = commandHandle.Start() - if err != nil { - return nil, err - } - return &FileOutputHandler{ filename: "| " + commandString, - handle: commandWriteHandle, + handle: writePipe, closeable: true, }, nil } diff --git a/go/src/miller/dsl/cst/print.go b/go/src/miller/dsl/cst/print.go index fcd52c8a6..18e180ba9 100644 --- a/go/src/miller/dsl/cst/print.go +++ b/go/src/miller/dsl/cst/print.go @@ -1,9 +1,7 @@ // ================================================================ -// This handles print and dump statements. +// This handles print, printn, eprint, and eprintn statements. 
// ================================================================ -// TODO: needs lots of comments - package cst import ( @@ -14,20 +12,153 @@ import ( "miller/dsl" "miller/lib" + "miller/types" ) +// ---------------------------------------------------------------- +// Example ASTs: +// +// $ mlr -n put -v 'print $a, $b' +// DSL EXPRESSION: +// print $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// * no-op +// +// $ mlr -n put -v 'print > stdout, $a, $b' +// DSL EXPRESSION: +// print > stdout, $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// * redirect write ">" +// * stdout redirect target "stdout" +// +// $ mlr -n put -v 'print > stderr, $a, $b' +// DSL EXPRESSION: +// print > stderr, $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// * redirect write ">" +// * stderr redirect target "stderr" +// +// $ mlr -n put -v 'print > "foo.dat", $a, $b' +// DSL EXPRESSION: +// print > "foo.dat", $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// * redirect write ">" +// * string literal "foo.dat" +// +// $ mlr -n put -v 'print >> "foo.dat", $a, $b' +// DSL EXPRESSION: +// print >> "foo.dat", $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// * redirect append ">>" +// * string literal "foo.dat" +// +// $ mlr -n put -v 'print | "command", $a, $b' +// DSL EXPRESSION: +// print | "command", $a, $b +// RAW AST: +// * statement block +// * print statement "print" +// * function callsite +// * direct field value "a" +// * direct field value "b" +// 
* redirect pipe "|" +// * string literal "command" +// +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// Corresponding data structures for these cases: +// +// * printToRedirectFunc is either printToStdout, printToStderr, or +// printToFileOrPipe. Only the third of these takes a non-nil +// redirectorTargetEvaluable and a non-nil outputHandlerManager. +// +// * redirectorTargetEvaluable is nil for stdout or stderr. +// +// * The OutputHandlerManager is for file names or commands in >, >> or |. +// This is because the target for the redirect can vary from one record to +// the next, e.g. mlr put 'print > $a.txt, $b'. The OutputHandlerManager +// keeps file-handles for each distinct value of $a. +// +// So: +// +// * print $a, $b +// AST redirectorNode = NodeTypeNoOp +// AST redirectorTargetNode = (none) +// printToRedirectFunc = printToStdout +// redirectorTargetEvaluable = nil +// outputHandlerManager = nil +// +// * print > stdout, $a, $b +// AST redirectorNode = NodeTypeRedirectWrite +// AST redirectorTargetNode = NodeTypeRedirectTargetStdout +// printToRedirectFunc = printToStdout +// redirectorTargetEvaluable = nil +// outputHandlerManager = nil +// +// * print > stderr, $a, $b +// AST redirectorNode = NodeTypeRedirectWrite +// AST redirectorTargetNode = NodeTypeRedirectTargetStderr +// printToRedirectFunc = printToStderr +// redirectorTargetEvaluable = nil +// outputHandlerManager = nil +// +// * print > "foo.dat", $a, $b +// AST redirectorNode = NodeTypeRedirectWrite +// AST redirectorTargetNode = any of various evaluables +// printToRedirectFunc = printToFileOrPipe +// redirectorTargetEvaluable = non-nil +// outputHandlerManager = non-nil +// +// * print >> "foo.dat", $a, $b +// AST redirectorNode = NodeTypeRedirectAppend +// AST redirectorTargetNode = any of various evaluables +// printToRedirectFunc = printToFileOrPipe +// redirectorTargetEvaluable = non-nil +// outputHandlerManager = non-nil +// +// * print | "command", $a, $b +// 
AST redirectorNode = NodeTypeRedirectPipe +// AST redirectorTargetNode = any of various evaluables +// printToRedirectFunc = printToFileOrPipe +// redirectorTargetEvaluable = non-nil +// outputHandlerManager = non-nil + // ================================================================ -type printToRedirectFunc func( +type tPrintToRedirectFunc func( outputString string, state *State, ) error type PrintStatementNode struct { - outputHandlerManager OutputHandlerManager // TODO: comments - terminator string - expressions []IEvaluable - redirectorTarget IEvaluable - printToRedirect printToRedirectFunc + expressionEvaluables []IEvaluable + terminator string + printToRedirectFunc tPrintToRedirectFunc + redirectorTargetEvaluable IEvaluable // for file/pipe targets + outputHandlerManager OutputHandlerManager // for file/pipe targets } // ---------------------------------------------------------------- @@ -69,43 +200,6 @@ func (this *RootNode) BuildEprintnStatementNode(astNode *dsl.ASTNode) (IExecutab // ---------------------------------------------------------------- // Common code for building print/eprint/printn/eprintn nodes -// -// Example ASTs: -// -// $ mlr -n put -v 'print 1, 2' -// DSL EXPRESSION: -// print 1, 2 -// RAW AST: -// * statement block -// * print statement "print" -// * function callsite -// * int literal "1" -// * int literal "2" -// * no-op -// -// $ mlr -n put -v 'print > "foo", 1, 2' -// DSL EXPRESSION: -// print > "foo", 1, 2 -// RAW AST: -// * statement block -// * print statement "print" -// * function callsite -// * int literal "1" -// * int literal "2" -// * redirect write ">" -// * string literal "foo" -// -// $ mlr -n put -v 'print >> "foo", 1, 2' -// DSL EXPRESSION: -// print >> "foo", 1, 2 -// RAW AST: -// * statement block -// * print statement "print" -// * function callsite -// * int literal "1" -// * int literal "2" -// * redirect append ">>" -// * string literal "foo" func (this *RootNode) buildPrintxStatementNode( astNode 
*dsl.ASTNode, @@ -113,92 +207,117 @@ func (this *RootNode) buildPrintxStatementNode( terminator string, ) (IExecutable, error) { lib.InternalCodingErrorIf(len(astNode.Children) != 2) - expressionsNode := astNode.Children[0] - redirectNode := astNode.Children[1] + redirectorNode := astNode.Children[1] - expressions := make([]IEvaluable, len(expressionsNode.Children)) - for i, childNode := range expressionsNode.Children { - expression, err := this.BuildEvaluableNode(childNode) - if err != nil { - return nil, err + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Things to be printed, e.g. $a and $b in 'print > "foo.dat", $a, $b'. + + var expressionEvaluables []IEvaluable = nil + + if expressionsNode.Type == dsl.NodeTypeNoOp { + // Just 'print' without 'print $something' + expressionEvaluables = make([]IEvaluable, 1) + expressionEvaluable := this.BuildStringLiteralNode("") + expressionEvaluables[0] = expressionEvaluable + } else if expressionsNode.Type == dsl.NodeTypeFunctionCallsite { + expressionEvaluables = make([]IEvaluable, len(expressionsNode.Children)) + for i, childNode := range expressionsNode.Children { + expressionEvaluable, err := this.BuildEvaluableNode(childNode) + if err != nil { + return nil, err + } + expressionEvaluables[i] = expressionEvaluable } - expressions[i] = expression - } - - // Without explicit redirect, the redirect AST node comes in as a no-op - // node from the parser. 
- var outputHandlerManager OutputHandlerManager = nil - if redirectNode.Type == dsl.NodeTypeNoOp { - // leave it nil - } else if redirectNode.Type == dsl.NodeTypeRedirectWrite { - outputHandlerManager = NewFileWritetHandlerManager() - } else if redirectNode.Type == dsl.NodeTypeRedirectAppend { - outputHandlerManager = NewFileAppendHandlerManager() - } else if redirectNode.Type == dsl.NodeTypeRedirectPipe { - outputHandlerManager = NewPipeWriteHandlerManager() } else { - return nil, errors.New( - fmt.Sprintf( - "%s: unhandled redirection node type %s.", - os.Args[0], string(redirectNode.Type), - ), - ) + lib.InternalCodingErrorIf(true) } - var redirectorTarget IEvaluable = nil - foo := &PrintStatementNode{} - printToRedirect := foo.printToStdout - - if redirectNode.Type != dsl.NodeTypeNoOp { - lib.InternalCodingErrorIf(redirectNode.Children == nil) - lib.InternalCodingErrorIf(len(redirectNode.Children) != 1) - redirectorTargetNode := redirectNode.Children[0] - var err error = nil - redirectorTarget, err = this.BuildEvaluableNode(redirectorTargetNode) - if err != nil { - return nil, err - } - if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStdout { - printToRedirect = foo.printToStdout - } else if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStderr { - printToRedirect = foo.printToStderr - } else { - printToRedirect = foo.printToFileOrPipe - } - } - - // TODO: root node register oututHandlerManager to add to close-handles list + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Redirection targets (the thing after > >> |, if any). 
retval := &PrintStatementNode{ - outputHandlerManager: outputHandlerManager, - terminator: terminator, - expressions: expressions, - redirectorTarget: redirectorTarget, - printToRedirect: printToRedirect, + expressionEvaluables: expressionEvaluables, + terminator: terminator, + printToRedirectFunc: nil, + redirectorTargetEvaluable: nil, + outputHandlerManager: nil, } + if redirectorNode.Type == dsl.NodeTypeNoOp { + // No > >> or | was provided. + if defaultOutputStream == os.Stdout { + retval.printToRedirectFunc = retval.printToStdout + } else if defaultOutputStream == os.Stderr { + retval.printToRedirectFunc = retval.printToStderr + } else { + lib.InternalCodingErrorIf(true) + } + } else { + // There is > >> or | provided. + lib.InternalCodingErrorIf(redirectorNode.Children == nil) + lib.InternalCodingErrorIf(len(redirectorNode.Children) != 1) + redirectorTargetNode := redirectorNode.Children[0] + var err error = nil + + if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStdout { + retval.printToRedirectFunc = retval.printToStdout + } else if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStderr { + retval.printToRedirectFunc = retval.printToStderr + } else { + retval.printToRedirectFunc = retval.printToFileOrPipe + + retval.redirectorTargetEvaluable, err = this.BuildEvaluableNode(redirectorTargetNode) + if err != nil { + return nil, err + } + + if redirectorNode.Type == dsl.NodeTypeRedirectWrite { + retval.outputHandlerManager = NewFileWritetHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectAppend { + retval.outputHandlerManager = NewFileAppendHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { + retval.outputHandlerManager = NewPipeWriteHandlerManager() + } else { + return nil, errors.New( + fmt.Sprintf( + "%s: unhandled redirector node type %s.", + os.Args[0], string(redirectorNode.Type), + ), + ) + } + } + } + + // TODO: root node register outputHandlerManager to add to close-handles list + return 
retval, nil } // ---------------------------------------------------------------- func (this *PrintStatementNode) Execute(state *State) (*BlockExitPayload, error) { - if len(this.expressions) == 0 { - this.printToRedirect(this.terminator, state) + if len(this.expressionEvaluables) == 0 { + this.printToRedirectFunc(this.terminator, state) } else { - var buffer bytes.Buffer // 5x faster than fmt.Print() separately + // 5x faster than fmt.Print() separately: note that os.Stdout is + // non-buffered in Go whereas stdout is buffered in C. + // + // Minus: we need to do our own buffering for performance. + // + // Plus: we never have to worry about forgetting to do fflush(). :) + var buffer bytes.Buffer - for i, expression := range this.expressions { + for i, expressionEvaluable := range this.expressionEvaluables { if i > 0 { buffer.WriteString(" ") } - evaluation := expression.Evaluate(state) + evaluation := expressionEvaluable.Evaluate(state) if !evaluation.IsAbsent() { buffer.WriteString(evaluation.String()) } } buffer.WriteString(this.terminator) - this.printToRedirect(buffer.String(), state) + this.printToRedirectFunc(buffer.String(), state) } return nil, nil } @@ -208,7 +327,9 @@ func (this *PrintStatementNode) printToStdout( outputString string, state *State, ) error { - fmt.Fprint(os.Stdout, outputString) + // Insert the string into the record-output stream, so that goroutine can + // print it, resulting in deterministic output-ordering. 
+ state.OutputChannel <- types.NewOutputString(outputString, state.Context) return nil } @@ -226,16 +347,16 @@ func (this *PrintStatementNode) printToFileOrPipe( outputString string, state *State, ) error { - redirectorEvaluation := this.redirectorTarget.Evaluate(state) - if !redirectorEvaluation.IsString() { + redirectorTarget := this.redirectorTargetEvaluable.Evaluate(state) + if !redirectorTarget.IsString() { return errors.New( fmt.Sprintf( "%s: output redirection yielded %s, not string.", - os.Args[0], redirectorEvaluation.GetTypeName(), + os.Args[0], redirectorTarget.GetTypeName(), ), ) } - outputFileName := redirectorEvaluation.String() + outputFileName := redirectorTarget.String() this.outputHandlerManager.Print(outputString, outputFileName) return nil diff --git a/go/src/miller/dsl/cst/tee.go b/go/src/miller/dsl/cst/tee.go index 539266eab..2ecc2e625 100644 --- a/go/src/miller/dsl/cst/tee.go +++ b/go/src/miller/dsl/cst/tee.go @@ -1,57 +1,179 @@ // ================================================================ -// This handles tee statements. This produces new records (in addition to $*) -// into th output record stream. +// This handles tee statements. // ================================================================ package cst import ( + "errors" "fmt" + "os" "miller/dsl" "miller/lib" + "miller/types" ) -// ================================================================ +// ---------------------------------------------------------------- // Examples: -// tee @a -// tee @a, @b +// tee > "foo.dat", $* +// tee > stderr, $* +// tee > stdout, $* +// tee | "jq .", $* // -// Each argument must be a non-indexed oosvar/localvar/fieldname, so we can use -// their names as keys in the emitted record. These restrictions are enforced -// in the CST logic, to keep this parser/AST logic simpler. +// The item being teed can only be $*. This is a special case of emit. (This +// doesn't do anything emit can't do.) 
+// +// $ mlr -n put -v 'tee > stdout, $*' +// DSL EXPRESSION: +// tee > stdout, $* +// RAW AST: +// * statement block +// * tee statement "tee" +// * full record "$*" +// * redirect write ">" +// * stdout redirect target "stdout" +// +// $ mlr -n put -v 'tee > "foo.dat", $*' +// DSL EXPRESSION: +// tee > "foo.dat", $* +// RAW AST: +// * statement block +// * tee statement "tee" +// * full record "$*" +// * redirect write ">" +// * string literal "foo.dat" +// +// $ mlr -n put -v 'tee | "jq .", $*' +// DSL EXPRESSION: +// tee | "jq .", $* +// RAW AST: +// * statement block +// * tee statement "tee" +// * full record "$*" +// * redirect pipe "|" +// * string literal "jq ." +// ---------------------------------------------------------------- + +// ================================================================ +type tTeeToRedirectFunc func( + outputString string, + state *State, +) error type TeeStatementNode struct { - teeEvaluable IEvaluable - // xxx redirect + expressionEvaluable IEvaluable // always the $* evaluable + teeToRedirectFunc tTeeToRedirectFunc + redirectorTargetEvaluable IEvaluable // for file/pipe targets + outputHandlerManager OutputHandlerManager // for file/pipe targets } // ---------------------------------------------------------------- -// Example: -// 'tee > "foo.dat", $*' -// Only $* can be the expression for tee. (This is a syntactic special case of emit.) - func (this *RootNode) BuildTeeStatementNode(astNode *dsl.ASTNode) (IExecutable, error) { lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeTeeStatement) lib.InternalCodingErrorIf(len(astNode.Children) != 2) - expressionNode := astNode.Children[0] + redirectorNode := astNode.Children[1] - teeEvaluable, err := this.BuildEvaluableNode(expressionNode) - if err != nil { - return nil, err + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Expression to be teed, which is $*.
+ + lib.InternalCodingErrorIf(expressionNode.Type != dsl.NodeTypeFullSrec) + expressionEvaluable := this.BuildFullSrecRvalueNode() + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + // Redirection targets (the thing after > >> |, if any). + + retval := &TeeStatementNode{ + expressionEvaluable: expressionEvaluable, + teeToRedirectFunc: nil, + redirectorTargetEvaluable: nil, + outputHandlerManager: nil, } - return &TeeStatementNode{ - teeEvaluable: teeEvaluable, - }, nil + + // There is > >> or | provided. + lib.InternalCodingErrorIf(redirectorNode.Children == nil) + lib.InternalCodingErrorIf(len(redirectorNode.Children) != 1) + redirectorTargetNode := redirectorNode.Children[0] + var err error = nil + + if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStdout { + retval.teeToRedirectFunc = retval.teeToStdout + } else if redirectorTargetNode.Type == dsl.NodeTypeRedirectTargetStderr { + retval.teeToRedirectFunc = retval.teeToStderr + } else { + retval.teeToRedirectFunc = retval.teeToFileOrPipe + + retval.redirectorTargetEvaluable, err = this.BuildEvaluableNode(redirectorTargetNode) + if err != nil { + return nil, err + } + + if redirectorNode.Type == dsl.NodeTypeRedirectWrite { + retval.outputHandlerManager = NewFileWritetHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectAppend { + retval.outputHandlerManager = NewFileAppendHandlerManager() + } else if redirectorNode.Type == dsl.NodeTypeRedirectPipe { + retval.outputHandlerManager = NewPipeWriteHandlerManager() + } else { + return nil, errors.New( + fmt.Sprintf( + "%s: unhandled redirector node type %s.", + os.Args[0], string(redirectorNode.Type), + ), + ) + } + } + + // TODO: root node register outputHandlerManager to add to close-handles list + + return retval, nil } +// ---------------------------------------------------------------- func (this *TeeStatementNode) Execute(state *State) (*BlockExitPayload, error) { - teeValue := this.teeEvaluable.Evaluate(state) - 
if !teeValue.IsAbsent() { - // xxx temp - fmt.Println(teeValue.String()) - } - + evaluation := this.expressionEvaluable.Evaluate(state) + outputString := evaluation.String() + this.teeToRedirectFunc(outputString, state) return nil, nil } + +// ---------------------------------------------------------------- +func (this *TeeStatementNode) teeToStdout( + outputString string, + state *State, +) error { + // Insert the string into the record-output stream, so that goroutine can + // print it, resulting in deterministic output-ordering. + state.OutputChannel <- types.NewOutputString(outputString, state.Context) + return nil +} + +// ---------------------------------------------------------------- +func (this *TeeStatementNode) teeToStderr( + outputString string, + state *State, +) error { + fmt.Fprintf(os.Stderr, outputString) + return nil +} + +// ---------------------------------------------------------------- +func (this *TeeStatementNode) teeToFileOrPipe( + outputString string, + state *State, +) error { + redirectorTarget := this.redirectorTargetEvaluable.Evaluate(state) + if !redirectorTarget.IsString() { + return errors.New( + fmt.Sprintf( + "%s: output redirection yielded %s, not string.", + os.Args[0], redirectorTarget.GetTypeName(), + ), + ) + } + outputFileName := redirectorTarget.String() + + this.outputHandlerManager.Print(outputString, outputFileName) + return nil +} diff --git a/go/src/miller/lib/halfpipe.go b/go/src/miller/lib/halfpipe.go new file mode 100644 index 000000000..8a320dc23 --- /dev/null +++ b/go/src/miller/lib/halfpipe.go @@ -0,0 +1,43 @@ +package lib + +import ( + "os" +) + +// OpenOutboundHalfPipe returns a handle to a process. Writing to that handle +// writes to the process' stdin. The process' stdout and stderr are the current +// process' stdout and stderr. +// +// This is for pipe-redirection in the Miller put/filter DSL. +// +// Note I am not using os.exec.Cmd which is billed as being simpler than using +// os.StartProcess. 
It may indeed be simpler when you want to handle the +// subprocess' stdin/stdout/stderr all three within the parent process. Here I +// found it much easier to use os.StartProcess to let the stdout/stderr run +// free. + +func OpenOutboundHalfPipe(commandString string) (*os.File, error) { + readPipe, writePipe, err := os.Pipe() + + var procAttr os.ProcAttr + procAttr.Files = []*os.File{ + readPipe, + os.Stdout, + os.Stderr, + } + + args := []string{ + "/bin/sh", + "-c", + commandString, + } + + process, err := os.StartProcess(args[0], args, &procAttr) + if err != nil { + return nil, err + } + + go process.Wait() + + return writePipe, nil +} diff --git a/go/src/miller/parsing/mlr.bnf b/go/src/miller/parsing/mlr.bnf index dc77927e9..551147f63 100644 --- a/go/src/miller/parsing/mlr.bnf +++ b/go/src/miller/parsing/mlr.bnf @@ -951,6 +951,8 @@ Emittable | DirectOosvarValue | DirectFieldValue | MapLiteral + | FullOosvar + | FullSrec ; // ---------------------------------------------------------------- diff --git a/go/todo.txt b/go/todo.txt index 431403253..23d2ba1db 100644 --- a/go/todo.txt +++ b/go/todo.txt @@ -13,41 +13,23 @@ TOP OF LIST: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - emitp/emitf: -! ochan mod for all stdouts - o cst-build disallow | stdout,stderr - o try non-nil record @ EOS for test - o need output-handler object ... - - producers: - * print/printn/eprint/eprintn - * dump/edump - * tee - * emitf - * emit/emitp - - types: - * > stdout|stderr|filename - * >> stdout|stderr|filename - * | command - - targets: - * stdout -- ochan <- x - * stderr -- out immediately - * write-to-file -- out immediately - * append-to-file -- out immediately - * write-to-pipe -- out immediately - - still needs pipe - - still needs ochan for stdout - - still needs to hook in a way to close at shutdown -* bug - run_mlr $input put $vflag '@x={"a":1}; @y={"b":2}; emit (@x, @y), "a"' +! 
panic + run_mlr --from s put '@x={"a":1}; @y={"b":2}; emit (@x, @y), "a"' +* implement tee +* exhaustive redirector cases +* exhaustive terminator cases +* root-node register outputHandlerManager to add to close-handles list +* try non-nil record @ EOS for test * new emitx punctuation-syntax -- decide x 4 -* to support (from C impl): - x MD_TOKEN_ALL - x md_fcn_or_subr_call - x md_indexed_local_variable - x md_oosvar_keylist - k MD_TOKEN_FULL_OOSVAR - k MD_TOKEN_FULL_SREC - k md_map_literal - k md_nonindexed_local_variable + o to support (from C impl): + x MD_TOKEN_ALL + x md_fcn_or_subr_call + x md_indexed_local_variable + x md_oosvar_keylist + k MD_TOKEN_FULL_OOSVAR + k MD_TOKEN_FULL_SREC + k md_map_literal + k md_nonindexed_local_variable * double-check all examples at https://miller.readthedocs.io/en/latest/reference-dsl.html#emit-statements * mlr: tee statements are not valid within func blocks