This commit is contained in:
John Kerl 2022-11-26 00:23:12 -05:00
parent a299ce22fe
commit 95be06b752
15 changed files with 345 additions and 340 deletions

View file

@ -9,7 +9,8 @@ import (
"github.com/johnkerl/miller/internal/pkg/mlrval"
)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
//
// string_cmp implements the spaceship operator for strings.
func string_cmp(a, b string) int64 {
if a < b {
@ -43,7 +44,7 @@ func float_cmp(a, b float64) int64 {
return 0
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ss is the equality comparator for the string/string case: it acquires
// the string value of each operand, compares the two with Go's ==, and wraps
// the outcome in a boolean Mlrval.
func eq_b_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireStringValue()
	rhs := input2.AcquireStringValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -66,7 +67,7 @@ func cmp_b_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int64(string_cmp(input1.AcquireStringValue(), input2.AcquireStringValue())))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_xs is the equality comparator where the left operand is rendered via
// its String() method and the right operand supplies its acquired string
// value. The two are compared with == and the result is returned as a
// boolean Mlrval.
func eq_b_xs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.String()
	rhs := input2.AcquireStringValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -89,7 +90,7 @@ func cmp_b_xs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int64(string_cmp(input1.String(), input2.AcquireStringValue())))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_sx is the mirror image of the x/s comparator: the left operand
// supplies its acquired string value and the right operand is rendered via
// its String() method. The == result is returned as a boolean Mlrval.
func eq_b_sx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireStringValue()
	rhs := input2.String()
	return mlrval.FromBool(lhs == rhs)
}
@ -112,7 +113,7 @@ func cmp_b_sx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(string_cmp(input1.AcquireStringValue(), input2.String()))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ii is the equality comparator for the int/int case: it acquires the
// int value of each operand and returns the == result as a boolean Mlrval.
func eq_b_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireIntValue()
	rhs := input2.AcquireIntValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -135,7 +136,7 @@ func cmp_b_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int_cmp(input1.AcquireIntValue(), input2.AcquireIntValue()))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_if is the equality comparator for the int/float case. The left
// operand's int value is converted to float64 so that both sides are compared
// as floats; the == result is returned as a boolean Mlrval.
func eq_b_if(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := float64(input1.AcquireIntValue())
	rhs := input2.AcquireFloatValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -158,7 +159,7 @@ func cmp_b_if(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(float64(input1.AcquireIntValue()), input2.AcquireFloatValue()))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_fi is the equality comparator for the float/int case. The right
// operand's int value is converted to float64 so that both sides are compared
// as floats; the == result is returned as a boolean Mlrval.
func eq_b_fi(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireFloatValue()
	rhs := float64(input2.AcquireIntValue())
	return mlrval.FromBool(lhs == rhs)
}
@ -181,7 +182,7 @@ func cmp_b_fi(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(input1.AcquireFloatValue(), float64(input2.AcquireIntValue())))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ff is the equality comparator for the float/float case: it acquires
// the float value of each operand and returns the == result as a boolean
// Mlrval.
func eq_b_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireFloatValue()
	rhs := input2.AcquireFloatValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -204,7 +205,7 @@ func cmp_b_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(input1.AcquireFloatValue(), input2.AcquireFloatValue()))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_bb is the equality comparator for the bool/bool case: it acquires the
// bool value of each operand and returns the == result as a boolean Mlrval.
func eq_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireBoolValue()
	rhs := input2.AcquireBoolValue()
	return mlrval.FromBool(lhs == rhs)
}
@ -231,7 +232,7 @@ func cmp_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int_cmp(lib.BoolToInt(input1.AcquireBoolValue()), lib.BoolToInt(input2.AcquireBoolValue())))
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
func eq_b_aa(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
a := input1.AcquireArrayValue()
b := input2.AcquireArrayValue()
@ -257,7 +258,7 @@ func ne_b_aa(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromBool(!output.AcquireBoolValue())
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_mm is the equality comparator for the map/map case: it acquires the
// map value of each operand and delegates the comparison to the left map's
// Equals method, returning the outcome as a boolean Mlrval.
func eq_b_mm(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireMapValue()
	rhs := input2.AcquireMapValue()
	return mlrval.FromBool(lhs.Equals(rhs))
}

View file

@ -867,17 +867,17 @@ func unaliasArrayLengthIndex(n int, mindex int) (int, bool) {
}
// MillerSliceAccess is code shared by the string-slicer and the array-slicer.
// * Miller indices are 1-up, 1..n where n is the length of the array/string.
// They are also aliased -n..-1. These are called "mindex" (if int) or "index mlrval"
// (if mlrval).
// * Go indices are 0-up, with no aliasing. These are called "zindex".
// * The job of this routine is to map a pair of index-mlrval to a pair of zindex,
// with possible outcomes that the slice access should result in an empty array/string,
// or Mlrval of type absent, or Mlrval of type error.
// * Callsites include the DSL array-slicer (e.g. [1,2,3,4,5][2:3]), the DSL string-slicer
// (e.g. "abcde"[2:3]), the substr1 function (e.g. substr1("abcde", 2, 3), and the substr0
// function (e.g. substr0("abcde", 1, 2)).
// * The isZeroUp argument is in support of substr0.
// - Miller indices are 1-up, 1..n where n is the length of the array/string.
// They are also aliased -n..-1. These are called "mindex" (if int) or "index mlrval"
// (if mlrval).
// - Go indices are 0-up, with no aliasing. These are called "zindex".
// - The job of this routine is to map a pair of index-mlrval to a pair of zindex,
// with possible outcomes that the slice access should result in an empty array/string,
// or Mlrval of type absent, or Mlrval of type error.
// - Callsites include the DSL array-slicer (e.g. [1,2,3,4,5][2:3]), the DSL string-slicer
// (e.g. "abcde"[2:3]), the substr1 function (e.g. substr1("abcde", 2, 3)), and the substr0
// function (e.g. substr0("abcde", 1, 2)).
// - The isZeroUp argument is in support of substr0.
func MillerSliceAccess(
lowerIndexMlrval *mlrval.Mlrval,
upperIndexMlrval *mlrval.Mlrval,

View file

@ -50,13 +50,13 @@ import (
// Data types used within the flags table.
// FlagParser is a function which takes a flag such as `--foo`.
// * It should assume that a flag.Owns method has already been invoked to be
// sure that this function is indeed the right one to call for `--foo`.
// * The FlagParser function is responsible for advancing *pargi by 1 (if
// `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
// the latter case, and mutating the options struct.
// * Successful handling of the flag is indicated by this function making a
// non-zero increment of *pargi.
// - It should assume that a flag.Owns method has already been invoked to be
// sure that this function is indeed the right one to call for `--foo`.
// - The FlagParser function is responsible for advancing *pargi by 1 (if
// `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
// the latter case, and mutating the options struct.
// - Successful handling of the flag is indicated by this function making a
// non-zero increment of *pargi.
type FlagParser func(
args []string,
argc int,

View file

@ -19,14 +19,14 @@ import (
)
// FinalizeReaderOptions does a few things.
// * If a file format was specified but one or more separators were not, a
// default specific to that file format is applied.
// * Computing regexes for IPS and IFS, and unbackslashing IRS. This is
// because the '\n' at the command line which is Go "\\n" (a backslash and an
// n) needs to become the single newline character, and likewise for "\t", etc.
// * IFS/IPS can have escapes like "\x1f" which aren't valid regex literals
// so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f'
// -- to the single byte with hex code 0x1f.
// - If a file format was specified but one or more separators were not, a
// default specific to that file format is applied.
// - Computing regexes for IPS and IFS, and unbackslashing IRS. This is
// because the '\n' at the command line which is Go "\\n" (a backslash and an
// n) needs to become the single newline character, and likewise for "\t", etc.
// - IFS/IPS can have escapes like "\x1f" which aren't valid regex literals
// so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f'
// -- to the single byte with hex code 0x1f.
func FinalizeReaderOptions(readerOptions *TReaderOptions) {
readerOptions.IFS = lib.UnhexStringLiteral(readerOptions.IFS)

View file

@ -10,12 +10,16 @@ import (
)
// maybeInterpolateDashS supports Miller scripts with shebang lines like
// #!/usr/bin/env mlr -s
// --csv tac then filter '
// NR % 2 == 1
// '
//
// #!/usr/bin/env mlr -s
// --csv tac then filter '
// NR % 2 == 1
// '
//
// invoked as
// scriptfile input1.csv input2.csv
//
// scriptfile input1.csv input2.csv
//
// The "-s" flag must be the very first command-line argument after "mlr" for
// two reasons:
// * This is how shebang lines work

View file

@ -385,21 +385,21 @@ func (node *TernaryFunctionWithStateCallsiteNode) Evaluate(
//
// Note the use of "capture" is ambiguous:
//
// * There is the regex-match part which captures submatches out
// of a full match expression, and saves them.
// - There is the regex-match part which captures submatches out
// of a full match expression, and saves them.
//
// * Then there is the part which inserts these captures into another string.
//
// * For sub/gsub, the former and latter are both within the sub/gsub routine.
// E.g. with
// - For sub/gsub, the former and latter are both within the sub/gsub routine.
// E.g. with
// $y = sub($x, "(..)_(...)", "\2:\1")
// and $x being "ab_cde", $y will be "cde:ab".
// and $x being "ab_cde", $y will be "cde:ab".
//
// * For =~ and !=~, the former are right there, but the latter can be several
// lines later. E.g.
// - For =~ and !=~, the former are right there, but the latter can be several
// lines later. E.g.
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// So: this RegexCaptureBinaryFunctionCallsiteNode only refers to the =~ and

View file

@ -244,10 +244,10 @@ type StringLiteralNode struct {
// "\9" in it. As of the original design of Miller, submatches are captured
// in one place and interpolated in another. For example:
//
// if ($x =~ "(..)_(...)" {
// ... other lines of code ...
// $y = "\2:\1";
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// This node type is for things like "\2:\1". They can occur quite far from the
// =~ callsite so we need to check all string literals to see if they have "\0"
@ -287,10 +287,10 @@ func (node *StringLiteralNode) Evaluate(
// As noted above, in things like
//
// if ($x =~ "(..)_(...)" {
// ... other lines of code ...
// $y = "\2:\1";
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// the captures can be set (by =~ or !=~) quite far from where they are used.
// This is why we consult the state.RegexCaptures here, to see if they've been

View file

@ -20,12 +20,12 @@ import (
// Namely, for "bare booleans" which are non-assignment statements like 'NR >
// 10' or 'true' or '$x =~ "(..)_(...)"' or even '1+2'.
//
// * For mlr put, bare booleans are no-ops; except side-effects (like
// regex-captures)
// * For mlr filter, they set the filter condition only if they're the last
// statement in the main block.
// * For mlr repl, similar to mlr filter: they are used to track the output to
// be printed for an expression entered at the REPL prompt.
// - For mlr put, bare booleans are no-ops; except side-effects (like
// regex-captures)
// - For mlr filter, they set the filter condition only if they're the last
// statement in the main block.
// - For mlr repl, similar to mlr filter: they are used to track the output to
// be printed for an expression entered at the REPL prompt.
type DSLInstanceType int
const (

View file

@ -272,13 +272,13 @@ func validateForLoopTwoVariableUniqueNames(astNode *dsl.ASTNode) error {
// Check against 'for ((a,a), b in $*)' or 'for ((a,b), a in $*)' -- repeated 'a'.
// AST:
// * statement block
// * multi-variable for-loop "for"
// * parameter list
// * local variable "a"
// * local variable "b"
// * local variable "a"
// * full record "$*"
// * statement block
// - multi-variable for-loop "for"
// - parameter list
// - local variable "a"
// - local variable "b"
// - local variable "a"
// - full record "$*"
// - statement block
func validateForLoopMultivariableUniqueNames(astNode *dsl.ASTNode) error {
lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeForLoopMultivariable)
keyVarsNode := astNode.Children[0]

View file

@ -124,13 +124,13 @@ func (reader *RecordReaderXTAB) processHandle(
// Given input like
//
// a 1
// b 2
// c 3
// a 1
// b 2
// c 3
//
// a 4
// b 5
// c 6
// a 4
// b 5
// c 6
//
// this function reads the input stream a line at a time, then produces
// string-lists one per stanza where a stanza is delimited by blank line, or

View file

@ -145,16 +145,16 @@ func RegexReplacementHasCaptures(
// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
// state and may be used by a DSL statement after the =~. For example, in
//
// sub($a, "(..)_(...)", "\1:\2")
// sub($a, "(..)_(...)", "\1:\2")
//
// the replacement string is an argument to sub and therefore the captures are
// confined to the implementation of the sub function. Similarly for gsub. But
// for the match operator, people can do
//
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1"
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1"
// }
//
// and the =~ callsite doesn't know if captures will be used or not. So,
// RegexMatches always returns the captures array. It is stored within the CST
@ -229,18 +229,18 @@ func RegexMatchesCompiled(
}
// InterpolateCaptures example:
// * Input $x is "ab_cde"
// * DSL expression
// - Input $x is "ab_cde"
// - DSL expression
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// ... other lines of code ...
// $y = "\2:\1";
// }
// * InterpolateCaptures is used on the evaluation of "\2:\1"
// * replacementString is "\2:\1"
// * replacementMatrix contains precomputed/cached offsets for the "\2" and
// "\1" substrings within "\2:\1"
// * captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
// - InterpolateCaptures is used on the evaluation of "\2:\1"
// - replacementString is "\2:\1"
// - replacementMatrix contains precomputed/cached offsets for the "\2" and
// "\1" substrings within "\2:\1"
// - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
func InterpolateCaptures(
replacementString string,
replacementMatrix [][]int,

View file

@ -208,12 +208,12 @@ func (mlrmap *Mlrmap) findEntry(key string) *MlrmapEntry {
// findEntryByPositionalIndex is for '$[1]' etc. in the DSL.
//
// Notes:
// * This is a linear search.
// * Indices are 1-up not 0-up
// * Indices -n..-1 are aliases for 1..n. In particular, it will be faster to
// get the -1st field than the nth.
// * Returns 0 on invalid index: 0, or < -n, or > n where n is the number of
// fields.
// - This is a linear search.
// - Indices are 1-up not 0-up
// - Indices -n..-1 are aliases for 1..n. In particular, it will be faster to
// get the -1st field than the nth.
// - Returns nil on invalid index: 0, or < -n, or > n where n is the number of
// fields.
func (mlrmap *Mlrmap) findEntryByPositionalIndex(position int64) *MlrmapEntry {
if position > mlrmap.FieldCount || position < -mlrmap.FieldCount || position == 0 {
return nil

View file

@ -112,14 +112,14 @@ func (mlrmap *Mlrmap) isFlattenable() bool {
//
// Examples:
//
// * The three fields x.a = 7, x.b = 8, x.c = 9 become
// the single field x = {"a": 7, "b": 8, "c": 9}.
// - The three fields x.a = 7, x.b = 8, x.c = 9 become
// the single field x = {"a": 7, "b": 8, "c": 9}.
//
// * The three fields x.1 = 7, x.2 = 8, x.3 = 9 become
// the single field x = [7,8,9].
// - The three fields x.1 = 7, x.2 = 8, x.3 = 9 become
// the single field x = [7,8,9].
//
// * The two fields x.1 = 7, x.3 = 9 become
// the single field x = {"1": 7, "3": 9}
// - The two fields x.1 = 7, x.3 = 9 become
// the single field x = {"1": 7, "3": 9}
func (mlrmap *Mlrmap) Unflatten(
separator string,
) {

View file

@ -640,12 +640,12 @@ func BsearchMlrvalArrayForAscendingInsert(
// NewMlrvalForAutoDeepen is for auto-deepen of nested maps in things like
//
// $foo[1]["a"][2]["b"] = 3
// $foo[1]["a"][2]["b"] = 3
//
// Autocreated levels are maps. Array levels can be explicitly created e.g.
//
// $foo[1]["a"] ??= []
// $foo[1]["a"][2]["b"] = 3
// $foo[1]["a"] ??= []
// $foo[1]["a"][2]["b"] = 3
func NewMlrvalForAutoDeepen(mvtype MVType) (*Mlrval, error) {
if mvtype == MT_STRING || mvtype == MT_INT {
empty := FromEmptyMap()

File diff suppressed because it is too large Load diff