diff --git a/go/src/miller/dsl/ast.go b/go/src/miller/dsl/ast.go index 74ea6c284..04e1a6131 100644 --- a/go/src/miller/dsl/ast.go +++ b/go/src/miller/dsl/ast.go @@ -113,6 +113,21 @@ func NewASTNodeStripDollarPlease(itok interface{}, nodeType TNodeType) (*ASTNode return NewASTNodeNestable(newToken, nodeType), nil } +// NewASTNodeStripDoubleQuotePairPlease does likewise for the leading/trailing double quotes on string literals: it copies the token's Type and Pos, drops the first and last element of its Lit, and passes the new token to NewASTNodeNestable. The slice panics if Lit has fewer than two elements (presumably the lexer always supplies both quotes -- TODO confirm). +func NewASTNodeStripDoubleQuotePairPlease( + itok interface{}, + nodeType TNodeType, +) (*ASTNode, error) { + oldToken := itok.(*token.Token) + n := len(oldToken.Lit) + newToken := &token.Token{ + Type: oldToken.Type, + Lit: oldToken.Lit[1:n-1], + Pos: oldToken.Pos, + } + return NewASTNodeNestable(newToken, nodeType), nil +} + // xxx comment why grammar use func NewASTNodeNestable(itok interface{}, nodeType TNodeType) *ASTNode { var tok *token.Token = nil diff --git a/go/src/miller/parsing/mlr.bnf b/go/src/miller/parsing/mlr.bnf index ca1cd5319..2a5a63bbd 100644 --- a/go/src/miller/parsing/mlr.bnf +++ b/go/src/miller/parsing/mlr.bnf @@ -258,6 +258,10 @@ md_token_FILENUM : 'F' 'I' 'L' 'E' 'N' 'U' 'M' ; // Note: the parser depends on the dollar sign being here. If this is changed, // that needs to be changed as well. +// +// Also note: if we omit the '$' here and include it in the parser section +// below as "$", then we get an LR-1 conflict. So this must be dealt with at +// the AST level. md_token_field_name : '$' _idchar { _idchar } ; //\$\* { @@ -2543,6 +2547,11 @@ FieldName // ... ; +// Note: the field name is "$field" not "field" since md_token_field_name +// includes the '$'. If we omit the '$' there and include it in the parser +// section here as "$", then we get an LR-1 conflict. So this must be dealt +// with at the AST level. Hence the NewASTNodeStripDollarPlease. 
+ DirectFieldName : md_token_field_name << dsl.NewASTNodeStripDollarPlease($0, dsl.NodeTypeDirectFieldName) >> @@ -2641,8 +2650,14 @@ IndirectFieldName // xxx split out node-type string vs number so we can track parsed-type through // to the lrecs, with mlrvals there! :) +// As with '$' on md_token_field_name, so too for md_token_string_literal +// we get LR-1 conflicts if we attempt to put the double quotes here. +// Hence the quote-stripper AST method, dsl.NewASTNodeStripDoubleQuotePairPlease. AtomOrFunction - : md_token_string_literal << dsl.NewASTNode($0, dsl.NodeTypeStringLiteral) >> + : md_token_string_literal << dsl.NewASTNodeStripDoubleQuotePairPlease( + $0, + dsl.NodeTypeStringLiteral, + ) >> | md_token_int_literal << dsl.NewASTNode($0, dsl.NodeTypeIntLiteral) >> | md_token_float_literal << dsl.NewASTNode($0, dsl.NodeTypeFloatLiteral) >> | md_token_boolean_literal << dsl.NewASTNode($0, dsl.NodeTypeBoolLiteral) >> diff --git a/go/src/miller/parsing/parser/productionstable.go b/go/src/miller/parsing/parser/productionstable.go index 57de34e64..d3d64738d 100644 --- a/go/src/miller/parsing/parser/productionstable.go +++ b/go/src/miller/parsing/parser/productionstable.go @@ -841,13 +841,19 @@ var productionsTable = ProdTab{ }, }, ProdTabEntry{ - String: `AtomOrFunction : md_token_string_literal << dsl.NewASTNode(X[0], dsl.NodeTypeStringLiteral) >>`, + String: `AtomOrFunction : md_token_string_literal << dsl.NewASTNodeStripDoubleQuotePairPlease( + X[0], + dsl.NodeTypeStringLiteral, + ) >>`, Id: "AtomOrFunction", NTType: 22, Index: 82, NumSymbols: 1, ReduceFunc: func(X []Attrib) (Attrib, error) { - return dsl.NewASTNode(X[0], dsl.NodeTypeStringLiteral) + return dsl.NewASTNodeStripDoubleQuotePairPlease( + X[0], + dsl.NodeTypeStringLiteral, + ) }, }, ProdTabEntry{