Support more Go regex patterns, like "\d" (#974)

* Support more Go regex patterns

* Simplify doc example

* todo
This commit is contained in:
John Kerl 2022-03-07 00:06:33 -05:00 committed by GitHub
parent 1eae19421b
commit 7b89bab8dd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 221 additions and 104 deletions

View file

@ -301,7 +301,7 @@ If you prefer [regular expressions](reference-main-regular-expressions.md), thos
<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint --from data/split2.csv put '</b>
<b> if ($stamp =~ "([0-9]+)-([0-9]+):([0-9]+):([0-9]+)") {</b>
<b> if ($stamp =~ "(\d+)-(\d+):(\d+):(\d+)") {</b>
<b> $description = "\1 day(s) \2 hour(s) \3 minute(s) \4 seconds(s)";</b>
<b> }</b>
<b>'</b>

View file

@ -136,7 +136,7 @@ If you prefer [regular expressions](reference-main-regular-expressions.md), thos
GENMD-RUN-COMMAND
mlr --icsv --opprint --from data/split2.csv put '
if ($stamp =~ "([0-9]+)-([0-9]+):([0-9]+):([0-9]+)") {
if ($stamp =~ "(\d+)-(\d+):(\d+):(\d+)") {
$description = "\1 day(s) \2 hour(s) \3 minute(s) \4 seconds(s)";
}
'

View file

@ -12,7 +12,7 @@ import (
const (
NoState = -1
NumStates = 328
NumSymbols = 580
NumSymbols = 634
)
type Lexer struct {
@ -595,118 +595,172 @@ Lexer symbols:
463: '\'
464: ']'
465: '\'
466: 'b'
466: '.'
467: '\'
468: 'f'
468: '*'
469: '\'
470: 'n'
470: '%'
471: '\'
472: 'r'
472: 'A'
473: '\'
474: 't'
474: 'B'
475: '\'
476: 'x'
476: 'C'
477: '\'
478: 'a'
478: 'D'
479: '\'
480: 'v'
480: 'G'
481: '\'
482: 'u'
482: 'H'
483: '\'
484: 'U'
484: 'K'
485: '\'
486: '.'
486: 'L'
487: '\'
488: '*'
488: 'N'
489: '\'
490: '0'
490: 'P'
491: '\'
492: '1'
492: 'R'
493: '\'
494: '2'
494: 'S'
495: '\'
496: '3'
496: 'U'
497: '\'
498: '4'
498: 'V'
499: '\'
500: '5'
500: 'W'
501: '\'
502: '6'
502: 'X'
503: '\'
504: '7'
504: 'Z'
505: '\'
506: '8'
506: 'a'
507: '\'
508: '9'
509: 'e'
510: 'E'
511: 't'
512: 'r'
513: 'u'
514: 'e'
515: 'f'
516: 'a'
517: 'l'
518: 's'
519: 'e'
520: ' '
521: '!'
522: '#'
523: '$'
524: '%'
525: '&'
526: '''
508: 'b'
509: '\'
510: 'c'
511: '\'
512: 'd'
513: '\'
514: 'f'
515: '\'
516: 'g'
517: '\'
518: 'h'
519: '\'
520: 'k'
521: '\'
522: 'l'
523: '\'
524: 'n'
525: '\'
526: 'p'
527: '\'
528: '('
529: ')'
530: '*'
531: '+'
532: ','
533: '-'
534: '.'
535: '/'
536: ':'
537: ';'
538: '<'
539: '='
540: '>'
541: '?'
542: '@'
543: '['
544: ']'
545: '^'
546: '_'
547: '`'
548: '|'
549: '~'
550: '\'
551: '{'
552: '\'
553: '}'
554: ' '
555: '\t'
556: '\n'
557: '\r'
558: '#'
559: '\n'
560: 'a'-'z'
561: 'A'-'Z'
562: \u00a0-\u00ff
563: \u0100-\U0010ffff
564: '0'-'9'
565: '0'-'9'
566: 'a'-'f'
567: 'A'-'F'
568: '0'-'7'
569: '0'-'1'
570: 'A'-'Z'
571: 'a'-'z'
572: '0'-'9'
573: \u00a0-\u00ff
574: \u0100-\U0010ffff
575: 'A'-'Z'
576: 'a'-'z'
577: '0'-'9'
578: \u0100-\U0010ffff
579: .
528: 'r'
529: '\'
530: 's'
531: '\'
532: 't'
533: '\'
534: 'u'
535: '\'
536: 'v'
537: '\'
538: 'w'
539: '\'
540: 'x'
541: '\'
542: 'z'
543: '\'
544: '0'
545: '\'
546: '1'
547: '\'
548: '2'
549: '\'
550: '3'
551: '\'
552: '4'
553: '\'
554: '5'
555: '\'
556: '6'
557: '\'
558: '7'
559: '\'
560: '8'
561: '\'
562: '9'
563: 'e'
564: 'E'
565: 't'
566: 'r'
567: 'u'
568: 'e'
569: 'f'
570: 'a'
571: 'l'
572: 's'
573: 'e'
574: ' '
575: '!'
576: '#'
577: '$'
578: '%'
579: '&'
580: '''
581: '\'
582: '('
583: ')'
584: '*'
585: '+'
586: ','
587: '-'
588: '.'
589: '/'
590: ':'
591: ';'
592: '<'
593: '='
594: '>'
595: '?'
596: '@'
597: '['
598: ']'
599: '^'
600: '_'
601: '`'
602: '|'
603: '~'
604: '\'
605: '{'
606: '\'
607: '}'
608: ' '
609: '\t'
610: '\n'
611: '\r'
612: '#'
613: '\n'
614: 'a'-'z'
615: 'A'-'Z'
616: \u00a0-\u00ff
617: \u0100-\U0010ffff
618: '0'-'9'
619: '0'-'9'
620: 'a'-'f'
621: 'A'-'F'
622: '0'-'7'
623: '0'-'1'
624: 'A'-'Z'
625: 'a'-'z'
626: '0'-'9'
627: \u00a0-\u00ff
628: \u0100-\U0010ffff
629: 'A'-'Z'
630: 'a'-'z'
631: '0'-'9'
632: \u0100-\U0010ffff
633: .
*/

View file

@ -1234,6 +1234,8 @@ var TransTab = TransitionTable{
switch {
case r == 34: // ['"','"']
return 157
case r == 37: // ['%','%']
return 57
case r == 42: // ['*','*']
return 57
case r == 46: // ['.','.']
@ -1258,8 +1260,40 @@ var TransTab = TransitionTable{
return 57
case r == 57: // ['9','9']
return 57
case r == 65: // ['A','A']
return 57
case r == 66: // ['B','B']
return 57
case r == 67: // ['C','C']
return 57
case r == 68: // ['D','D']
return 57
case r == 71: // ['G','G']
return 57
case r == 72: // ['H','H']
return 57
case r == 75: // ['K','K']
return 57
case r == 76: // ['L','L']
return 57
case r == 78: // ['N','N']
return 57
case r == 80: // ['P','P']
return 57
case r == 82: // ['R','R']
return 57
case r == 83: // ['S','S']
return 57
case r == 85: // ['U','U']
return 57
case r == 86: // ['V','V']
return 57
case r == 87: // ['W','W']
return 57
case r == 88: // ['X','X']
return 57
case r == 90: // ['Z','Z']
return 57
case r == 91: // ['[','[']
return 57
case r == 92: // ['\','\']
@ -1270,20 +1304,40 @@ var TransTab = TransitionTable{
return 57
case r == 98: // ['b','b']
return 57
case r == 99: // ['c','c']
return 57
case r == 100: // ['d','d']
return 57
case r == 102: // ['f','f']
return 57
case r == 103: // ['g','g']
return 57
case r == 104: // ['h','h']
return 57
case r == 107: // ['k','k']
return 57
case r == 108: // ['l','l']
return 57
case r == 110: // ['n','n']
return 57
case r == 112: // ['p','p']
return 57
case r == 114: // ['r','r']
return 57
case r == 115: // ['s','s']
return 57
case r == 116: // ['t','t']
return 57
case r == 117: // ['u','u']
return 57
case r == 118: // ['v','v']
return 57
case r == 119: // ['w','w']
return 57
case r == 120: // ['x','x']
return 57
case r == 122: // ['z','z']
return 57
}
return NoState
},

View file

@ -79,22 +79,29 @@ _idchar : _letter | _decdig | '_' ;
//
// the shell command is the 'tr ...' string and we need to spell out the
// escape sequence used by tr.
// * See https://github.com/google/re2/wiki/Syntax for the regex escape sequences (e.g. "\d") accepted in string literals below.
_string_literal_element
: 'A'-'Z' | 'a'-'z' | '0'-'9'
| ' ' | '!' | '#' | '$' | '%' | '&' | '\'' | '\\'
| '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/'
| ':' | ';' | '<' | '=' | '>' | '?' | '@' | '['
| ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~'
| ( '\\' '\\' ) | ( '\\' '"' )
| ( '\\' '[' ) | ( '\\' ']' )
| ( '\\' 'b' ) | ( '\\' 'f' )
| ( '\\' 'n' ) | ( '\\' 'r' )
| ( '\\' 't' ) | ( '\\' 'x' ) | ( '\\' 'a' ) | ( '\\' 'v' )
| ( '\\' 'u' ) | ( '\\' 'U' )
| ( '\\' '.' )
| ( '\\' '*' )
| ( '\\' '\\' ) | ( '\\' '"' ) | ( '\\' '[' ) | ( '\\' ']' )
| ( '\\' '.' ) | ( '\\' '*' ) | ( '\\' '%' )
| ( '\\' 'A') | ( '\\' 'B') | ( '\\' 'C') | ( '\\' 'D') | ( '\\' 'G') | ( '\\' 'H')
| ( '\\' 'K') | ( '\\' 'L') | ( '\\' 'N') | ( '\\' 'P') | ( '\\' 'R') | ( '\\' 'S')
| ( '\\' 'U') | ( '\\' 'V') | ( '\\' 'W') | ( '\\' 'X') | ( '\\' 'Z')
| ( '\\' 'a') | ( '\\' 'b') | ( '\\' 'c') | ( '\\' 'd') | ( '\\' 'f') | ( '\\' 'g')
| ( '\\' 'h') | ( '\\' 'k') | ( '\\' 'l') | ( '\\' 'n') | ( '\\' 'p') | ( '\\' 'r')
| ( '\\' 's') | ( '\\' 't') | ( '\\' 'u') | ( '\\' 'v') | ( '\\' 'w') | ( '\\' 'x')
| ( '\\' 'z')
| ( '\\' '0' ) | ( '\\' '1' ) | ( '\\' '2' ) | ( '\\' '3' ) | ( '\\' '4' )
| ( '\\' '5' ) | ( '\\' '6' ) | ( '\\' '7' ) | ( '\\' '8' ) | ( '\\' '9' )
| '\u00a0'-'\u00ff'
| '\u0100'-'\U0010ffff'
;

View file

@ -2,11 +2,13 @@
RELEASES
* plan 6.1.0
o unsparsify -f CSV by default -- ? into CSV record-writer -- ? caveat that record 1 controls all ...
- \d etc to DSL :( -- & parsing-and-formatting-fields.md.in
o mlr split -- needs an example page along with the tee DSL function
o 404 -- what broke?
https://github.com/johnkerl/miller/pull/757/files
o https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation
k more regex patterns like \d
k mlr join --left-fields a,b,c
k strptime/882
k fmtifnum, & recursive fmtnum/fmtifnum