mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Support more Go regex patterns, like "\d" (#974)
* Support more Go regex patterns * Simplify doc example * todo
This commit is contained in:
parent
1eae19421b
commit
7b89bab8dd
6 changed files with 221 additions and 104 deletions
|
|
@ -301,7 +301,7 @@ If you prefer [regular expressions](reference-main-regular-expressions.md), thos
|
|||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr --icsv --opprint --from data/split2.csv put '</b>
|
||||
<b> if ($stamp =~ "([0-9]+)-([0-9]+):([0-9]+):([0-9]+)") {</b>
|
||||
<b> if ($stamp =~ "(\d+)-(\d+):(\d+):(\d+)") {</b>
|
||||
<b> $description = "\1 day(s) \2 hour(s) \3 minute(s) \4 seconds(s)";</b>
|
||||
<b> }</b>
|
||||
<b>'</b>
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ If you prefer [regular expressions](reference-main-regular-expressions.md), thos
|
|||
|
||||
GENMD-RUN-COMMAND
|
||||
mlr --icsv --opprint --from data/split2.csv put '
|
||||
if ($stamp =~ "([0-9]+)-([0-9]+):([0-9]+):([0-9]+)") {
|
||||
if ($stamp =~ "(\d+)-(\d+):(\d+):(\d+)") {
|
||||
$description = "\1 day(s) \2 hour(s) \3 minute(s) \4 seconds(s)";
|
||||
}
|
||||
'
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import (
|
|||
const (
|
||||
NoState = -1
|
||||
NumStates = 328
|
||||
NumSymbols = 580
|
||||
NumSymbols = 634
|
||||
)
|
||||
|
||||
type Lexer struct {
|
||||
|
|
@ -595,118 +595,172 @@ Lexer symbols:
|
|||
463: '\'
|
||||
464: ']'
|
||||
465: '\'
|
||||
466: 'b'
|
||||
466: '.'
|
||||
467: '\'
|
||||
468: 'f'
|
||||
468: '*'
|
||||
469: '\'
|
||||
470: 'n'
|
||||
470: '%'
|
||||
471: '\'
|
||||
472: 'r'
|
||||
472: 'A'
|
||||
473: '\'
|
||||
474: 't'
|
||||
474: 'B'
|
||||
475: '\'
|
||||
476: 'x'
|
||||
476: 'C'
|
||||
477: '\'
|
||||
478: 'a'
|
||||
478: 'D'
|
||||
479: '\'
|
||||
480: 'v'
|
||||
480: 'G'
|
||||
481: '\'
|
||||
482: 'u'
|
||||
482: 'H'
|
||||
483: '\'
|
||||
484: 'U'
|
||||
484: 'K'
|
||||
485: '\'
|
||||
486: '.'
|
||||
486: 'L'
|
||||
487: '\'
|
||||
488: '*'
|
||||
488: 'N'
|
||||
489: '\'
|
||||
490: '0'
|
||||
490: 'P'
|
||||
491: '\'
|
||||
492: '1'
|
||||
492: 'R'
|
||||
493: '\'
|
||||
494: '2'
|
||||
494: 'S'
|
||||
495: '\'
|
||||
496: '3'
|
||||
496: 'U'
|
||||
497: '\'
|
||||
498: '4'
|
||||
498: 'V'
|
||||
499: '\'
|
||||
500: '5'
|
||||
500: 'W'
|
||||
501: '\'
|
||||
502: '6'
|
||||
502: 'X'
|
||||
503: '\'
|
||||
504: '7'
|
||||
504: 'Z'
|
||||
505: '\'
|
||||
506: '8'
|
||||
506: 'a'
|
||||
507: '\'
|
||||
508: '9'
|
||||
509: 'e'
|
||||
510: 'E'
|
||||
511: 't'
|
||||
512: 'r'
|
||||
513: 'u'
|
||||
514: 'e'
|
||||
515: 'f'
|
||||
516: 'a'
|
||||
517: 'l'
|
||||
518: 's'
|
||||
519: 'e'
|
||||
520: ' '
|
||||
521: '!'
|
||||
522: '#'
|
||||
523: '$'
|
||||
524: '%'
|
||||
525: '&'
|
||||
526: '''
|
||||
508: 'b'
|
||||
509: '\'
|
||||
510: 'c'
|
||||
511: '\'
|
||||
512: 'd'
|
||||
513: '\'
|
||||
514: 'f'
|
||||
515: '\'
|
||||
516: 'g'
|
||||
517: '\'
|
||||
518: 'h'
|
||||
519: '\'
|
||||
520: 'k'
|
||||
521: '\'
|
||||
522: 'l'
|
||||
523: '\'
|
||||
524: 'n'
|
||||
525: '\'
|
||||
526: 'p'
|
||||
527: '\'
|
||||
528: '('
|
||||
529: ')'
|
||||
530: '*'
|
||||
531: '+'
|
||||
532: ','
|
||||
533: '-'
|
||||
534: '.'
|
||||
535: '/'
|
||||
536: ':'
|
||||
537: ';'
|
||||
538: '<'
|
||||
539: '='
|
||||
540: '>'
|
||||
541: '?'
|
||||
542: '@'
|
||||
543: '['
|
||||
544: ']'
|
||||
545: '^'
|
||||
546: '_'
|
||||
547: '`'
|
||||
548: '|'
|
||||
549: '~'
|
||||
550: '\'
|
||||
551: '{'
|
||||
552: '\'
|
||||
553: '}'
|
||||
554: ' '
|
||||
555: '\t'
|
||||
556: '\n'
|
||||
557: '\r'
|
||||
558: '#'
|
||||
559: '\n'
|
||||
560: 'a'-'z'
|
||||
561: 'A'-'Z'
|
||||
562: \u00a0-\u00ff
|
||||
563: \u0100-\U0010ffff
|
||||
564: '0'-'9'
|
||||
565: '0'-'9'
|
||||
566: 'a'-'f'
|
||||
567: 'A'-'F'
|
||||
568: '0'-'7'
|
||||
569: '0'-'1'
|
||||
570: 'A'-'Z'
|
||||
571: 'a'-'z'
|
||||
572: '0'-'9'
|
||||
573: \u00a0-\u00ff
|
||||
574: \u0100-\U0010ffff
|
||||
575: 'A'-'Z'
|
||||
576: 'a'-'z'
|
||||
577: '0'-'9'
|
||||
578: \u0100-\U0010ffff
|
||||
579: .
|
||||
528: 'r'
|
||||
529: '\'
|
||||
530: 's'
|
||||
531: '\'
|
||||
532: 't'
|
||||
533: '\'
|
||||
534: 'u'
|
||||
535: '\'
|
||||
536: 'v'
|
||||
537: '\'
|
||||
538: 'w'
|
||||
539: '\'
|
||||
540: 'x'
|
||||
541: '\'
|
||||
542: 'z'
|
||||
543: '\'
|
||||
544: '0'
|
||||
545: '\'
|
||||
546: '1'
|
||||
547: '\'
|
||||
548: '2'
|
||||
549: '\'
|
||||
550: '3'
|
||||
551: '\'
|
||||
552: '4'
|
||||
553: '\'
|
||||
554: '5'
|
||||
555: '\'
|
||||
556: '6'
|
||||
557: '\'
|
||||
558: '7'
|
||||
559: '\'
|
||||
560: '8'
|
||||
561: '\'
|
||||
562: '9'
|
||||
563: 'e'
|
||||
564: 'E'
|
||||
565: 't'
|
||||
566: 'r'
|
||||
567: 'u'
|
||||
568: 'e'
|
||||
569: 'f'
|
||||
570: 'a'
|
||||
571: 'l'
|
||||
572: 's'
|
||||
573: 'e'
|
||||
574: ' '
|
||||
575: '!'
|
||||
576: '#'
|
||||
577: '$'
|
||||
578: '%'
|
||||
579: '&'
|
||||
580: '''
|
||||
581: '\'
|
||||
582: '('
|
||||
583: ')'
|
||||
584: '*'
|
||||
585: '+'
|
||||
586: ','
|
||||
587: '-'
|
||||
588: '.'
|
||||
589: '/'
|
||||
590: ':'
|
||||
591: ';'
|
||||
592: '<'
|
||||
593: '='
|
||||
594: '>'
|
||||
595: '?'
|
||||
596: '@'
|
||||
597: '['
|
||||
598: ']'
|
||||
599: '^'
|
||||
600: '_'
|
||||
601: '`'
|
||||
602: '|'
|
||||
603: '~'
|
||||
604: '\'
|
||||
605: '{'
|
||||
606: '\'
|
||||
607: '}'
|
||||
608: ' '
|
||||
609: '\t'
|
||||
610: '\n'
|
||||
611: '\r'
|
||||
612: '#'
|
||||
613: '\n'
|
||||
614: 'a'-'z'
|
||||
615: 'A'-'Z'
|
||||
616: \u00a0-\u00ff
|
||||
617: \u0100-\U0010ffff
|
||||
618: '0'-'9'
|
||||
619: '0'-'9'
|
||||
620: 'a'-'f'
|
||||
621: 'A'-'F'
|
||||
622: '0'-'7'
|
||||
623: '0'-'1'
|
||||
624: 'A'-'Z'
|
||||
625: 'a'-'z'
|
||||
626: '0'-'9'
|
||||
627: \u00a0-\u00ff
|
||||
628: \u0100-\U0010ffff
|
||||
629: 'A'-'Z'
|
||||
630: 'a'-'z'
|
||||
631: '0'-'9'
|
||||
632: \u0100-\U0010ffff
|
||||
633: .
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1234,6 +1234,8 @@ var TransTab = TransitionTable{
|
|||
switch {
|
||||
case r == 34: // ['"','"']
|
||||
return 157
|
||||
case r == 37: // ['%','%']
|
||||
return 57
|
||||
case r == 42: // ['*','*']
|
||||
return 57
|
||||
case r == 46: // ['.','.']
|
||||
|
|
@ -1258,8 +1260,40 @@ var TransTab = TransitionTable{
|
|||
return 57
|
||||
case r == 57: // ['9','9']
|
||||
return 57
|
||||
case r == 65: // ['A','A']
|
||||
return 57
|
||||
case r == 66: // ['B','B']
|
||||
return 57
|
||||
case r == 67: // ['C','C']
|
||||
return 57
|
||||
case r == 68: // ['D','D']
|
||||
return 57
|
||||
case r == 71: // ['G','G']
|
||||
return 57
|
||||
case r == 72: // ['H','H']
|
||||
return 57
|
||||
case r == 75: // ['K','K']
|
||||
return 57
|
||||
case r == 76: // ['L','L']
|
||||
return 57
|
||||
case r == 78: // ['N','N']
|
||||
return 57
|
||||
case r == 80: // ['P','P']
|
||||
return 57
|
||||
case r == 82: // ['R','R']
|
||||
return 57
|
||||
case r == 83: // ['S','S']
|
||||
return 57
|
||||
case r == 85: // ['U','U']
|
||||
return 57
|
||||
case r == 86: // ['V','V']
|
||||
return 57
|
||||
case r == 87: // ['W','W']
|
||||
return 57
|
||||
case r == 88: // ['X','X']
|
||||
return 57
|
||||
case r == 90: // ['Z','Z']
|
||||
return 57
|
||||
case r == 91: // ['[','[']
|
||||
return 57
|
||||
case r == 92: // ['\','\']
|
||||
|
|
@ -1270,20 +1304,40 @@ var TransTab = TransitionTable{
|
|||
return 57
|
||||
case r == 98: // ['b','b']
|
||||
return 57
|
||||
case r == 99: // ['c','c']
|
||||
return 57
|
||||
case r == 100: // ['d','d']
|
||||
return 57
|
||||
case r == 102: // ['f','f']
|
||||
return 57
|
||||
case r == 103: // ['g','g']
|
||||
return 57
|
||||
case r == 104: // ['h','h']
|
||||
return 57
|
||||
case r == 107: // ['k','k']
|
||||
return 57
|
||||
case r == 108: // ['l','l']
|
||||
return 57
|
||||
case r == 110: // ['n','n']
|
||||
return 57
|
||||
case r == 112: // ['p','p']
|
||||
return 57
|
||||
case r == 114: // ['r','r']
|
||||
return 57
|
||||
case r == 115: // ['s','s']
|
||||
return 57
|
||||
case r == 116: // ['t','t']
|
||||
return 57
|
||||
case r == 117: // ['u','u']
|
||||
return 57
|
||||
case r == 118: // ['v','v']
|
||||
return 57
|
||||
case r == 119: // ['w','w']
|
||||
return 57
|
||||
case r == 120: // ['x','x']
|
||||
return 57
|
||||
case r == 122: // ['z','z']
|
||||
return 57
|
||||
}
|
||||
return NoState
|
||||
},
|
||||
|
|
|
|||
|
|
@ -79,22 +79,29 @@ _idchar : _letter | _decdig | '_' ;
|
|||
//
|
||||
// the shell command is the 'tr ...' string and we need to spell out the
|
||||
// escape sequence used by tr.
|
||||
// * See https://github.com/google/re2/wiki/Syntax
|
||||
|
||||
_string_literal_element
|
||||
: 'A'-'Z' | 'a'-'z' | '0'-'9'
|
||||
|
||||
| ' ' | '!' | '#' | '$' | '%' | '&' | '\'' | '\\'
|
||||
| '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/'
|
||||
| ':' | ';' | '<' | '=' | '>' | '?' | '@' | '['
|
||||
| ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~'
|
||||
| ( '\\' '\\' ) | ( '\\' '"' )
|
||||
| ( '\\' '[' ) | ( '\\' ']' )
|
||||
| ( '\\' 'b' ) | ( '\\' 'f' )
|
||||
| ( '\\' 'n' ) | ( '\\' 'r' )
|
||||
| ( '\\' 't' ) | ( '\\' 'x' ) | ( '\\' 'a' ) | ( '\\' 'v' )
|
||||
| ( '\\' 'u' ) | ( '\\' 'U' )
|
||||
| ( '\\' '.' )
|
||||
| ( '\\' '*' )
|
||||
| ( '\\' '\\' ) | ( '\\' '"' ) | ( '\\' '[' ) | ( '\\' ']' )
|
||||
| ( '\\' '.' ) | ( '\\' '*' ) | ( '\\' '%' )
|
||||
|
||||
| ( '\\' 'A') | ( '\\' 'B') | ( '\\' 'C') | ( '\\' 'D') | ( '\\' 'G') | ( '\\' 'H')
|
||||
| ( '\\' 'K') | ( '\\' 'L') | ( '\\' 'N') | ( '\\' 'P') | ( '\\' 'R') | ( '\\' 'S')
|
||||
| ( '\\' 'U') | ( '\\' 'V') | ( '\\' 'W') | ( '\\' 'X') | ( '\\' 'Z')
|
||||
| ( '\\' 'a') | ( '\\' 'b') | ( '\\' 'c') | ( '\\' 'd') | ( '\\' 'f') | ( '\\' 'g')
|
||||
| ( '\\' 'h') | ( '\\' 'k') | ( '\\' 'l') | ( '\\' 'n') | ( '\\' 'p') | ( '\\' 'r')
|
||||
| ( '\\' 's') | ( '\\' 't') | ( '\\' 'u') | ( '\\' 'v') | ( '\\' 'w') | ( '\\' 'x')
|
||||
| ( '\\' 'z')
|
||||
|
||||
| ( '\\' '0' ) | ( '\\' '1' ) | ( '\\' '2' ) | ( '\\' '3' ) | ( '\\' '4' )
|
||||
| ( '\\' '5' ) | ( '\\' '6' ) | ( '\\' '7' ) | ( '\\' '8' ) | ( '\\' '9' )
|
||||
|
||||
| '\u00a0'-'\u00ff'
|
||||
| '\u0100'-'\U0010ffff'
|
||||
;
|
||||
|
|
|
|||
4
todo.txt
4
todo.txt
|
|
@ -2,11 +2,13 @@
|
|||
RELEASES
|
||||
* plan 6.1.0
|
||||
o unsparsify -f CSV by default -- ? into CSV record-writer -- ? caveat that record 1 controls all ...
|
||||
- \d etc to DSL :( -- & parsing-and-formatting-fields.md.in
|
||||
o mlr split -- needs an example page along with the tee DSL function
|
||||
o 404 -- what broke?
|
||||
https://github.com/johnkerl/miller/pull/757/files
|
||||
|
||||
o https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation
|
||||
|
||||
k more regex patterns like \d
|
||||
k mlr join --left-fields a,b,c
|
||||
k strptime/882
|
||||
k fmtifnum, & recursive fmtnum/fmtifnum
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue