mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Webdoc information on Unicode string literals (#935)
This commit is contained in:
parent
de9e17f73b
commit
8eeb82809e
7 changed files with 133 additions and 86 deletions
|
|
@ -173,6 +173,25 @@ You can use the following backslash escapes for strings such as between the doub
|
|||
* `\"`: double quote
|
||||
* `\123`: Octal 123, etc. for `\000` up to `\377`
|
||||
* `\x7f`: Hexadecimal 7f, etc. for `\x00` up to `\xff`
|
||||
* `\u2766`, `\U00010877`: Unicode literals. For technical reasons, you must supply exactly four hex digits after `\u` and exactly eight hex digits after `\U`.
|
||||
|
||||
<pre class="pre-highlight-in-pair">
|
||||
<b>mlr repl</b>
|
||||
</pre>
|
||||
<pre class="pre-non-highlight-in-pair">
|
||||
[mlr] "a\nb"
|
||||
"a
|
||||
b"
|
||||
|
||||
[mlr] "a\tb"
|
||||
"a b"
|
||||
|
||||
[mlr] "a\x62c"
|
||||
"abc"
|
||||
|
||||
[mlr] "\u2766\U00010877"
|
||||
"❦𐡷"
|
||||
</pre>
|
||||
|
||||
See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipedia.org/wiki/Escape_sequences_in_C).
|
||||
|
||||
|
|
|
|||
|
|
@ -121,6 +121,23 @@ You can use the following backslash escapes for strings such as between the doub
|
|||
* `\"`: double quote
|
||||
* `\123`: Octal 123, etc. for `\000` up to `\377`
|
||||
* `\x7f`: Hexadecimal 7f, etc. for `\x00` up to `\xff`
|
||||
* `\u2766`, `\U00010877`: Unicode literals. For technical reasons, you must supply exactly four hex digits after `\u` and exactly eight hex digits after `\U`.
|
||||
|
||||
GENMD-CARDIFY-HIGHLIGHT-ONE
|
||||
mlr repl
|
||||
[mlr] "a\nb"
|
||||
"a
|
||||
b"
|
||||
|
||||
[mlr] "a\tb"
|
||||
"a b"
|
||||
|
||||
[mlr] "a\x62c"
|
||||
"abc"
|
||||
|
||||
[mlr] "\u2766\U00010877"
|
||||
"❦𐡷"
|
||||
GENMD-EOF
|
||||
|
||||
See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipedia.org/wiki/Escape_sequences_in_C).
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,10 @@ func UnbackslashStringLiteral(input string) string {
|
|||
// } else {
|
||||
// return input
|
||||
// }
|
||||
//
|
||||
// ... and, given that desire, we don't know a priori how many digits follow in Unicode
|
||||
// escape sequences -- so we *require* that people use four hex digits after \u
|
||||
// and eight hex digits after \U.
|
||||
|
||||
var buffer bytes.Buffer
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import (
|
|||
const (
|
||||
NoState = -1
|
||||
NumStates = 328
|
||||
NumSymbols = 574
|
||||
NumSymbols = 578
|
||||
)
|
||||
|
||||
type Lexer struct {
|
||||
|
|
@ -607,100 +607,104 @@ Lexer symbols:
|
|||
475: '\'
|
||||
476: 'x'
|
||||
477: '\'
|
||||
478: 'u'
|
||||
478: 'a'
|
||||
479: '\'
|
||||
480: 'U'
|
||||
480: 'v'
|
||||
481: '\'
|
||||
482: '.'
|
||||
482: 'u'
|
||||
483: '\'
|
||||
484: '*'
|
||||
484: 'U'
|
||||
485: '\'
|
||||
486: '0'
|
||||
486: '.'
|
||||
487: '\'
|
||||
488: '1'
|
||||
488: '*'
|
||||
489: '\'
|
||||
490: '2'
|
||||
490: '0'
|
||||
491: '\'
|
||||
492: '3'
|
||||
492: '1'
|
||||
493: '\'
|
||||
494: '4'
|
||||
494: '2'
|
||||
495: '\'
|
||||
496: '5'
|
||||
496: '3'
|
||||
497: '\'
|
||||
498: '6'
|
||||
498: '4'
|
||||
499: '\'
|
||||
500: '7'
|
||||
500: '5'
|
||||
501: '\'
|
||||
502: '8'
|
||||
502: '6'
|
||||
503: '\'
|
||||
504: '9'
|
||||
505: 'e'
|
||||
506: 'E'
|
||||
507: 't'
|
||||
508: 'r'
|
||||
509: 'u'
|
||||
510: 'e'
|
||||
511: 'f'
|
||||
512: 'a'
|
||||
513: 'l'
|
||||
514: 's'
|
||||
515: 'e'
|
||||
516: ' '
|
||||
517: '!'
|
||||
518: '#'
|
||||
519: '$'
|
||||
520: '%'
|
||||
521: '&'
|
||||
522: '''
|
||||
523: '\'
|
||||
524: '('
|
||||
525: ')'
|
||||
526: '*'
|
||||
527: '+'
|
||||
528: ','
|
||||
529: '-'
|
||||
530: '.'
|
||||
531: '/'
|
||||
532: ':'
|
||||
533: ';'
|
||||
534: '<'
|
||||
535: '='
|
||||
536: '>'
|
||||
537: '?'
|
||||
538: '@'
|
||||
539: '['
|
||||
540: ']'
|
||||
541: '^'
|
||||
542: '_'
|
||||
543: '`'
|
||||
544: '|'
|
||||
545: '~'
|
||||
546: '\'
|
||||
547: '{'
|
||||
548: '\'
|
||||
549: '}'
|
||||
550: ' '
|
||||
551: '\t'
|
||||
552: '\n'
|
||||
553: '\r'
|
||||
554: '#'
|
||||
555: '\n'
|
||||
556: 'a'-'z'
|
||||
557: 'A'-'Z'
|
||||
558: \u0100-\U0010ffff
|
||||
559: '0'-'9'
|
||||
560: '0'-'9'
|
||||
561: 'a'-'f'
|
||||
562: 'A'-'F'
|
||||
563: '0'-'7'
|
||||
564: '0'-'1'
|
||||
565: 'A'-'Z'
|
||||
566: 'a'-'z'
|
||||
567: '0'-'9'
|
||||
568: \u0100-\U0010ffff
|
||||
504: '7'
|
||||
505: '\'
|
||||
506: '8'
|
||||
507: '\'
|
||||
508: '9'
|
||||
509: 'e'
|
||||
510: 'E'
|
||||
511: 't'
|
||||
512: 'r'
|
||||
513: 'u'
|
||||
514: 'e'
|
||||
515: 'f'
|
||||
516: 'a'
|
||||
517: 'l'
|
||||
518: 's'
|
||||
519: 'e'
|
||||
520: ' '
|
||||
521: '!'
|
||||
522: '#'
|
||||
523: '$'
|
||||
524: '%'
|
||||
525: '&'
|
||||
526: '''
|
||||
527: '\'
|
||||
528: '('
|
||||
529: ')'
|
||||
530: '*'
|
||||
531: '+'
|
||||
532: ','
|
||||
533: '-'
|
||||
534: '.'
|
||||
535: '/'
|
||||
536: ':'
|
||||
537: ';'
|
||||
538: '<'
|
||||
539: '='
|
||||
540: '>'
|
||||
541: '?'
|
||||
542: '@'
|
||||
543: '['
|
||||
544: ']'
|
||||
545: '^'
|
||||
546: '_'
|
||||
547: '`'
|
||||
548: '|'
|
||||
549: '~'
|
||||
550: '\'
|
||||
551: '{'
|
||||
552: '\'
|
||||
553: '}'
|
||||
554: ' '
|
||||
555: '\t'
|
||||
556: '\n'
|
||||
557: '\r'
|
||||
558: '#'
|
||||
559: '\n'
|
||||
560: 'a'-'z'
|
||||
561: 'A'-'Z'
|
||||
562: \u0100-\U0010ffff
|
||||
563: '0'-'9'
|
||||
564: '0'-'9'
|
||||
565: 'a'-'f'
|
||||
566: 'A'-'F'
|
||||
567: '0'-'7'
|
||||
568: '0'-'1'
|
||||
569: 'A'-'Z'
|
||||
570: 'a'-'z'
|
||||
571: '0'-'9'
|
||||
572: \u0100-\U0010ffff
|
||||
573: .
|
||||
573: 'A'-'Z'
|
||||
574: 'a'-'z'
|
||||
575: '0'-'9'
|
||||
576: \u0100-\U0010ffff
|
||||
577: .
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1208,6 +1208,8 @@ var TransTab = TransitionTable{
|
|||
return 57
|
||||
case r == 93: // [']',']']
|
||||
return 57
|
||||
case r == 97: // ['a','a']
|
||||
return 57
|
||||
case r == 98: // ['b','b']
|
||||
return 57
|
||||
case r == 102: // ['f','f']
|
||||
|
|
@ -1220,6 +1222,8 @@ var TransTab = TransitionTable{
|
|||
return 57
|
||||
case r == 117: // ['u','u']
|
||||
return 57
|
||||
case r == 118: // ['v','v']
|
||||
return 57
|
||||
case r == 120: // ['x','x']
|
||||
return 57
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,8 +89,7 @@ _string_literal_element
|
|||
| ( '\\' '[' ) | ( '\\' ']' )
|
||||
| ( '\\' 'b' ) | ( '\\' 'f' )
|
||||
| ( '\\' 'n' ) | ( '\\' 'r' )
|
||||
| ( '\\' 't' )
|
||||
| ( '\\' 'x' )
|
||||
| ( '\\' 't' ) | ( '\\' 'x' ) | ( '\\' 'a' ) | ( '\\' 'v' )
|
||||
| ( '\\' 'u' ) | ( '\\' 'U' )
|
||||
| ( '\\' '.' )
|
||||
| ( '\\' '*' )
|
||||
|
|
|
|||
8
todo.txt
8
todo.txt
|
|
@ -1,14 +1,12 @@
|
|||
=============================================================== RELEASES
|
||||
|
||||
* plan 6.1.0
|
||||
? strptime
|
||||
? inference
|
||||
? datediff et al.
|
||||
? strptime/882
|
||||
? mlr join --left-fields a,b,c
|
||||
o fmt/unfmt/regex doc
|
||||
o FAQ/examples reorg
|
||||
m strptime/strftime tabulate options
|
||||
m unicode string literals
|
||||
k unicode string literals
|
||||
k natural sort order
|
||||
k IANA-TSV w/ \{X}
|
||||
k still need csv --lazy-quotes
|
||||
|
|
@ -23,8 +21,10 @@
|
|||
k ?foo and ??foo @ repl help
|
||||
k doc-improves
|
||||
* plan 6.2.0
|
||||
? datediff et al.
|
||||
? rank
|
||||
? YAML
|
||||
? #908 inferencing options
|
||||
|
||||
================================================================
|
||||
FEATURES
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue