Webdoc information on Unicode string literals (#935)

This commit is contained in:
John Kerl 2022-02-09 00:28:04 -05:00 committed by GitHub
parent de9e17f73b
commit 8eeb82809e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 133 additions and 86 deletions

View file

@ -173,6 +173,25 @@ You can use the following backslash escapes for strings such as between the doub
* `\"`: double quote
* `\123`: Octal 123, etc. for `\000` up to `\377`
* `\x7f`: Hexadecimal 7f, etc. for `\x00` up to `\xff`
* `\u2766`, `\U00010877`: Unicode literals. For technical reasons, you must supply exactly four hex digits after `\u` and exactly eight hex digits after `\U`.
<pre class="pre-highlight-in-pair">
<b>mlr repl</b>
</pre>
<pre class="pre-non-highlight-in-pair">
[mlr] "a\nb"
"a
b"
[mlr] "a\tb"
"a b"
[mlr] "a\x62c"
"abc"
[mlr] "\u2766\U00010877"
"❦𐡷"
</pre>
See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipedia.org/wiki/Escape_sequences_in_C).

View file

@ -121,6 +121,23 @@ You can use the following backslash escapes for strings such as between the doub
* `\"`: double quote
* `\123`: Octal 123, etc. for `\000` up to `\377`
* `\x7f`: Hexadecimal 7f, etc. for `\x00` up to `\xff`
* `\u2766`, `\U00010877`: Unicode literals. For technical reasons, you must supply exactly four hex digits after `\u` and exactly eight hex digits after `\U`.
GENMD-CARDIFY-HIGHLIGHT-ONE
mlr repl
[mlr] "a\nb"
"a
b"
[mlr] "a\tb"
"a b"
[mlr] "a\x62c"
"abc"
[mlr] "\u2766\U00010877"
"❦𐡷"
GENMD-EOF
See also [https://en.wikipedia.org/wiki/Escape_sequences_in_C](https://en.wikipedia.org/wiki/Escape_sequences_in_C).

View file

@ -49,6 +49,10 @@ func UnbackslashStringLiteral(input string) string {
// } else {
// return input
// }
//
// ... and, given that desire, we don't know a priori how many digits a Unicode
// escape sequence has -- so we *require* that people use exactly four hex digits
// after \u and exactly eight hex digits after \U.
var buffer bytes.Buffer

View file

@ -12,7 +12,7 @@ import (
const (
NoState = -1
NumStates = 328
NumSymbols = 574
NumSymbols = 578
)
type Lexer struct {
@ -607,100 +607,104 @@ Lexer symbols:
475: '\'
476: 'x'
477: '\'
478: 'u'
478: 'a'
479: '\'
480: 'U'
480: 'v'
481: '\'
482: '.'
482: 'u'
483: '\'
484: '*'
484: 'U'
485: '\'
486: '0'
486: '.'
487: '\'
488: '1'
488: '*'
489: '\'
490: '2'
490: '0'
491: '\'
492: '3'
492: '1'
493: '\'
494: '4'
494: '2'
495: '\'
496: '5'
496: '3'
497: '\'
498: '6'
498: '4'
499: '\'
500: '7'
500: '5'
501: '\'
502: '8'
502: '6'
503: '\'
504: '9'
505: 'e'
506: 'E'
507: 't'
508: 'r'
509: 'u'
510: 'e'
511: 'f'
512: 'a'
513: 'l'
514: 's'
515: 'e'
516: ' '
517: '!'
518: '#'
519: '$'
520: '%'
521: '&'
522: '''
523: '\'
524: '('
525: ')'
526: '*'
527: '+'
528: ','
529: '-'
530: '.'
531: '/'
532: ':'
533: ';'
534: '<'
535: '='
536: '>'
537: '?'
538: '@'
539: '['
540: ']'
541: '^'
542: '_'
543: '`'
544: '|'
545: '~'
546: '\'
547: '{'
548: '\'
549: '}'
550: ' '
551: '\t'
552: '\n'
553: '\r'
554: '#'
555: '\n'
556: 'a'-'z'
557: 'A'-'Z'
558: \u0100-\U0010ffff
559: '0'-'9'
560: '0'-'9'
561: 'a'-'f'
562: 'A'-'F'
563: '0'-'7'
564: '0'-'1'
565: 'A'-'Z'
566: 'a'-'z'
567: '0'-'9'
568: \u0100-\U0010ffff
504: '7'
505: '\'
506: '8'
507: '\'
508: '9'
509: 'e'
510: 'E'
511: 't'
512: 'r'
513: 'u'
514: 'e'
515: 'f'
516: 'a'
517: 'l'
518: 's'
519: 'e'
520: ' '
521: '!'
522: '#'
523: '$'
524: '%'
525: '&'
526: '''
527: '\'
528: '('
529: ')'
530: '*'
531: '+'
532: ','
533: '-'
534: '.'
535: '/'
536: ':'
537: ';'
538: '<'
539: '='
540: '>'
541: '?'
542: '@'
543: '['
544: ']'
545: '^'
546: '_'
547: '`'
548: '|'
549: '~'
550: '\'
551: '{'
552: '\'
553: '}'
554: ' '
555: '\t'
556: '\n'
557: '\r'
558: '#'
559: '\n'
560: 'a'-'z'
561: 'A'-'Z'
562: \u0100-\U0010ffff
563: '0'-'9'
564: '0'-'9'
565: 'a'-'f'
566: 'A'-'F'
567: '0'-'7'
568: '0'-'1'
569: 'A'-'Z'
570: 'a'-'z'
571: '0'-'9'
572: \u0100-\U0010ffff
573: .
573: 'A'-'Z'
574: 'a'-'z'
575: '0'-'9'
576: \u0100-\U0010ffff
577: .
*/

View file

@ -1208,6 +1208,8 @@ var TransTab = TransitionTable{
return 57
case r == 93: // [']',']']
return 57
case r == 97: // ['a','a']
return 57
case r == 98: // ['b','b']
return 57
case r == 102: // ['f','f']
@ -1220,6 +1222,8 @@ var TransTab = TransitionTable{
return 57
case r == 117: // ['u','u']
return 57
case r == 118: // ['v','v']
return 57
case r == 120: // ['x','x']
return 57
}

View file

@ -89,8 +89,7 @@ _string_literal_element
| ( '\\' '[' ) | ( '\\' ']' )
| ( '\\' 'b' ) | ( '\\' 'f' )
| ( '\\' 'n' ) | ( '\\' 'r' )
| ( '\\' 't' )
| ( '\\' 'x' )
| ( '\\' 't' ) | ( '\\' 'x' ) | ( '\\' 'a' ) | ( '\\' 'v' )
| ( '\\' 'u' ) | ( '\\' 'U' )
| ( '\\' '.' )
| ( '\\' '*' )

View file

@ -1,14 +1,12 @@
=============================================================== RELEASES
* plan 6.1.0
? strptime
? inference
? datediff et al.
? strptime/882
? mlr join --left-fields a,b,c
o fmt/unfmt/regex doc
o FAQ/examples reorg
m strptime/strftime tabulate options
m unicode string literals
k unicode string literals
k natural sort order
k IANA-TSV w/ \{X}
k still need csv --lazy-quotes
@ -23,8 +21,10 @@
k ?foo and ??foo @ repl help
k doc-improves
* plan 6.2.0
? datediff et al.
? rank
? YAML
? #908 inferencing options
================================================================
FEATURES