miller/pkg/parsing/lexer/lexer.go
Adam Lesperance 085e831668
The package version must match the major tag version (#1654)
* Update package version

* Update makefile targets

* Update readme packages

* Remaining old packages via rg/sd
2024-09-20 12:10:11 -04:00

785 lines
8 KiB
Go

// Code generated by gocc; DO NOT EDIT.
package lexer
import (
"os"
"unicode/utf8"
"github.com/johnkerl/miller/v6/pkg/parsing/token"
)
// NoState is the state value -1. Scan leaves its loop as soon as the current
// state becomes -1 (it spells the value as a literal); presumably this
// constant names that same "no such state" sentinel.
const NoState = -1

// NumStates is 336 — presumably the number of states in the generated
// transition and action tables; confirm against TransTab/ActTab.
const NumStates = 336

// NumSymbols is 653, which equals the number of entries (0 through 652) in
// the "Lexer symbols" listing at the end of this file.
const NumSymbols = 653
// Lexer scans tokens out of an in-memory byte slice while keeping track of
// the current byte offset and line/column position.
type Lexer struct {
	// src is the input being scanned.
	src []byte

	// pos is the byte offset in src of the next rune to decode. line and
	// column hold the current position; NewLexer starts both at 1.
	pos, line, column int

	// Context is copied into the Pos.Context of every token Scan returns.
	// NewLexer leaves it nil; NewLexerFile stores a *SourceContext here.
	Context token.Context
}
// NewLexer returns a Lexer positioned at the start of src, with line and
// column both set to 1. The slice is stored as given, not copied.
func NewLexer(src []byte) *Lexer {
	// pos and Context are deliberately left at their zero values (0 and nil).
	return &Lexer{src: src, line: 1, column: 1}
}
// SourceContext is a minimal token.Context implementation that records
// which file the scanned source came from.
type SourceContext struct {
	// Filepath is the path reported by Source.
	Filepath string
}

// Source returns the stored file path unchanged.
func (sc *SourceContext) Source() string {
	return sc.Filepath
}
// NewLexerFile reads the whole file at fpath and returns a Lexer over its
// contents, with Context set to a *SourceContext naming that file.
//
// If the file cannot be read, the error from os.ReadFile is returned as-is
// together with a nil Lexer.
func NewLexerFile(fpath string) (*Lexer, error) {
	contents, readErr := os.ReadFile(fpath)
	if readErr != nil {
		return nil, readErr
	}

	fileLexer := NewLexer(contents)
	fileLexer.Context = &SourceContext{Filepath: fpath}
	return fileLexer, nil
}
// Scan returns the next token from the source and advances the lexer.
//
// When the input is already exhausted it returns a token of type token.EOF
// located at the current offset/line/column. Otherwise it drives a state
// machine, starting in state 0, through the TransTab and ActTab tables
// (declared elsewhere in this package, not shown here). The token's type is
// that of the last accepting state reached, and the lexer's offset is set
// back to the end of that match. If no accepting state is reached the token
// keeps type token.INVALID and its literal runs from the token start to the
// offset at which the machine stopped.
//
// tok.Lit is a sub-slice of the lexer's source, not a copy. tok.Pos carries
// the offset, line and column of the token start plus the lexer's Context.
func (l *Lexer) Scan() (tok *token.Token) {
tok = &token.Token{}
// Nothing left to read: report EOF at the current position.
if l.pos >= len(l.src) {
tok.Type = token.EOF
tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = l.pos, l.line, l.column
tok.Pos.Context = l.Context
return
}
// start* mark where the token begins; end is the offset just past the
// best match found so far (0 until something is recorded).
start, startLine, startColumn, end := l.pos, l.line, l.column, 0
tok.Type = token.INVALID
state, rune1, size := 0, rune(-1), 0
for state != -1 {
// Fetch the next rune; -1 stands for "end of input".
if l.pos >= len(l.src) {
rune1 = -1
} else {
rune1, size = utf8.DecodeRune(l.src[l.pos:])
l.pos += size
}
// End of input never has a transition; otherwise ask the table entry
// for the current state where rune1 leads.
nextState := -1
if rune1 != -1 {
nextState = TransTab[state](rune1)
}
state = nextState
if state != -1 {
// The rune was consumed: advance the line/column bookkeeping.
// A newline starts a new line, a carriage return resets the column,
// a tab counts as 4 columns and everything else as 1.
switch rune1 {
case '\n':
l.line++
l.column = 1
case '\r':
l.column = 1
case '\t':
l.column += 4
default:
l.column++
}
switch {
case ActTab[state].Accept != -1:
// Accepting state: remember the token type and where the match ends,
// then keep going in case a longer match exists.
tok.Type = ActTab[state].Accept
end = l.pos
case ActTab[state].Ignore != "":
// Ignored text: discard it by moving the token start to the current
// position and restarting the machine from state 0.
start, startLine, startColumn = l.pos, l.line, l.column
state = 0
// If the ignored text ran to the end of the input, the result is EOF.
if start >= len(l.src) {
tok.Type = token.EOF
}
}
} else {
// No transition (or end of input). If nothing was accepted, extend end
// to the current offset so the INVALID token's literal covers what was
// read, including the rune that had no transition.
if tok.Type == token.INVALID {
end = l.pos
}
}
}
if end > start {
// Rewind to the end of the recorded match; anything read past it will
// be scanned again by the next call.
// NOTE(review): only l.pos is set back here — l.line and l.column keep
// whatever they were advanced to in the loop, so they can run ahead of
// l.pos after backtracking. Confirm whether that is intended.
l.pos = end
tok.Lit = l.src[start:end]
} else {
tok.Lit = []byte{}
}
tok.Pos.Offset, tok.Pos.Line, tok.Pos.Column = start, startLine, startColumn
tok.Pos.Context = l.Context
return
}
// Reset rewinds the lexer to the beginning of its source so that the same
// input can be scanned again.
//
// The line and column counters are restored to 1 along with the byte offset,
// matching the state NewLexer sets up. Resetting only the offset would make
// tokens scanned after a Reset report line/column values that continue from
// the previous pass. The source and Context are left untouched.
func (l *Lexer) Reset() {
	l.pos = 0
	l.line = 1
	l.column = 1
}
/*
Lexer symbols:
0: '"'
1: '"'
2: '"'
3: '"'
4: 'i'
5: '0'
6: 'x'
7: '0'
8: 'o'
9: '0'
10: 'b'
11: '.'
12: '.'
13: '-'
14: '+'
15: '.'
16: '.'
17: '-'
18: '.'
19: '+'
20: '.'
21: '.'
22: '-'
23: '.'
24: '+'
25: 'M'
26: '_'
27: 'P'
28: 'I'
29: 'M'
30: '_'
31: 'E'
32: 'n'
33: 'u'
34: 'l'
35: 'l'
36: 'I'
37: 'n'
38: 'f'
39: 'N'
40: 'a'
41: 'N'
42: 'I'
43: 'P'
44: 'S'
45: 'I'
46: 'F'
47: 'S'
48: 'I'
49: 'R'
50: 'S'
51: 'O'
52: 'P'
53: 'S'
54: 'O'
55: 'F'
56: 'S'
57: 'O'
58: 'R'
59: 'S'
60: 'F'
61: 'L'
62: 'A'
63: 'T'
64: 'S'
65: 'E'
66: 'P'
67: 'N'
68: 'F'
69: 'N'
70: 'R'
71: 'F'
72: 'N'
73: 'R'
74: 'F'
75: 'I'
76: 'L'
77: 'E'
78: 'N'
79: 'A'
80: 'M'
81: 'E'
82: 'F'
83: 'I'
84: 'L'
85: 'E'
86: 'N'
87: 'U'
88: 'M'
89: 'E'
90: 'N'
91: 'V'
92: 'b'
93: 'e'
94: 'g'
95: 'i'
96: 'n'
97: 'd'
98: 'o'
99: 'e'
100: 'l'
101: 'i'
102: 'f'
103: 'e'
104: 'l'
105: 's'
106: 'e'
107: 'e'
108: 'n'
109: 'd'
110: 'f'
111: 'i'
112: 'l'
113: 't'
114: 'e'
115: 'r'
116: 'f'
117: 'o'
118: 'r'
119: 'i'
120: 'f'
121: 'i'
122: 'n'
123: 'w'
124: 'h'
125: 'i'
126: 'l'
127: 'e'
128: 'b'
129: 'r'
130: 'e'
131: 'a'
132: 'k'
133: 'c'
134: 'o'
135: 'n'
136: 't'
137: 'i'
138: 'n'
139: 'u'
140: 'e'
141: 'r'
142: 'e'
143: 't'
144: 'u'
145: 'r'
146: 'n'
147: 'f'
148: 'u'
149: 'n'
150: 'c'
151: 's'
152: 'u'
153: 'b'
154: 'r'
155: 'c'
156: 'a'
157: 'l'
158: 'l'
159: 'a'
160: 'r'
161: 'r'
162: 'b'
163: 'o'
164: 'o'
165: 'l'
166: 'f'
167: 'l'
168: 'o'
169: 'a'
170: 't'
171: 'i'
172: 'n'
173: 't'
174: 'm'
175: 'a'
176: 'p'
177: 'n'
178: 'u'
179: 'm'
180: 's'
181: 't'
182: 'r'
183: 'v'
184: 'a'
185: 'r'
186: 'f'
187: 'u'
188: 'n'
189: 'c'
190: 't'
191: 'u'
192: 'n'
193: 's'
194: 'e'
195: 't'
196: 'd'
197: 'u'
198: 'm'
199: 'p'
200: 'e'
201: 'd'
202: 'u'
203: 'm'
204: 'p'
205: 'e'
206: 'm'
207: 'i'
208: 't'
209: '1'
210: 'e'
211: 'm'
212: 'i'
213: 't'
214: 'e'
215: 'm'
216: 'i'
217: 't'
218: 'p'
219: 'e'
220: 'm'
221: 'i'
222: 't'
223: 'f'
224: 'e'
225: 'p'
226: 'r'
227: 'i'
228: 'n'
229: 't'
230: 'e'
231: 'p'
232: 'r'
233: 'i'
234: 'n'
235: 't'
236: 'n'
237: 'p'
238: 'r'
239: 'i'
240: 'n'
241: 't'
242: 'p'
243: 'r'
244: 'i'
245: 'n'
246: 't'
247: 'n'
248: 't'
249: 'e'
250: 'e'
251: 's'
252: 't'
253: 'd'
254: 'o'
255: 'u'
256: 't'
257: 's'
258: 't'
259: 'd'
260: 'e'
261: 'r'
262: 'r'
263: '$'
264: '$'
265: '{'
266: '}'
267: '$'
268: '*'
269: '@'
270: '@'
271: '{'
272: '}'
273: '@'
274: '*'
275: 'a'
276: 'l'
277: 'l'
278: '%'
279: '%'
280: '%'
281: 'p'
282: 'a'
283: 'n'
284: 'i'
285: 'c'
286: '%'
287: '%'
288: '%'
289: ';'
290: '{'
291: '}'
292: '='
293: '>'
294: '>'
295: '>'
296: '|'
297: ','
298: '('
299: ')'
300: '$'
301: '['
302: ']'
303: '$'
304: '['
305: '['
306: '$'
307: '['
308: '['
309: '['
310: '@'
311: '['
312: '|'
313: '|'
314: '='
315: '^'
316: '^'
317: '='
318: '&'
319: '&'
320: '='
321: '?'
322: '?'
323: '='
324: '?'
325: '?'
326: '?'
327: '='
328: '|'
329: '='
330: '&'
331: '='
332: '^'
333: '='
334: '<'
335: '<'
336: '='
337: '>'
338: '>'
339: '='
340: '>'
341: '>'
342: '>'
343: '='
344: '+'
345: '='
346: '.'
347: '='
348: '-'
349: '='
350: '*'
351: '='
352: '/'
353: '='
354: '/'
355: '/'
356: '='
357: '%'
358: '='
359: '*'
360: '*'
361: '='
362: '?'
363: ':'
364: '|'
365: '|'
366: '^'
367: '^'
368: '&'
369: '&'
370: '='
371: '~'
372: '!'
373: '='
374: '~'
375: '='
376: '='
377: '!'
378: '='
379: '<'
380: '='
381: '>'
382: '>'
383: '='
384: '<'
385: '<'
386: '='
387: '^'
388: '&'
389: '<'
390: '<'
391: '>'
392: '>'
393: '>'
394: '+'
395: '-'
396: '.'
397: '+'
398: '.'
399: '-'
400: '*'
401: '/'
402: '/'
403: '/'
404: '%'
405: '.'
406: '*'
407: '.'
408: '/'
409: '.'
410: '/'
411: '/'
412: '.'
413: '!'
414: '~'
415: '?'
416: '?'
417: '?'
418: '?'
419: '?'
420: '*'
421: '*'
422: '['
423: '['
424: '['
425: '['
426: '['
427: '['
428: '_'
429: '_'
430: '\n'
431: ' '
432: '!'
433: '#'
434: '$'
435: '%'
436: '&'
437: '''
438: '\'
439: '('
440: ')'
441: '*'
442: '+'
443: ','
444: '-'
445: '.'
446: '/'
447: ':'
448: ';'
449: '<'
450: '='
451: '>'
452: '?'
453: '@'
454: '['
455: ']'
456: '^'
457: '_'
458: '`'
459: '{'
460: '|'
461: '}'
462: '~'
463: '\'
464: '\'
465: '\'
466: '"'
467: '\'
468: '['
469: '\'
470: ']'
471: '\'
472: '.'
473: '\'
474: '*'
475: '\'
476: '%'
477: '\'
478: '^'
479: '\'
480: '$'
481: '\'
482: '+'
483: '\'
484: '('
485: '\'
486: ')'
487: '\'
488: '&'
489: '\'
490: 'A'
491: '\'
492: 'B'
493: '\'
494: 'C'
495: '\'
496: 'D'
497: '\'
498: 'G'
499: '\'
500: 'H'
501: '\'
502: 'K'
503: '\'
504: 'L'
505: '\'
506: 'N'
507: '\'
508: 'P'
509: '\'
510: 'R'
511: '\'
512: 'S'
513: '\'
514: 'U'
515: '\'
516: 'V'
517: '\'
518: 'W'
519: '\'
520: 'X'
521: '\'
522: 'Z'
523: '\'
524: 'a'
525: '\'
526: 'b'
527: '\'
528: 'c'
529: '\'
530: 'd'
531: '\'
532: 'f'
533: '\'
534: 'g'
535: '\'
536: 'h'
537: '\'
538: 'k'
539: '\'
540: 'l'
541: '\'
542: 'n'
543: '\'
544: 'p'
545: '\'
546: 'r'
547: '\'
548: 's'
549: '\'
550: 't'
551: '\'
552: 'u'
553: '\'
554: 'v'
555: '\'
556: 'w'
557: '\'
558: 'x'
559: '\'
560: 'z'
561: '\'
562: '0'
563: '\'
564: '1'
565: '\'
566: '2'
567: '\'
568: '3'
569: '\'
570: '4'
571: '\'
572: '5'
573: '\'
574: '6'
575: '\'
576: '7'
577: '\'
578: '8'
579: '\'
580: '9'
581: 'e'
582: 'E'
583: 't'
584: 'r'
585: 'u'
586: 'e'
587: 'f'
588: 'a'
589: 'l'
590: 's'
591: 'e'
592: ' '
593: '!'
594: '#'
595: '$'
596: '%'
597: '&'
598: '''
599: '\'
600: '('
601: ')'
602: '*'
603: '+'
604: ','
605: '-'
606: '.'
607: '/'
608: ':'
609: ';'
610: '<'
611: '='
612: '>'
613: '?'
614: '@'
615: '['
616: ']'
617: '^'
618: '_'
619: '`'
620: '|'
621: '~'
622: '\'
623: '{'
624: '\'
625: '}'
626: ' '
627: '\t'
628: '\n'
629: '\r'
630: '#'
631: '\n'
632: 'a'-'z'
633: 'A'-'Z'
634: \u00a0-\u00ff
635: \u0100-\U0010ffff
636: '0'-'9'
637: '0'-'9'
638: 'a'-'f'
639: 'A'-'F'
640: '0'-'7'
641: '0'-'1'
642: 'A'-'Z'
643: 'a'-'z'
644: '0'-'9'
645: \u00a0-\u00ff
646: \u0100-\U0010ffff
647: 'A'-'Z'
648: 'a'-'z'
649: '0'-'9'
650: \u00a0-\u00ff
651: \u0100-\U0010ffff
652: .
*/