Miller produces no output on TSV with > 64K characters per line (#1505)

* Switch to bufio.Reader, first pass

* temp

* Simplify ILineReader by making it stateless

* Interface not necessary; ILineReader -> TLineReader

* neaten

* iterating
This commit is contained in:
John Kerl 2024-02-25 15:50:50 -05:00 committed by GitHub
parent 57b32c3e9b
commit 3ff43fa818
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 198 additions and 66 deletions

View file

@ -8,6 +8,7 @@ package cli
import (
"bufio"
"errors"
"fmt"
"io"
"os"
@ -29,7 +30,7 @@ import (
// - IFS/IPS can have escapes like "\x1f" which aren't valid regex literals
// so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f'
// -- to the single byte with hex code 0x1f.
func FinalizeReaderOptions(readerOptions *TReaderOptions) {
func FinalizeReaderOptions(readerOptions *TReaderOptions) error {
readerOptions.IFS = lib.UnhexStringLiteral(readerOptions.IFS)
readerOptions.IPS = lib.UnhexStringLiteral(readerOptions.IPS)
@ -57,12 +58,17 @@ func FinalizeReaderOptions(readerOptions *TReaderOptions) {
readerOptions.IFS = lib.UnbackslashStringLiteral(readerOptions.IFS)
readerOptions.IPS = lib.UnbackslashStringLiteral(readerOptions.IPS)
readerOptions.IRS = lib.UnbackslashStringLiteral(readerOptions.IRS)
if readerOptions.IRS == "" {
return errors.New("empty IRS")
}
return nil
}
// FinalizeWriterOptions unbackslashes OPS, OFS, and ORS. This is because
// the '\n' at the command line, which is Go "\\n" (a backslash and an
// n), needs to become the single newline character, and likewise for "\t", etc.
func FinalizeWriterOptions(writerOptions *TWriterOptions) {
func FinalizeWriterOptions(writerOptions *TWriterOptions) error {
if !writerOptions.ofsWasSpecified {
writerOptions.OFS = defaultFSes[writerOptions.OutputFileFormat]
}
@ -84,6 +90,8 @@ func FinalizeWriterOptions(writerOptions *TWriterOptions) {
writerOptions.OFS = lib.UnbackslashStringLiteral(writerOptions.OFS)
writerOptions.OPS = lib.UnbackslashStringLiteral(writerOptions.OPS)
writerOptions.ORS = lib.UnbackslashStringLiteral(writerOptions.ORS)
return nil
}
// ================================================================

View file

@ -82,6 +82,7 @@ var SEPARATOR_REGEX_NAMES_TO_VALUES = map[string]string{
// E.g. if IFS isn't specified, it's space for NIDX and comma for DKVP, etc.
var defaultFSes = map[string]string{
"gen": ",",
"csv": ",",
"csvlite": ",",
"dkvp": ",",
@ -94,6 +95,7 @@ var defaultFSes = map[string]string{
}
var defaultPSes = map[string]string{
"gen": "N/A",
"csv": "N/A",
"csvlite": "N/A",
"dkvp": "=",
@ -106,6 +108,7 @@ var defaultPSes = map[string]string{
}
var defaultRSes = map[string]string{
"gen": "\n",
"csv": "\n",
"csvlite": "\n",
"dkvp": "\n",
@ -118,6 +121,7 @@ var defaultRSes = map[string]string{
}
var defaultAllowRepeatIFSes = map[string]bool{
"gen": false,
"csv": false,
"csvlite": false,
"dkvp": false,