mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
Don't parse CSV comments (#1859)
* `mlr sort -b` feature
* mlr regtest -p test/cases/cli-help && make dev
* Don't parse CSV comments
* Add tests for PR 1346
* Add tests for PR 1787
* Add test CSV files
This commit is contained in:
parent
369156b70d
commit
06e16ea3ee
18 changed files with 62 additions and 37 deletions
|
|
@ -311,15 +311,28 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
|||
var errRead error
|
||||
for errRead == nil {
|
||||
line, errRead = r.readLine()
|
||||
if r.Comment != 0 && nextRune(line) == r.Comment {
|
||||
line = nil
|
||||
continue // Skip comment lines
|
||||
}
|
||||
|
||||
// MILLER-SPECIFIC UPDATE: DO NOT DO THIS
|
||||
// if r.Comment != 0 && nextRune(line) == r.Comment {
|
||||
// line = nil
|
||||
// continue // Skip comment lines
|
||||
// }
|
||||
|
||||
// MILLER-SPECIFIC UPDATE: DO NOT DO THIS
|
||||
// if errRead == nil && len(line) == lengthNL(line) {
|
||||
// line = nil
|
||||
// continue // Skip empty lines
|
||||
// line = nil
|
||||
// continue // Skip empty lines
|
||||
// }
|
||||
|
||||
// MILLER-SPECIFIC UPDATE: If the line starts with the comment character,
|
||||
// don't attempt to CSV-parse it -- just hand it back as a single field.
|
||||
// This allows two things:
|
||||
// * User comments get passed through as intended, without being reformatted;
|
||||
// * Users can do things like `# a"b` in their comments without getting an
|
||||
// imbalanced-double-quote error.
|
||||
if r.Comment != 0 && nextRune(line) == r.Comment {
|
||||
return []string{string(line)}, nil
|
||||
}
|
||||
break
|
||||
}
|
||||
if errRead == io.EOF {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package input
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"container/list"
|
||||
"fmt"
|
||||
"io"
|
||||
|
|
@ -109,6 +108,14 @@ func (reader *RecordReaderCSV) processHandle(
|
|||
csvReader.Comma = rune(reader.ifs0)
|
||||
csvReader.LazyQuotes = reader.csvLazyQuotes
|
||||
csvReader.TrimLeadingSpace = reader.csvTrimLeadingSpace
|
||||
|
||||
if reader.readerOptions.CommentHandling != cli.CommentsAreData {
|
||||
if len(reader.readerOptions.CommentString) == 1 {
|
||||
// Use our modified fork of the go-csv package
|
||||
csvReader.Comment = rune(reader.readerOptions.CommentString[0])
|
||||
}
|
||||
}
|
||||
|
||||
csvRecordsChannel := make(chan *list.List, recordsPerBatch)
|
||||
go channelizedCSVRecordScanner(csvReader, csvRecordsChannel, downstreamDoneChannel, errorChannel,
|
||||
recordsPerBatch)
|
||||
|
|
@ -318,42 +325,17 @@ func (reader *RecordReaderCSV) maybeConsumeComment(
|
|||
// However, sadly, bytes.Buffer does not implement io.Writer because
|
||||
// its Write method has pointer receiver. So we have a WorkaroundBuffer
|
||||
// struct below which has non-pointer receiver.
|
||||
buffer := NewWorkaroundBuffer()
|
||||
csvWriter := csv.NewWriter(buffer)
|
||||
csvWriter.Comma = rune(reader.ifs0)
|
||||
csvWriter.Write(csvRecord)
|
||||
csvWriter.Flush()
|
||||
recordsAndContexts.PushBack(types.NewOutputString(buffer.String(), context))
|
||||
|
||||
// Contract with our fork of the go-csv CSV Reader
|
||||
lib.InternalCodingErrorIf(len(csvRecord) != 1)
|
||||
recordsAndContexts.PushBack(types.NewOutputString(csvRecord[0], context))
|
||||
|
||||
} else /* reader.readerOptions.CommentHandling == cli.SkipComments */ {
|
||||
// discard entirely
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// As noted above: wraps a bytes.Buffer, whose Write method has pointer
|
||||
// receiver, in a struct with non-pointer receiver so that it implements
|
||||
// io.Writer.
|
||||
|
||||
// WorkaroundBuffer is a small value type that holds a pointer to a
// bytes.Buffer. Its methods are declared on the non-pointer receiver and
// delegate to the wrapped buffer, so a plain WorkaroundBuffer value can be
// passed where an io.Writer is expected.
type WorkaroundBuffer struct {
|
||||
// pbuffer is the underlying buffer that all method calls are forwarded to.
pbuffer *bytes.Buffer
|
||||
}
|
||||
|
||||
// NewWorkaroundBuffer returns a WorkaroundBuffer wrapping a freshly declared,
// empty bytes.Buffer.
func NewWorkaroundBuffer() WorkaroundBuffer {
|
||||
var buffer bytes.Buffer
|
||||
return WorkaroundBuffer{
|
||||
pbuffer: &buffer,
|
||||
}
|
||||
}
|
||||
|
||||
// Write forwards p to the wrapped bytes.Buffer's Write method and returns
// that call's results unchanged. The receiver is a value, but it carries a
// pointer, so the write lands in the shared underlying buffer.
func (wb WorkaroundBuffer) Write(p []byte) (n int, err error) {
|
||||
return wb.pbuffer.Write(p)
|
||||
}
|
||||
|
||||
// String returns whatever the wrapped bytes.Buffer's String method reports,
// i.e. the data accumulated through Write so far.
func (wb WorkaroundBuffer) String() string {
|
||||
return wb.pbuffer.String()
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
// BOM-stripping
|
||||
//
|
||||
|
|
|
|||
1
test/cases/io-skip-pass-comments/pr-1346/cmd
Normal file
1
test/cases/io-skip-pass-comments/pr-1346/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --skip-comments --csv --pass-comments cat test/input/pr-1346.csv
|
||||
1
test/cases/io-skip-pass-comments/pr-1346/experr
Normal file
1
test/cases/io-skip-pass-comments/pr-1346/experr
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr: mlr: CSV header/data length mismatch 2 != 1 at filename test/input/pr-1346.csv row 4.
|
||||
5
test/cases/io-skip-pass-comments/pr-1346/expout
Normal file
5
test/cases/io-skip-pass-comments/pr-1346/expout
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
field1,field2
|
||||
a,b
|
||||
# that was the first record
|
||||
c,d
|
||||
# that was the second record, and there is no more data
|
||||
0
test/cases/io-skip-pass-comments/pr-1346/should-fail
Normal file
0
test/cases/io-skip-pass-comments/pr-1346/should-fail
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-a/cmd
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-a/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv cat test/input/pr-1787.csv
|
||||
1
test/cases/io-skip-pass-comments/pr-1787-a/experr
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-a/experr
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr: parse error on line 3, column 4: bare " in non-quoted-field.
|
||||
2
test/cases/io-skip-pass-comments/pr-1787-a/expout
Normal file
2
test/cases/io-skip-pass-comments/pr-1787-a/expout
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
a,b,c
|
||||
1,2,3
|
||||
0
test/cases/io-skip-pass-comments/pr-1787-a/should-fail
Normal file
0
test/cases/io-skip-pass-comments/pr-1787-a/should-fail
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-b/cmd
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-b/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv --pass-comments cat test/input/pr-1787.csv
|
||||
0
test/cases/io-skip-pass-comments/pr-1787-b/experr
Normal file
0
test/cases/io-skip-pass-comments/pr-1787-b/experr
Normal file
4
test/cases/io-skip-pass-comments/pr-1787-b/expout
Normal file
4
test/cases/io-skip-pass-comments/pr-1787-b/expout
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a,b,c
|
||||
1,2,3
|
||||
# x"y
|
||||
4,5,6
|
||||
1
test/cases/io-skip-pass-comments/pr-1787-c/cmd
Normal file
1
test/cases/io-skip-pass-comments/pr-1787-c/cmd
Normal file
|
|
@ -0,0 +1 @@
|
|||
mlr --csv --skip-comments cat test/input/pr-1787.csv
|
||||
0
test/cases/io-skip-pass-comments/pr-1787-c/experr
Normal file
0
test/cases/io-skip-pass-comments/pr-1787-c/experr
Normal file
3
test/cases/io-skip-pass-comments/pr-1787-c/expout
Normal file
3
test/cases/io-skip-pass-comments/pr-1787-c/expout
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
a,b,c
|
||||
1,2,3
|
||||
4,5,6
|
||||
6
test/input/pr-1346.csv
Normal file
6
test/input/pr-1346.csv
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
field1,field2
|
||||
a,b
|
||||
# that was the first record
|
||||
c,d
|
||||
# that was the second record, and there is no more data
|
||||
|
||||
|
4
test/input/pr-1787.csv
Normal file
4
test/input/pr-1787.csv
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
a,b,c
|
||||
1,2,3
|
||||
# x"y
|
||||
4,5,6
|
||||
|
Can't render this file because it contains an unexpected character in line 3 and column 4.
|
Loading…
Add table
Add a link
Reference in a new issue