fix mlr termcvt

This commit is contained in:
John Kerl 2017-05-13 19:23:49 -04:00
parent 92bfc6ab6f
commit a8b1370676
9 changed files with 141 additions and 98 deletions

1
.gitattributes vendored
View file

@ -24,3 +24,4 @@ out binary
*.csv-crlf binary
*.pprint-crlf binary
*.dkvp-crlf binary
*.bin binary

View file

@ -156,90 +156,16 @@ static int lecat_stream(FILE* input_stream, int do_color) {
}
// ================================================================
// Common signature of the per-line terminator converters: each receives the
// line buffer, its length in bytes, and the stream to write the result to.
typedef void line_cvt_func_t(char* line, ssize_t linelen, FILE* output_stream);
// Writes 'line' to 'output_stream', appending LF when the line ends in a bare
// CR so that the terminator becomes CR-LF. Lines with any other ending are
// written unchanged.
//
// line:          NUL-terminated buffer (it is handed to fputs).
// linelen:       number of bytes in 'line' before the NUL.
// output_stream: destination stream.
static void cr_to_crlf(char* line, ssize_t linelen, FILE* output_stream) {
	if (linelen <= 0) {
		// No final byte to inspect. Looking at line[linelen-1] here would read
		// memory before the buffer, so just pass the (empty) line through.
		fputs(line, output_stream);
		return;
	}

	if (linelen == 1) {
		if (line[0] == '\r') {
			fputc('\r', output_stream);
			fputc('\n', output_stream);
		} else {
			fputc(line[0], output_stream);
		}
		return;
	}

	// A line already ending in CR-LF has LF as its last byte, so it takes the
	// same write-unchanged path as any other line not ending in CR.
	fputs(line, output_stream);
	if (line[linelen-1] == '\r') {
		fputc('\n', output_stream);
	}
}
// Writes 'line' to 'output_stream', turning a trailing bare LF into CR-LF.
// Lines already ending in CR-LF, and lines with no trailing LF, are written
// unchanged.
//
// line:          NUL-terminated buffer (it is handed to fputs); its final
//                byte may be overwritten in place.
// linelen:       number of bytes in 'line' before the NUL.
// output_stream: destination stream.
static void lf_to_crlf(char* line, ssize_t linelen, FILE* output_stream) {
	if (linelen <= 0) {
		// No final byte to inspect or rewrite. Touching line[linelen-1] here
		// would read and possibly modify memory before the buffer.
		fputs(line, output_stream);
		return;
	}

	if (linelen == 1) {
		if (line[0] == '\n') {
			fputc('\r', output_stream);
			fputc('\n', output_stream);
		} else {
			fputc(line[0], output_stream);
		}
		return;
	}

	if (line[linelen-1] == '\n' && line[linelen-2] != '\r') {
		// Bare LF: overwrite it with CR, then emit the LF separately.
		line[linelen-1] = '\r';
		fputs(line, output_stream);
		fputc('\n', output_stream);
	} else {
		// Already CR-LF, or no LF terminator at all.
		fputs(line, output_stream);
	}
}
// Writes 'line' to 'output_stream' with a trailing CR-LF reduced to a bare CR.
// The buffer is edited in place; lines without a CR-LF ending pass through
// untouched.
static void crlf_to_cr(char* line, ssize_t linelen, FILE* output_stream) {
	int ends_with_crlf = linelen >= 2
		&& line[linelen-2] == '\r'
		&& line[linelen-1] == '\n';

	if (ends_with_crlf) {
		// The CR is already where it needs to be; cutting the string at the
		// LF leaves "...\r".
		line[linelen-1] = '\0';
	}
	fputs(line, output_stream);
}
// Writes 'line' to 'output_stream' with a trailing CR-LF reduced to a bare LF.
// The buffer is edited in place; lines without a CR-LF ending pass through
// untouched.
static void crlf_to_lf(char* line, ssize_t linelen, FILE* output_stream) {
	if (linelen >= 2) {
		char* tail = &line[linelen-2];
		if (tail[0] == '\r' && tail[1] == '\n') {
			// Slide the LF over the CR and terminate the string one byte earlier.
			tail[0] = '\n';
			tail[1] = '\0';
		}
	}
	fputs(line, output_stream);
}
// Writes 'line' to 'output_stream', replacing a final CR byte with LF in
// place. Lines not ending in CR are written as-is.
static void cr_to_lf(char* line, ssize_t linelen, FILE* output_stream) {
	char* last = (linelen >= 1) ? &line[linelen-1] : NULL;

	if (last != NULL && *last == '\r') {
		*last = '\n';
	}
	fputs(line, output_stream);
}
// Writes 'line' to 'output_stream', replacing a final LF byte with CR in
// place. Lines not ending in LF are written as-is.
static void lf_to_cr(char* line, ssize_t linelen, FILE* output_stream) {
	if (linelen < 1 || line[linelen-1] != '\n') {
		// No LF terminator present: nothing to convert.
		fputs(line, output_stream);
		return;
	}

	line[linelen-1] = '\r';
	fputs(line, output_stream);
}
// ----------------------------------------------------------------
static int termcvt_stream(FILE* input_stream, FILE* output_stream, char inend, line_cvt_func_t* pcvt_func) {
static int termcvt_stream(FILE* input_stream, FILE* output_stream, char* inend, char* outend) {
size_t line_length = MLR_ALLOC_READ_LINE_INITIAL_SIZE;
int inend_length = strlen(inend);
while (1) {
char* line = mlr_alloc_read_line_single_delimiter(input_stream, inend, &line_length, FALSE, NULL);
char* line = mlr_alloc_read_line_multiple_delimiter(input_stream, inend, inend_length, &line_length);
if (line == NULL) {
break;
}
pcvt_func(line, line_length, output_stream);
fputs(line, output_stream);
fputs(outend, output_stream);
free(line);
}
return 1;
@ -265,9 +191,9 @@ static void termcvt_usage(char* argv0, char* argv1, FILE* o, int exit_code) {
// ----------------------------------------------------------------
static int termcvt_main(int argc, char** argv) {
int ok = 1;
char inend = '\n';
char* inend = "\n";
char* outend = "\n";
int do_in_place = FALSE;
line_cvt_func_t* pcvt_func = lf_to_crlf;
// argv[0] is 'mlr'
// argv[1] is 'termcvt'
@ -287,23 +213,23 @@ static int termcvt_main(int argc, char** argv) {
} else if (streq(opt, "-I")) {
do_in_place = TRUE;
} else if (streq(opt, "--cr2crlf")) {
pcvt_func = cr_to_crlf;
inend = '\r';
inend = "\r";
outend = "\r\n";
} else if (streq(opt, "--lf2crlf")) {
pcvt_func = lf_to_crlf;
inend = '\n';
inend = "\n";
outend = "\r\n";
} else if (streq(opt, "--crlf2cr")) {
pcvt_func = crlf_to_cr;
inend = '\n';
} else if (streq(opt, "--crlf2lf")) {
pcvt_func = crlf_to_lf;
inend = '\n';
} else if (streq(opt, "--cr2lf")) {
pcvt_func = cr_to_lf;
inend = '\r';
inend = "\r\n";
outend = "\r";
} else if (streq(opt, "--lf2cr")) {
pcvt_func = lf_to_cr;
inend = '\n';
inend = "\n";
outend = "\r";
} else if (streq(opt, "--crlf2lf")) {
inend = "\r\n";
outend = "\n";
} else if (streq(opt, "--cr2lf")) {
inend = "\r";
outend = "\n";
} else {
termcvt_usage(argv[0], argv[1], stdout, 0);
}
@ -311,7 +237,7 @@ static int termcvt_main(int argc, char** argv) {
int nfiles = argc - argi;
if (nfiles == 0) {
ok = ok && termcvt_stream(stdin, stdout, inend, pcvt_func);
ok = ok && termcvt_stream(stdin, stdout, inend, outend);
} else if (do_in_place) {
for (; argi < argc; argi++) {
@ -333,7 +259,7 @@ static int termcvt_main(int argc, char** argv) {
exit(1);
}
ok = termcvt_stream(input_stream, output_stream, inend, pcvt_func);
ok = termcvt_stream(input_stream, output_stream, inend, outend);
fclose(input_stream);
fclose(output_stream);
@ -356,7 +282,7 @@ static int termcvt_main(int argc, char** argv) {
perror(file_name);
exit(1);
}
ok = termcvt_stream(input_stream, stdout, inend, pcvt_func);
ok = termcvt_stream(input_stream, stdout, inend, outend);
fclose(input_stream);
}

View file

@ -56359,3 +56359,87 @@ i 10
x 0.5026260055412137
y 0.9526183602969864
mlr lecat --mono ./reg_test/input/line-ending-cr.bin
hello[CR]there[CR]how[CR]are[CR]you[CR]
mlr lecat --mono ./reg_test/input/line-ending-lf.bin
hello[LF]
there[LF]
how[LF]
are[LF]
you[LF]
mlr lecat --mono ./reg_test/input/line-ending-crlf.bin
hello[CR][LF]
there[CR][LF]
how[CR][LF]
are[CR][LF]
you[CR][LF]
mlr termcvt --cr2lf ./reg_test/input/line-ending-cr.bin
mlr hex
hello
there
how
are
you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 63 72 |mlr termcvt --cr|
00000010: 32 6c 66 20 2e 2f 72 65 67 5f 74 65 73 74 2f 69 |2lf ./reg_test/i|
00000020: 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 6e 67 |nput/line-ending|
00000030: 2d 63 72 2e 62 69 6e 0a |-cr.bin.|
mlr termcvt --cr2crlf ./reg_test/input/line-ending-cr.bin
mlr hex
hello
there
how
are
you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 63 72 |mlr termcvt --cr|
00000010: 32 63 72 6c 66 20 2e 2f 72 65 67 5f 74 65 73 74 |2crlf ./reg_test|
00000020: 2f 69 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 |/input/line-endi|
00000030: 6e 67 2d 63 72 2e 62 69 6e 0a |ng-cr.bin.|
mlr termcvt --lf2cr ./reg_test/input/line-ending-lf.bin
mlr hex
hello there how are you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 6c 66 |mlr termcvt --lf|
00000010: 32 63 72 20 2e 2f 72 65 67 5f 74 65 73 74 2f 69 |2cr ./reg_test/i|
00000020: 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 6e 67 |nput/line-ending|
00000030: 2d 6c 66 2e 62 69 6e 0a |-lf.bin.|
mlr termcvt --lf2crlf ./reg_test/input/line-ending-lf.bin
mlr hex
hello
there
how
are
you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 6c 66 |mlr termcvt --lf|
00000010: 32 63 72 6c 66 20 2e 2f 72 65 67 5f 74 65 73 74 |2crlf ./reg_test|
00000020: 2f 69 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 |/input/line-endi|
00000030: 6e 67 2d 6c 66 2e 62 69 6e 0a |ng-lf.bin.|
mlr termcvt --crlf2cr ./reg_test/input/line-ending-crlf.bin
mlr hex
hello there how are you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 63 72 |mlr termcvt --cr|
00000010: 6c 66 32 63 72 20 2e 2f 72 65 67 5f 74 65 73 74 |lf2cr ./reg_test|
00000020: 2f 69 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 |/input/line-endi|
00000030: 6e 67 2d 63 72 6c 66 2e 62 69 6e 0a |ng-crlf.bin.|
mlr termcvt --crlf2lf ./reg_test/input/line-ending-crlf.bin
mlr hex
hello
there
how
are
you
00000000: 6d 6c 72 20 74 65 72 6d 63 76 74 20 2d 2d 63 72 |mlr termcvt --cr|
00000010: 6c 66 32 6c 66 20 2e 2f 72 65 67 5f 74 65 73 74 |lf2lf ./reg_test|
00000020: 2f 69 6e 70 75 74 2f 6c 69 6e 65 2d 65 6e 64 69 |/input/line-endi|
00000030: 6e 67 2d 63 72 6c 66 2e 62 69 6e 0a |ng-crlf.bin.|

View file

@ -88,6 +88,9 @@ EXTRA_DIST= \
joina.dkvp \
joinb.dkvp \
json-output-options.dkvp \
line-ending-cr.bin \
line-ending-crlf.bin \
line-ending-lf.bin \
line-term-lf.dkvp \
line-term-crlf.dkvp \
line-term-lf.csv \

View file

@ -0,0 +1 @@
hello there how are you

View file

@ -0,0 +1,5 @@
hello
there
how
are
you

View file

@ -0,0 +1,5 @@
hello
there
how
are
you

View file

@ -6576,6 +6576,20 @@ run_mlr --oxtab --from $indir/abixy-het put '
)
'
# ----------------------------------------------------------------
# AUX ENTRIES
run_mlr lecat --mono $indir/line-ending-cr.bin
run_mlr lecat --mono $indir/line-ending-lf.bin
run_mlr lecat --mono $indir/line-ending-crlf.bin
run_mlr termcvt --cr2lf $indir/line-ending-cr.bin | run_mlr hex
run_mlr termcvt --cr2crlf $indir/line-ending-cr.bin | run_mlr hex
run_mlr termcvt --lf2cr $indir/line-ending-lf.bin | run_mlr hex
run_mlr termcvt --lf2crlf $indir/line-ending-lf.bin | run_mlr hex
run_mlr termcvt --crlf2cr $indir/line-ending-crlf.bin | run_mlr hex
run_mlr termcvt --crlf2lf $indir/line-ending-crlf.bin | run_mlr hex
# ================================================================
# A key feature of this regression script is that it can be invoked from any
# directory. Depending on the directory it's invoked from, the path to $outdir

View file

@ -22,6 +22,10 @@ FUNDAM:
----------------------------------------------------------------
airable:
!!! aux-list -> main help; dox too
!! fix term-cvt and UT it along with hex -r !!
! faqent/cookbook/more:
mlr termcvt --cr2lf foo.csv.cr > foo.csv