diff --git a/docs/src/manpage.md b/docs/src/manpage.md index de7ce4b6f..19cb2de07 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -220,18 +220,19 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub - nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile - percentiles pow qnorm reduce regextract regextract_or_else rightpad round - roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime - select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita - splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul + mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os + percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad + round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate + sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort + sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub + stddev strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2650,6 +2651,16 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. + 1mmatch0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + + 1mmatchx0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 
@@ -3649,5 +3660,5 @@ MILLER(1) MILLER(1) - 2023-12-13 MILLER(1) + 2023-12-16 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index e7e3d3582..7f3a122af 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -199,18 +199,19 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub - nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile - percentiles pow qnorm reduce regextract regextract_or_else rightpad round - roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime - select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita - splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul + mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os + percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad + round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate + sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort + sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub + stddev strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2629,6 +2630,16 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. + 1mmatch0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + + 1mmatchx0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. @@ -3628,4 +3639,4 @@ MILLER(1) MILLER(1) - 2023-12-13 MILLER(1) + 2023-12-16 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 8c3b49640..d391e8341 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [match](#match), [matchx](#matchx), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1296,6 +1296,22 @@ lstrip (class=string #args=1) Strip leading whitespace from string. +### match +
+match (class=string #args=2) TODO: WRITE ME +Example: +TODO: WRITE ME ++ + +### matchx +
+matchx (class=string #args=2) TODO: WRITE ME +Example: +TODO: WRITE ME ++ + ### regextract
regextract (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/man/manpage.txt b/man/manpage.txt
index e7e3d3582..7f3a122af 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1) MILLER(1)
is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
- mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
- nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
- percentiles pow qnorm reduce regextract regextract_or_else rightpad round
- roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
- select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
- splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
- strfntime_local strftime strftime_local string strip strlen strpntime
- strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
- sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
- typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
- urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
- .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+ mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+ mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+ percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+ round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+ sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+ sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+ stddev strfntime strfntime_local strftime strftime_local string strip strlen
+ strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+ sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+ truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+ urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+ && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+ || ~
1mCOMMENTS-IN-DATA FLAGS0m
Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1) MILLER(1)
1mmapsum0m
(class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
+ 1mmatch0m
+ (class=string #args=2) TODO: WRITE ME
+ Example:
+ TODO: WRITE ME
+
+ 1mmatchx0m
+ (class=string #args=2) TODO: WRITE ME
+ Example:
+ TODO: WRITE ME
+
1mmax0m
(class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
@@ -3628,4 +3639,4 @@ MILLER(1) MILLER(1)
- 2023-12-13 MILLER(1)
+ 2023-12-16 MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 4d5ee4f5c..4f0644ed7 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
-.\" Date: 2023-12-13
+.\" Date: 2023-12-16
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
-.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -246,18 +246,19 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null
is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-strfntime_local strftime strftime_local string strip strlen strpntime
-strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+stddev strfntime strfntime_local strftime strftime_local string strip strlen
+strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+|| ~
.fi
.if n \{\
.RE
@@ -3938,6 +3939,28 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
.fi
.if n \{\
.RE
+.SS "match"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
+.SS "matchx"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
.SS "max"
.if n \{\
.RS 0
diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go
index 52cab9ac5..74c0840f6 100644
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@@ -81,7 +81,7 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
sregex := input2.AcquireStringValue()
replacement := input3.AcquireStringValue()
- stringOutput := lib.RegexSub(input, sregex, replacement)
+ stringOutput := lib.RegexStringSub(input, sregex, replacement)
return mlrval.FromString(stringOutput)
}
@@ -111,7 +111,7 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
sregex := input2.AcquireStringValue()
replacement := input3.AcquireStringValue()
- stringOutput := lib.RegexGsub(input, sregex, replacement)
+ stringOutput := lib.RegexStringGsub(input, sregex, replacement)
return mlrval.FromString(stringOutput)
}
@@ -129,7 +129,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
return mlrval.FromNotStringError("=~", input2), nil
}
- boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue())
+ boolOutput, captures := lib.RegexStringMatchWithCaptures(input1string, input2.AcquireStringValue())
return mlrval.FromBool(boolOutput), captures
}
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index 08b3200a9..c0b4d8875 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -266,7 +266,7 @@ func (root *RootNode) BuildStringLiteralNode(literal string) IEvaluable {
// RegexLiteralNode. See also https://github.com/johnkerl/miller/issues/297.
literal = lib.UnbackslashStringLiteral(literal)
- hasCaptures, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(literal)
+ hasCaptures, replacementCaptureMatrix := lib.ReplacementHasCaptures(literal)
if !hasCaptures {
return &StringLiteralNode{
literal: mlrval.FromString(literal),
diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go
index 280201936..096060e62 100644
--- a/pkg/input/record_reader.go
+++ b/pkg/input/record_reader.go
@@ -158,7 +158,7 @@ type tIPSRegexSplitter struct {
}
func (s *tIPSRegexSplitter) Split(input string) []string {
- return lib.RegexSplitString(s.ipsRegex, input, 2)
+ return lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
}
// IFieldSplitter splits a string into pieces, e.g. for IFS.
@@ -193,5 +193,5 @@ type tIFSRegexSplitter struct {
}
func (s *tIFSRegexSplitter) Split(input string) []string {
- return lib.RegexSplitString(s.ifsRegex, input, -1)
+ return lib.RegexCompiledSplitString(s.ifsRegex, input, -1)
}
diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go
index 0cfc74b25..74d8dac41 100644
--- a/pkg/input/record_reader_xtab.go
+++ b/pkg/input/record_reader_xtab.go
@@ -304,7 +304,7 @@ type tXTABIPSSplitter struct {
// which we need to produce just a pair of items -- a key and a value -- delimited by one or more
// IPS. For exaemple, with IPS being a space, in 'abc 123' we need to get key 'abc' and value
// '123'; for 'abc 123 456' we need key 'abc' and value '123 456'. It's super-elegant to simply
-// regex-split the line like 'kv = lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)' --
+// regex-split the line like 'kv = lib.RegexCompiledSplitString(reader.readerOptions.IPSRegex, line, 2)' --
// however, that's 3x slower than the current implementation. It turns out regexes are great
// but we should use them only when we must, since they are expensive.
func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
@@ -358,7 +358,7 @@ type tXTABIPSRegexSplitter struct {
}
func (s *tXTABIPSRegexSplitter) Split(input string) (key, value string, err error) {
- kv := lib.RegexSplitString(s.ipsRegex, input, 2)
+ kv := lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
if len(kv) == 0 {
return "", "", fmt.Errorf("internal coding error in XTAB reader")
} else if len(kv) == 1 {
diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go
index 3bab04036..cabbc1510 100644
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@@ -1,5 +1,5 @@
// ================================================================
-// Support for regexes in Miller.
+// Support for regular expressions in Miller.
//
// * By and large we use the Go library.
//
@@ -13,17 +13,24 @@
// $y = "\2:\1";
// }
// where the '=~' sets the captures and the "\2:\1" uses them. (Note that
-// https://github.com/johnkerl/miller/issues/388 has a better suggestion
-// which would make the captures explicit as variables, rather than implicit
-// within CST state -- regardless, the current syntax will still be supported
-// for backward compatibility and so is here to stay.) Here we make use of Go
-// regexp-library functions to write to, and then later interpolate from, a
-// captures array which is stored within CST state. (See the `runtime.State`
-// object.)
+// https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the
+// captures explicit as variables, rather than implicit within CST state: this is implemented by
+// the `match` and `matchx` DSL functions. Regardless, the `=~` syntax will still be supported
+// for backward compatibility and so is here to stay.) Here we make use of Go regexp-library
+// functions to write to, and then later interpolate from, a captures array which is stored within
+// CST state. (See the `runtime.State` object.)
//
// * "\0" is for a full match; "\1" .. "\9" are for submatch cqptures. E.g.
// if $x is "foobarbaz" and the regex is "foo(.)(..)baz", then "\0" is
// "foobarbaz", "\1" is "b", "\2" is "ar", and "\3".."\9" are "".
+//
+// * Naming:
+//
+// o "regexp" and "Regexp" are used for the Go library and its data structure, respectively;
+//
+// o "regex" is used for regular-expression strings following Miller's idiosyncratic syntax and
+// semantics as described above.
+//
// ================================================================
package lib
@@ -34,6 +41,7 @@ import (
"os"
"regexp"
"strings"
+ "sync"
)
// captureDetector is used to see if a string literal interpolates previous
@@ -44,20 +52,54 @@ var captureDetector = regexp.MustCompile(`\\[0-9]`)
// "\2:\1" so they don't need to be recomputed on every record.
var captureSplitter = regexp.MustCompile(`(\\[0-9])`)
-// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax
-// which predate the port of Miller from C to Go. Miller regexes use a final
-// 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".
+// See regexpCompileCached
+var regexpCache map[string]*regexp.Regexp
+
+const cacheMaxSize = 1000
+
+var cacheMutex sync.Mutex
+
+// regexpCompileCached keeps a cache of compiled regexes, so that the caller has the flexibility to
+// only pass in strings while getting the benefits of compilation avoidance.
//
-// (See also mlr.bnf where we specify which things can be backslash-escaped
-// without a syntax error at parse time.)
+// Regarding cache size: in nominal use, regexp strings are within Miller DSL code statements, and
+// there will be a handful. These will all get re-used after their first application, and the cache
+// will remain bounded by the size of the user's DSL code. However, it is possible to have regex
+// strings contained within Miller record-field data.
//
-// * If the regex_string is of the form a.*b, compiles it case-sensisitively.
-// * If the regex_string is of the form "a.*b", compiles a.*b case-sensisitively.
+// We could solve this by using an LRU cache. However, for simplicity, we limit the number of
+// cached compiles, and for any extras that appear during record processing, we simply recompile
+// each time.
+func regexpCompileCached(s string) (*regexp.Regexp, error) {
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock() // one lock guards the lookup, the size check, and the store
+	if r, ok := regexpCache[s]; ok { // cache hit; a lookup on a still-nil map just misses
+		return r, nil
+	}
+	r, err := regexp.Compile(s)
+	if err == nil && len(regexpCache) < cacheMaxSize { // once full, extras are recompiled per call
+		if regexpCache == nil {
+			regexpCache = make(map[string]*regexp.Regexp)
+		}
+		regexpCache[s] = r
+	}
+	return r, err
+}
+
+// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the
+// port of Miller from C to Go. Miller regexes use a final 'i' to indicate case-insensitivity; Go
+// regexes use an initial "(?i)".
+//
+// (See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error
+// at parse time.)
+//
+// * If the regex_string is of the form a.*b, compiles it case-sensitively.
+// * If the regex_string is of the form "a.*b", compiles a.*b case-sensitively.
// * If the regex_string is of the form "a.*b"i, compiles a.*b case-insensitively.
func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
n := len(regexString)
if n < 2 {
- return regexp.Compile(regexString)
+ return regexpCompileCached(regexString)
}
// TODO: rethink this. This will strip out things people have entered, e.g. "\"...\"".
@@ -68,20 +110,20 @@ func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
// literals) and from verbs (like cut -r or having-fields).
if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"") {
- return regexp.Compile(regexString[1 : n-1])
+ return regexpCompileCached(regexString[1 : n-1])
}
if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/") {
- return regexp.Compile(regexString[1 : n-1])
+ return regexpCompileCached(regexString[1 : n-1])
}
if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"i") {
- return regexp.Compile("(?i)" + regexString[1:n-2])
+ return regexpCompileCached("(?i)" + regexString[1:n-2])
}
if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/i") {
- return regexp.Compile("(?i)" + regexString[1:n-2])
+ return regexpCompileCached("(?i)" + regexString[1:n-2])
}
- return regexp.Compile(regexString)
+ return regexpCompileCached(regexString)
}
// CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to
@@ -110,7 +152,7 @@ func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp {
// In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
// on "," to ["a", "b", "c" and "a" splits to ["a"], both of which are fine --
// but "" splits to [""] when I wish it were []. This function does the latter.
-func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string {
if input == "" {
return make([]string, 0)
} else {
@@ -118,193 +160,42 @@ func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
}
}
-// MakeEmptyRegexCaptures is for initial CST state at the start of executing
-// the DSL expression for the current record. Even if '$x =~ "(..)_(...)" set
-// "\1" and "\2" on the previous record, at start of processing for the current
-// record we need to start with a clean slate.
-func MakeEmptyRegexCaptures() []string {
- return nil
-}
-
-// RegexReplacementHasCaptures is used by the CST builder to see if
-// string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it
-// needs to retain the compiled offsets-matrix information.
-func RegexReplacementHasCaptures(
- replacement string,
-) (
- hasCaptures bool,
- matrix [][]int,
-) {
- if captureDetector.MatchString(replacement) {
- return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
- } else {
- return false, nil
- }
-}
-
-// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
-// state and may be used by a DSL statement after the =~. For example, in
-//
-// sub($a, "(..)_(...)", "\1:\2")
-//
-// the replacement string is an argument to sub and therefore the captures are
-// confined to the implementation of the sub function. Similarly for gsub. But
-// for the match operator, people can do
-//
-// if ($x =~ "(..)_(...)") {
-// ... other lines of code ...
-// $y = "\2:\1"
-// }
-//
-// and the =~ callsite doesn't know if captures will be used or not. So,
-// RegexMatches always returns the captures array. It is stored within the CST
-// state.
-func RegexMatches(
- input string,
- sregex string,
-) (
- matches bool,
- capturesOneUp []string,
-) {
- regex := CompileMillerRegexOrDie(sregex)
- return RegexMatchesCompiled(input, regex)
-}
-
-// RegexMatchesCompiled is the implementation for the =~ operator. Without
-// Miller-style regex captures this would a simple one-line
-// regex.MatchString(input). However, we return the captures array for the
-// benefit of subsequent references to "\0".."\9".
-func RegexMatchesCompiled(
- input string,
- regex *regexp.Regexp,
-) (bool, []string) {
- matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
- if matrix == nil || len(matrix) == 0 {
- // Set all captures to ""
- return false, make([]string, 10)
- }
-
- // "\0" .. "\9"
- captures := make([]string, 10)
-
- // If there are multiple matches -- e.g. input is
- //
- // "...ab_cde...fg_hij..."
- //
- // with regex
- //
- // "(..)_(...)"
- //
- // -- then we only consider the first match: boolean return value is true
- // (the input string matched the regex), and the captures array will map
- // "\1" to "ab" and "\2" to "cde".
- row := matrix[0]
- n := len(row)
-
- // Example return value from FindAllSubmatchIndex with input
- // "...ab_cde...fg_hij..." and regex "(..)_(...)":
- //
- // Matrix is [][]int{
- // []int{3, 9, 3, 5, 6, 9},
- // []int{12, 18, 12, 14, 15, 18},
- // }
- //
- // As noted above we look at only the first row.
- //
- // * 3-9 is for the entire match "ab_cde"
- // * 3-5 is for the first capture "ab"
- // * 6-9 is for the second capture "cde"
-
- di := 0
- for si := 0; si < n && di <= 9; si += 2 {
- start := row[si]
- end := row[si+1]
- if start >= 0 && end >= 0 {
- captures[di] = input[start:end]
- }
- di += 1
- }
-
- return true, captures
-}
-
-// InterpolateCaptures example:
-// - Input $x is "ab_cde"
-// - DSL expression
-// if ($x =~ "(..)_(...)") {
-// ... other lines of code ...
-// $y = "\2:\1";
-// }
-// - InterpolateCaptures is used on the evaluation of "\2:\1"
-// - replacementString is "\2:\1"
-// - replacementMatrix contains precomputed/cached offsets for the "\2" and
-// "\1" substrings within "\2:\1"
-// - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
-// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
-func InterpolateCaptures(
- replacementString string,
- replacementMatrix [][]int,
- captures []string,
-) string {
- if replacementMatrix == nil || captures == nil {
- return replacementString
- }
- var buffer bytes.Buffer
-
- nonMatchStartIndex := 0
-
- for _, row := range replacementMatrix {
- start := row[0]
- buffer.WriteString(replacementString[nonMatchStartIndex:row[0]])
-
- // Map "\0".."\9" to integer index 0..9
- index := replacementString[start+1] - '0'
- buffer.WriteString(captures[index])
-
- nonMatchStartIndex = row[1]
- }
-
- buffer.WriteString(replacementString[nonMatchStartIndex:])
-
- return buffer.String()
-}
-
-// RegexSub implements the sub DSL function.
-func RegexSub(
+// RegexStringSub implements the sub DSL function.
+func RegexStringSub(
input string,
sregex string,
replacement string,
) string {
regex := CompileMillerRegexOrDie(sregex)
- _, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
- return RegexSubCompiled(input, regex, replacement, replacementCaptureMatrix)
+ _, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+ return RegexCompiledSub(input, regex, replacement, replacementCaptureMatrix)
}
-// RegexSubCompiled is the same as RegexSub but with compiled regex and
+// RegexCompiledSub is the same as RegexStringSub but with compiled regex and
// replacement strings.
-func RegexSubCompiled(
+func RegexCompiledSub(
input string,
regex *regexp.Regexp,
replacement string,
replacementCaptureMatrix [][]int,
) string {
- return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, true)
+ return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, true)
}
-// RegexGsub implements the gsub DSL function.
-func RegexGsub(
+// RegexStringGsub implements the `gsub` DSL function.
+func RegexStringGsub(
input string,
sregex string,
replacement string,
) string {
regex := CompileMillerRegexOrDie(sregex)
- _, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
- return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, false)
+ _, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+ return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, false)
}
-// regexSubGsubCompiled is the implementation for sub/gsub with compilex regex
+// regexCompiledSubOrGsub is the implementation for `sub`/`gsub` with compiled regex
// and replacement strings.
-func regexSubGsubCompiled(
+func regexCompiledSubOrGsub(
input string,
regex *regexp.Regexp,
replacement string,
@@ -384,3 +275,177 @@ func regexSubGsubCompiled(
buffer.WriteString(input[nonMatchStartIndex:])
return buffer.String()
}
+
+// RegexStringMatchSimple is for simple boolean return without any substring captures.
+func RegexStringMatchSimple(
+ input string,
+ sregex string,
+) bool {
+ regex := CompileMillerRegexOrDie(sregex)
+ return RegexCompiledMatchSimple(input, regex)
+}
+
+// RegexCompiledMatchSimple is for simple boolean return without any substring captures.
+func RegexCompiledMatchSimple(
+ input string,
+ regex *regexp.Regexp,
+) bool {
+ return regex.Match([]byte(input))
+}
+
+// RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL
+// state and may be used by a DSL statement after the =~. For example, in
+//
+// sub($a, "(..)_(...)", "\1:\2")
+//
+// the replacement string is an argument to sub and therefore the captures are
+// confined to the implementation of the sub function. Similarly for gsub. But
+// for the match operator, people can do
+//
+// if ($x =~ "(..)_(...)") {
+// ... other lines of code ...
+// $y = "\2:\1"
+// }
+//
+// and the =~ callsite doesn't know if captures will be used or not. So,
+// RegexStringMatchWithCaptures always returns the captures array. It is stored within the CST
+// state.
+func RegexStringMatchWithCaptures(
+ input string,
+ sregex string,
+) (
+ matches bool,
+ capturesOneUp []string,
+) {
+ regex := CompileMillerRegexOrDie(sregex)
+ return RegexCompiledMatchWithCaptures(input, regex)
+}
+
+// RegexCompiledMatchWithCaptures is the implementation for the =~ operator. Without
+// Miller-style regex captures this would be a simple one-line
+// regex.MatchString(input). However, we return the captures array for the
+// benefit of subsequent references to "\0".."\9".
+func RegexCompiledMatchWithCaptures(
+ input string,
+ regex *regexp.Regexp,
+) (bool, []string) {
+ matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+ if matrix == nil || len(matrix) == 0 {
+ // Set all captures to ""
+ return false, make([]string, 10)
+ }
+
+ // "\0" .. "\9"
+ captures := make([]string, 10)
+
+ // If there are multiple matches -- e.g. input is
+ //
+ // "...ab_cde...fg_hij..."
+ //
+ // with regex
+ //
+ // "(..)_(...)"
+ //
+ // -- then we only consider the first match: boolean return value is true
+ // (the input string matched the regex), and the captures array will map
+ // "\1" to "ab" and "\2" to "cde".
+ row := matrix[0]
+ n := len(row)
+
+ // Example return value from FindAllSubmatchIndex with input
+ // "...ab_cde...fg_hij..." and regex "(..)_(...)":
+ //
+ // Matrix is [][]int{
+ // []int{3, 9, 3, 5, 6, 9},
+ // []int{12, 18, 12, 14, 15, 18},
+ // }
+ //
+ // As noted above we look at only the first row.
+ //
+ // * 3-9 is for the entire match "ab_cde"
+ // * 3-5 is for the first capture "ab"
+ // * 6-9 is for the second capture "cde"
+
+ di := 0
+ for si := 0; si < n && di <= 9; si += 2 {
+ start := row[si]
+ end := row[si+1]
+ if start >= 0 && end >= 0 {
+ captures[di] = input[start:end]
+ }
+ di += 1
+ }
+
+ return true, captures
+}
+
+// MakeEmptyCaptures is for initial CST state at the start of executing the DSL expression for the
+// current record. Even if '$x =~ "(..)_(...)"' set "\1" and "\2" on the previous record, at start
+// of processing for the current record we need to start with a clean slate. This is in support of
+// CST state, which `=~` semantics requires.
+func MakeEmptyCaptures() []string {
+ return nil
+}
+
+// ReplacementHasCaptures is used by the CST builder to see if string-literal is like "foo bar" or
+// "foo \1 bar" -- in the latter case it needs to retain the compiled offsets-matrix information.
+// This is in support of CST state, which `=~` semantics requires.
+func ReplacementHasCaptures(
+ replacement string,
+) (
+ hasCaptures bool,
+ matrix [][]int,
+) {
+ if captureDetector.MatchString(replacement) {
+ return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+ } else {
+ return false, nil
+ }
+}
+
+// InterpolateCaptures example:
+//
+// * Input $x is "ab_cde"
+//
+// - DSL expression
+// if ($x =~ "(..)_(...)") {
+// ... other lines of code ...
+// $y = "\2:\1";
+// }
+//
+// * InterpolateCaptures is used on the evaluation of "\2:\1"
+//
+// * replacementString is "\2:\1"
+//
+// - replacementMatrix contains precomputed/cached offsets for the "\2" and
+// "\1" substrings within "\2:\1"
+//
+// - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
+// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
+func InterpolateCaptures(
+ replacementString string,
+ replacementMatrix [][]int,
+ captures []string,
+) string {
+ if replacementMatrix == nil || captures == nil {
+ return replacementString
+ }
+ var buffer bytes.Buffer
+
+ nonMatchStartIndex := 0
+
+ for _, row := range replacementMatrix {
+ start := row[0]
+ buffer.WriteString(replacementString[nonMatchStartIndex:row[0]])
+
+ // Map "\0".."\9" to integer index 0..9
+ index := replacementString[start+1] - '0'
+ buffer.WriteString(captures[index])
+
+ nonMatchStartIndex = row[1]
+ }
+
+ buffer.WriteString(replacementString[nonMatchStartIndex:])
+
+ return buffer.String()
+}
diff --git a/pkg/lib/regex_test.go b/pkg/lib/regex_test.go
index 961d73f8d..d2a8f5f70 100644
--- a/pkg/lib/regex_test.go
+++ b/pkg/lib/regex_test.go
@@ -88,7 +88,7 @@ var dataForMatches = []tDataForMatches{
func TestRegexReplacementHasCaptures(t *testing.T) {
for i, entry := range dataForHasCaptures {
- actualHasCaptures, actualMatrix := RegexReplacementHasCaptures(entry.replacement)
+ actualHasCaptures, actualMatrix := ReplacementHasCaptures(entry.replacement)
if actualHasCaptures != entry.expectedHasCaptures {
t.Fatalf("case %d replacement \"%s\" expected %v got %v\n",
i, entry.replacement, entry.expectedHasCaptures, actualHasCaptures,
@@ -104,7 +104,7 @@ func TestRegexReplacementHasCaptures(t *testing.T) {
func TestRegexSub(t *testing.T) {
for i, entry := range dataForSub {
- actualOutput := RegexSub(entry.input, entry.sregex, entry.replacement)
+ actualOutput := RegexStringSub(entry.input, entry.sregex, entry.replacement)
if actualOutput != entry.expectedOutput {
t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -115,7 +115,7 @@ func TestRegexSub(t *testing.T) {
func TestRegexGsub(t *testing.T) {
for i, entry := range dataForGsub {
- actualOutput := RegexGsub(entry.input, entry.sregex, entry.replacement)
+ actualOutput := RegexStringGsub(entry.input, entry.sregex, entry.replacement)
if actualOutput != entry.expectedOutput {
t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -126,7 +126,7 @@ func TestRegexGsub(t *testing.T) {
func TestRegexMatches(t *testing.T) {
for i, entry := range dataForMatches {
- actualOutput, actualCaptures := RegexMatches(entry.input, entry.sregex)
+ actualOutput, actualCaptures := RegexStringMatchWithCaptures(entry.input, entry.sregex)
if actualOutput != entry.expectedOutput {
t.Fatalf("case %d input \"%s\" sregex \"%s\" expected %v got %v\n",
i, entry.input, entry.sregex, entry.expectedOutput, actualOutput,
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index e94fd4ce5..820f40c3d 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -43,8 +43,8 @@ func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
// OutputRecordsAndContexts is assigned after construction
- // See lib.MakeEmptyRegexCaptures for context.
- RegexCaptures: lib.MakeEmptyRegexCaptures(),
+ // See lib.MakeEmptyCaptures for context.
+ RegexCaptures: lib.MakeEmptyCaptures(),
Options: options,
StrictMode: strictMode,
@@ -57,5 +57,5 @@ func (state *State) Update(
) {
state.Inrec = inrec
state.Context = context
- state.RegexCaptures = lib.MakeEmptyRegexCaptures()
+ state.RegexCaptures = lib.MakeEmptyCaptures()
}
diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go
index de1a555c3..7ee2d9fad 100644
--- a/pkg/transformers/merge_fields.go
+++ b/pkg/transformers/merge_fields.go
@@ -479,7 +479,7 @@ func (tr *TransformerMergeFields) transformByCollapsing(
matched = valueFieldNameRegex.MatchString(pe.Key)
if matched {
// TODO: comment re matrix
- shortName = lib.RegexSubCompiled(valueFieldName, valueFieldNameRegex, "", nil)
+ shortName = lib.RegexCompiledSub(valueFieldName, valueFieldNameRegex, "", nil)
break
}
}
diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go
index e5f0658b8..7880b6ead 100644
--- a/pkg/transformers/rename.go
+++ b/pkg/transformers/rename.go
@@ -169,7 +169,7 @@ func NewTransformerRename(
regexString := pe.Key
regex := lib.CompileMillerRegexOrDie(regexString)
replacement := pe.Value.(string)
- _, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(replacement)
+ _, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
regexAndReplacement := tRegexAndReplacement{
regex: regex,
replacement: replacement,
@@ -241,7 +241,7 @@ func (tr *TransformerRename) transformWithRegexes(
inrec.Rename(oldName, newName)
}
} else {
- newName := lib.RegexSubCompiled(oldName, regex, replacement, replacementCaptureMatrix)
+ newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
if newName != oldName {
inrec.Rename(oldName, newName)
}