Rename internal regex functions (#1446)

2026-01-23 02:14:13 +00:00 · 2023-12-17 12:46:28 -05:00 · 2023-12-17 12:46:28 -05:00 · 1ae670fd4a
commit 1ae670fd4a
parent b5dbd7a751
14 changed files with 395 additions and 258 deletions
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@ -220,18 +220,19 @@ MILLER(1)                                                            MILLER(1)
       is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
       kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
       localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // &lt; &lt;&lt; &lt;= &lt;=&gt; == =~ &gt; &gt;= &gt;&gt; &gt;&gt;&gt; ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // &lt; &lt;&lt; &lt;= &lt;=&gt; == =~ &gt; &gt;= &gt;&gt; &gt;&gt;&gt; ?: ?? ??? ^ ^^ |
+       || ~

 1mCOMMENTS-IN-DATA FLAGS0m
       Miller lets you put comments in your data, such as
@ -2650,6 +2651,16 @@ MILLER(1)                                                            MILLER(1)
   1mmapsum0m
        (class=collections #args=variadic) With 0 args, returns empty map. With &gt;= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.

+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
   1mmax0m
        (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.

@ -3649,5 +3660,5 @@ MILLER(1)                                                            MILLER(1)



-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
 </pre>
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
       is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
       kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
       localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~

 1mCOMMENTS-IN-DATA FLAGS0m
       Miller lets you put comments in your data, such as
@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
   1mmapsum0m
        (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.

+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
   1mmax0m
        (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.

@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)



-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@ -75,7 +75,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary
 * [**Higher-order-functions functions**](#higher-order-functions-functions):  [any](#any),  [apply](#apply),  [every](#every),  [fold](#fold),  [reduce](#reduce),  [select](#select),  [sort](#sort).
 * [**Math functions**](#math-functions):  [abs](#abs),  [acos](#acos),  [acosh](#acosh),  [asin](#asin),  [asinh](#asinh),  [atan](#atan),  [atan2](#atan2),  [atanh](#atanh),  [cbrt](#cbrt),  [ceil](#ceil),  [cos](#cos),  [cosh](#cosh),  [erf](#erf),  [erfc](#erfc),  [exp](#exp),  [expm1](#expm1),  [floor](#floor),  [invqnorm](#invqnorm),  [log](#log),  [log10](#log10),  [log1p](#log1p),  [logifit](#logifit),  [max](#max),  [min](#min),  [qnorm](#qnorm),  [round](#round),  [roundm](#roundm),  [sgn](#sgn),  [sin](#sin),  [sinh](#sinh),  [sqrt](#sqrt),  [tan](#tan),  [tanh](#tanh),  [urand](#urand),  [urand32](#urand32),  [urandelement](#urandelement),  [urandint](#urandint),  [urandrange](#urandrange).
 * [**Stats functions**](#stats-functions):  [antimode](#antimode),  [count](#count),  [distinct_count](#distinct_count),  [kurtosis](#kurtosis),  [maxlen](#maxlen),  [mean](#mean),  [meaneb](#meaneb),  [median](#median),  [minlen](#minlen),  [mode](#mode),  [null_count](#null_count),  [percentile](#percentile),  [percentiles](#percentiles),  [skewness](#skewness),  [sort_collection](#sort_collection),  [stddev](#stddev),  [sum](#sum),  [sum2](#sum2),  [sum3](#sum3),  [sum4](#sum4),  [variance](#variance).
-* [**String functions**](#string-functions):  [capitalize](#capitalize),  [clean_whitespace](#clean_whitespace),  [collapse_whitespace](#collapse_whitespace),  [contains](#contains),  [format](#format),  [gssub](#gssub),  [gsub](#gsub),  [index](#index),  [latin1_to_utf8](#latin1_to_utf8),  [leftpad](#leftpad),  [lstrip](#lstrip),  [regextract](#regextract),  [regextract_or_else](#regextract_or_else),  [rightpad](#rightpad),  [rstrip](#rstrip),  [ssub](#ssub),  [strip](#strip),  [strlen](#strlen),  [sub](#sub),  [substr](#substr),  [substr0](#substr0),  [substr1](#substr1),  [tolower](#tolower),  [toupper](#toupper),  [truncate](#truncate),  [unformat](#unformat),  [unformatx](#unformatx),  [utf8_to_latin1](#utf8_to_latin1),  [\.](#dot).
+* [**String functions**](#string-functions):  [capitalize](#capitalize),  [clean_whitespace](#clean_whitespace),  [collapse_whitespace](#collapse_whitespace),  [contains](#contains),  [format](#format),  [gssub](#gssub),  [gsub](#gsub),  [index](#index),  [latin1_to_utf8](#latin1_to_utf8),  [leftpad](#leftpad),  [lstrip](#lstrip),  [match](#match),  [matchx](#matchx),  [regextract](#regextract),  [regextract_or_else](#regextract_or_else),  [rightpad](#rightpad),  [rstrip](#rstrip),  [ssub](#ssub),  [strip](#strip),  [strlen](#strlen),  [sub](#sub),  [substr](#substr),  [substr0](#substr0),  [substr1](#substr1),  [tolower](#tolower),  [toupper](#toupper),  [truncate](#truncate),  [unformat](#unformat),  [unformatx](#unformatx),  [utf8_to_latin1](#utf8_to_latin1),  [\.](#dot).
 * [**System functions**](#system-functions):  [exec](#exec),  [hostname](#hostname),  [os](#os),  [system](#system),  [version](#version).
 * [**Time functions**](#time-functions):  [dhms2fsec](#dhms2fsec),  [dhms2sec](#dhms2sec),  [fsec2dhms](#fsec2dhms),  [fsec2hms](#fsec2hms),  [gmt2localtime](#gmt2localtime),  [gmt2nsec](#gmt2nsec),  [gmt2sec](#gmt2sec),  [hms2fsec](#hms2fsec),  [hms2sec](#hms2sec),  [localtime2gmt](#localtime2gmt),  [localtime2nsec](#localtime2nsec),  [localtime2sec](#localtime2sec),  [nsec2gmt](#nsec2gmt),  [nsec2gmtdate](#nsec2gmtdate),  [nsec2localdate](#nsec2localdate),  [nsec2localtime](#nsec2localtime),  [sec2dhms](#sec2dhms),  [sec2gmt](#sec2gmt),  [sec2gmtdate](#sec2gmtdate),  [sec2hms](#sec2hms),  [sec2localdate](#sec2localdate),  [sec2localtime](#sec2localtime),  [strfntime](#strfntime),  [strfntime_local](#strfntime_local),  [strftime](#strftime),  [strftime_local](#strftime_local),  [strpntime](#strpntime),  [strpntime_local](#strpntime_local),  [strptime](#strptime),  [strptime_local](#strptime_local),  [sysntime](#sysntime),  [systime](#systime),  [systimeint](#systimeint),  [upntime](#upntime),  [uptime](#uptime).
 * [**Typing functions**](#typing-functions):  [asserting_absent](#asserting_absent),  [asserting_array](#asserting_array),  [asserting_bool](#asserting_bool),  [asserting_boolean](#asserting_boolean),  [asserting_empty](#asserting_empty),  [asserting_empty_map](#asserting_empty_map),  [asserting_error](#asserting_error),  [asserting_float](#asserting_float),  [asserting_int](#asserting_int),  [asserting_map](#asserting_map),  [asserting_nonempty_map](#asserting_nonempty_map),  [asserting_not_array](#asserting_not_array),  [asserting_not_empty](#asserting_not_empty),  [asserting_not_map](#asserting_not_map),  [asserting_not_null](#asserting_not_null),  [asserting_null](#asserting_null),  [asserting_numeric](#asserting_numeric),  [asserting_present](#asserting_present),  [asserting_string](#asserting_string),  [is_absent](#is_absent),  [is_array](#is_array),  [is_bool](#is_bool),  [is_boolean](#is_boolean),  [is_empty](#is_empty),  [is_empty_map](#is_empty_map),  [is_error](#is_error),  [is_float](#is_float),  [is_int](#is_int),  [is_map](#is_map),  [is_nan](#is_nan),  [is_nonempty_map](#is_nonempty_map),  [is_not_array](#is_not_array),  [is_not_empty](#is_not_empty),  [is_not_map](#is_not_map),  [is_not_null](#is_not_null),  [is_null](#is_null),  [is_numeric](#is_numeric),  [is_present](#is_present),  [is_string](#is_string),  [typeof](#typeof).
@ -1296,6 +1296,22 @@ lstrip  (class=string #args=1) Strip leading whitespace from string.
 </pre>


+### match
+<pre class="pre-non-highlight-non-pair">
+match  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+</pre>
+
+
+### matchx
+<pre class="pre-non-highlight-non-pair">
+matchx  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+</pre>
+
+
 ### regextract
 <pre class="pre-non-highlight-non-pair">
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
--- a/man/manpage.txt
+++ b/man/manpage.txt
@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
       is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
       kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
       localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~

 1mCOMMENTS-IN-DATA FLAGS0m
       Miller lets you put comments in your data, such as
@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
   1mmapsum0m
        (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.

+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
   1mmax0m
        (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.

@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)



-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
--- a/man/mlr.1
+++ b/man/mlr.1
@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-13
+.\"      Date: 2023-12-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -246,18 +246,19 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null
 is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
 kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
 localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-strfntime_local strftime strftime_local string strip strlen strpntime
-strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+stddev strfntime strfntime_local strftime strftime_local string strip strlen
+strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+|| ~
 .fi
 .if n \{\
 .RE
@ -3938,6 +3939,28 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "match"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
+.SS "matchx"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
 .SS "max"
 .if n \{\
 .RS 0
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@ -81,7 +81,7 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()

-	stringOutput := lib.RegexSub(input, sregex, replacement)
+	stringOutput := lib.RegexStringSub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }

@ -111,7 +111,7 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()

-	stringOutput := lib.RegexGsub(input, sregex, replacement)
+	stringOutput := lib.RegexStringGsub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }

@ -129,7 +129,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
 		return mlrval.FromNotStringError("=~", input2), nil
 	}

-	boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue())
+	boolOutput, captures := lib.RegexStringMatchWithCaptures(input1string, input2.AcquireStringValue())
 	return mlrval.FromBool(boolOutput), captures
 }

--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@ -266,7 +266,7 @@ func (root *RootNode) BuildStringLiteralNode(literal string) IEvaluable {
 	// RegexLiteralNode.  See also https://github.com/johnkerl/miller/issues/297.
 	literal = lib.UnbackslashStringLiteral(literal)

-	hasCaptures, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(literal)
+	hasCaptures, replacementCaptureMatrix := lib.ReplacementHasCaptures(literal)
 	if !hasCaptures {
 		return &StringLiteralNode{
 			literal: mlrval.FromString(literal),
--- a/pkg/input/record_reader.go
+++ b/pkg/input/record_reader.go
@ -158,7 +158,7 @@ type tIPSRegexSplitter struct {
 }

 func (s *tIPSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ipsRegex, input, 2)
+	return lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 }

 // IFieldSplitter splits a string into pieces, e.g. for IFS.
@ -193,5 +193,5 @@ type tIFSRegexSplitter struct {
 }

 func (s *tIFSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ifsRegex, input, -1)
+	return lib.RegexCompiledSplitString(s.ifsRegex, input, -1)
 }
--- a/pkg/input/record_reader_xtab.go
+++ b/pkg/input/record_reader_xtab.go
@ -304,7 +304,7 @@ type tXTABIPSSplitter struct {
 // which we need to produce just a pair of items -- a key and a value -- delimited by one or more
 // IPS. For example, with IPS being a space, in 'abc     123' we need to get key 'abc' and value
 // '123'; for 'abc    123 456' we need key 'abc' and value '123 456'.  It's super-elegant to simply
-// regex-split the line like 'kv = lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)' --
+// regex-split the line like 'kv = lib.RegexCompiledSplitString(reader.readerOptions.IPSRegex, line, 2)' --
 // however, that's 3x slower than the current implementation. It turns out regexes are great
 // but we should use them only when we must, since they are expensive.
 func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
@ -358,7 +358,7 @@ type tXTABIPSRegexSplitter struct {
 }

 func (s *tXTABIPSRegexSplitter) Split(input string) (key, value string, err error) {
-	kv := lib.RegexSplitString(s.ipsRegex, input, 2)
+	kv := lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 	if len(kv) == 0 {
 		return "", "", fmt.Errorf("internal coding error in XTAB reader")
 	} else if len(kv) == 1 {
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@ -1,5 +1,5 @@
 // ================================================================
-// Support for regexes in Miller.
+// Support for regular expressions in Miller.
 //
 // * By and large we use the Go library.
 //
@ -13,17 +13,24 @@
 //       $y = "\2:\1";
 //     }
 //   where the '=~' sets the captures and the "\2:\1" uses them.  (Note that
-//   https://github.com/johnkerl/miller/issues/388 has a better suggestion
-//   which would make the captures explicit as variables, rather than implicit
-//   within CST state -- regardless, the current syntax will still be supported
-//   for backward compatibility and so is here to stay.) Here we make use of Go
-//   regexp-library functions to write to, and then later interpolate from, a
-//   captures array which is stored within CST state. (See the `runtime.State`
-//   object.)
+//   https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the
+//   captures explicit as variables, rather than implicit within CST state: this is implemented by
+//   the `match` and `matchx` DSL functions.  Regardless, the `=~` syntax will still be supported
+//   for backward compatibility and so is here to stay.) Here we make use of Go regexp-library
+//   functions to write to, and then later interpolate from, a captures array which is stored within
+//   CST state. (See the `runtime.State` object.)
 //
 // * "\0" is for a full match; "\1" .. "\9" are for submatch captures. E.g.
 //   if $x is "foobarbaz" and the regex is "foo(.)(..)baz", then "\0" is
 //   "foobarbaz", "\1" is "b", "\2" is "ar", and "\3".."\9" are "".
+//
+// * Naming:
+//
+//   o "regexp" and "Regexp" are used for the Go library and its data structure, respectively;
+//
+//   o "regex" is used for regular-expression strings following Miller's idiosyncratic syntax and
+//     semantics as described above.
+//
 // ================================================================

 package lib
@ -34,6 +41,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"sync"
 )

 // captureDetector is used to see if a string literal interpolates previous
@ -44,20 +52,54 @@ var captureDetector = regexp.MustCompile(`\\[0-9]`)
 // "\2:\1" so they don't need to be recomputed on every record.
 var captureSplitter = regexp.MustCompile(`(\\[0-9])`)

-// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax
-// which predate the port of Miller from C to Go.  Miller regexes use a final
-// 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".
+// regexpCache maps Go-syntax regex strings to their compiled forms. It is lazily allocated by
+// regexpCompileCached, and all reads and writes of it must be done while holding cacheMutex.
+var regexpCache map[string]*regexp.Regexp
+
+// cacheMaxSize is the maximum number of compiled regexes retained in regexpCache.
+const cacheMaxSize = 1000
+
+// cacheMutex guards regexpCache.
+var cacheMutex sync.Mutex
+
+// regexpCompileCached keeps a cache of compiled regexes, so that the caller has the flexibility to
+// only pass in strings while getting the benefits of compilation avoidance: if the string has been
+// compiled before and is still cached, the previously compiled regexp is returned as-is.
+//
+// Regarding cache size: in nominal use, regex strings are within Miller DSL code statements, and
+// there will be a handful. These will all get re-used after their first application, and the cache
+// will remain bounded by the size of the user's DSL code. However, it is possible to have regex
+// strings contained within Miller record-field data.
+//
+// We could solve this by using an LRU cache. However, for simplicity, we limit the number of
+// cached compiles to cacheMaxSize, and for any extras that appear during record processing, we
+// simply recompile each time.
+//
+// Strings which fail to compile are never cached; the compile error is returned to the caller.
+func regexpCompileCached(s string) (*regexp.Regexp, error) {
+	// Fast path: return the previously compiled regexp. Indexing a nil map is legal in Go and
+	// reports a miss, so no allocation check is needed here.
+	cacheMutex.Lock()
+	cached, ok := regexpCache[s]
+	cacheMutex.Unlock()
+	if ok {
+		return cached, nil
+	}
+
+	// Compile outside the lock so that a slow compile does not block other callers.
+	r, err := regexp.Compile(s)
+	if err != nil {
+		return nil, err
+	}
+
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock()
+	if regexpCache == nil {
+		regexpCache = make(map[string]*regexp.Regexp)
+	}
+	// The size check is done under the lock so that it cannot race with concurrent inserts.
+	// Once the cache is full, new strings are simply not retained.
+	if len(regexpCache) < cacheMaxSize {
+		regexpCache[s] = r
+	}
+	return r, nil
+}
+
+// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the
+// port of Miller from C to Go.  Miller regexes use a final 'i' to indicate case-insensitivity; Go
+// regexes use an initial "(?i)".
+//
+// (See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error
+// at parse time.)
+//
+// * If the regex_string is of the form a.*b, compiles it case-sensitively.
+// * If the regex_string is of the form "a.*b", compiles a.*b case-sensitively.
 // * If the regex_string is of the form "a.*b"i, compiles a.*b case-insensitively.
 func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	n := len(regexString)
 	if n < 2 {
-		return regexp.Compile(regexString)
+		return regexpCompileCached(regexString)
 	}

 	// TODO: rethink this. This will strip out things people have entered, e.g. "\"...\"".
@ -68,20 +110,20 @@ func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	// literals) and from verbs (like cut -r or having-fields).

 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}

 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}

-	return regexp.Compile(regexString)
+	return regexpCompileCached(regexString)
 }

 // CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to
@ -110,7 +152,7 @@ func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp {
 // In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
 // on "," to ["a", "b", "c"] and "a" splits to ["a"], both of which are fine --
 // but "" splits to [""] when I wish it were []. This function does the latter.
-func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string {
 	if input == "" {
 		return make([]string, 0)
 	} else {
@ -118,193 +160,42 @@ func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
 	}
 }

-// MakeEmptyRegexCaptures is for initial CST state at the start of executing
-// the DSL expression for the current record.  Even if '$x =~ "(..)_(...)" set
-// "\1" and "\2" on the previous record, at start of processing for the current
-// record we need to start with a clean slate.
-func MakeEmptyRegexCaptures() []string {
-	return nil
-}
-
-// RegexReplacementHasCaptures is used by the CST builder to see if
-// string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it
-// needs to retain the compiled offsets-matrix information.
-func RegexReplacementHasCaptures(
-	replacement string,
-) (
-	hasCaptures bool,
-	matrix [][]int,
-) {
-	if captureDetector.MatchString(replacement) {
-		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
-	} else {
-		return false, nil
-	}
-}
-
-// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
-// state and may be used by a DSL statement after the =~. For example, in
-//
-//	sub($a, "(..)_(...)", "\1:\2")
-//
-// the replacement string is an argument to sub and therefore the captures are
-// confined to the implementation of the sub function.  Similarly for gsub. But
-// for the match operator, people can do
-//
-//	if ($x =~ "(..)_(...)") {
-//	  ... other lines of code ...
-//	  $y = "\2:\1"
-//	}
-//
-// and the =~ callsite doesn't know if captures will be used or not. So,
-// RegexMatches always returns the captures array. It is stored within the CST
-// state.
-func RegexMatches(
-	input string,
-	sregex string,
-) (
-	matches bool,
-	capturesOneUp []string,
-) {
-	regex := CompileMillerRegexOrDie(sregex)
-	return RegexMatchesCompiled(input, regex)
-}
-
-// RegexMatchesCompiled is the implementation for the =~ operator.  Without
-// Miller-style regex captures this would a simple one-line
-// regex.MatchString(input). However, we return the captures array for the
-// benefit of subsequent references to "\0".."\9".
-func RegexMatchesCompiled(
-	input string,
-	regex *regexp.Regexp,
-) (bool, []string) {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
-	if matrix == nil || len(matrix) == 0 {
-		// Set all captures to ""
-		return false, make([]string, 10)
-	}
-
-	// "\0" .. "\9"
-	captures := make([]string, 10)
-
-	// If there are multiple matches -- e.g. input is
-	//
-	//   "...ab_cde...fg_hij..."
-	//
-	// with regex
-	//
-	//   "(..)_(...)"
-	//
-	// -- then we only consider the first match: boolean return value is true
-	// (the input string matched the regex), and the captures array will map
-	// "\1" to "ab" and "\2" to "cde".
-	row := matrix[0]
-	n := len(row)
-
-	// Example return value from FindAllSubmatchIndex with input
-	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
-	//
-	// Matrix is [][]int{
-	//   []int{3, 9, 3, 5, 6, 9},
-	//   []int{12, 18, 12, 14, 15, 18},
-	// }
-	//
-	// As noted above we look at only the first row.
-	//
-	// * 3-9 is for the entire match "ab_cde"
-	// * 3-5 is for the first capture "ab"
-	// * 6-9 is for the second capture "cde"
-
-	di := 0
-	for si := 0; si < n && di <= 9; si += 2 {
-		start := row[si]
-		end := row[si+1]
-		if start >= 0 && end >= 0 {
-			captures[di] = input[start:end]
-		}
-		di += 1
-	}
-
-	return true, captures
-}
-
-// InterpolateCaptures example:
-//   - Input $x is "ab_cde"
-//   - DSL expression
-//     if ($x =~ "(..)_(...)") {
-//     ... other lines of code ...
-//     $y = "\2:\1";
-//     }
-//   - InterpolateCaptures is used on the evaluation of "\2:\1"
-//   - replacementString is "\2:\1"
-//   - replacementMatrix contains precomputed/cached offsets for the "\2" and
-//     "\1" substrings within "\2:\1"
-//   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
-//     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
-func InterpolateCaptures(
-	replacementString string,
-	replacementMatrix [][]int,
-	captures []string,
-) string {
-	if replacementMatrix == nil || captures == nil {
-		return replacementString
-	}
-	var buffer bytes.Buffer
-
-	nonMatchStartIndex := 0
-
-	for _, row := range replacementMatrix {
-		start := row[0]
-		buffer.WriteString(replacementString[nonMatchStartIndex:row[0]])
-
-		// Map "\0".."\9" to integer index 0..9
-		index := replacementString[start+1] - '0'
-		buffer.WriteString(captures[index])
-
-		nonMatchStartIndex = row[1]
-	}
-
-	buffer.WriteString(replacementString[nonMatchStartIndex:])
-
-	return buffer.String()
-}
-
-// RegexSub implements the sub DSL function.
-func RegexSub(
+// RegexStringSub implements the `sub` DSL function for a regex given as a string: it compiles sregex, locates any "\1"-style capture references in replacement, and delegates to RegexCompiledSub.
+func RegexStringSub(
 	input string,
 	sregex string,
 	replacement string,
 ) string {
 	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return RegexSubCompiled(input, regex, replacement, replacementCaptureMatrix)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement) // only the offsets matrix is needed; it is nil when there are no capture references
+	return RegexCompiledSub(input, regex, replacement, replacementCaptureMatrix)
 }

-// RegexSubCompiled is the same as RegexSub but with compiled regex and
+// RegexCompiledSub is the same as RegexStringSub but takes an already-compiled regex and
 // replacement strings.
-func RegexSubCompiled(
+func RegexCompiledSub(
 	input string,
 	regex *regexp.Regexp,
 	replacement string,
 	replacementCaptureMatrix [][]int,
 ) string {
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, true)
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, true) // final argument: true here for sub; RegexStringGsub passes false
 }

-// RegexGsub implements the gsub DSL function.
-func RegexGsub(
+// RegexStringGsub implements the `gsub` DSL function for a regex given as a string: it compiles sregex, locates any "\1"-style capture references in replacement, and delegates to regexCompiledSubOrGsub.
+func RegexStringGsub(
 	input string,
 	sregex string,
 	replacement string,
 ) string {
 	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, false)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement) // only the offsets matrix is needed; it is nil when there are no capture references
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, false) // final argument: false here for gsub; RegexCompiledSub passes true
 }

-// regexSubGsubCompiled is the implementation for sub/gsub with compilex regex
+// regexCompiledSubOrGsub is the implementation for `sub`/`gsub` with compiled regex
 // and replacement strings.
-func regexSubGsubCompiled(
+func regexCompiledSubOrGsub(
 	input string,
 	regex *regexp.Regexp,
 	replacement string,
@ -384,3 +275,177 @@ func regexSubGsubCompiled(
 	buffer.WriteString(input[nonMatchStartIndex:])
 	return buffer.String()
 }
+
+// RegexStringMatchSimple reports whether input matches the Miller regex string sregex, without computing any substring captures.
+func RegexStringMatchSimple(
+	input string,
+	sregex string,
+) bool {
+	regex := CompileMillerRegexOrDie(sregex) // NOTE(review): presumably fatal on an invalid regex, per the OrDie name -- confirm
+	return RegexCompiledMatchSimple(input, regex)
+}
+
+// RegexCompiledMatchSimple reports whether input matches the compiled regex, without computing any substring captures.
+func RegexCompiledMatchSimple(
+	input string,
+	regex *regexp.Regexp,
+) bool {
+	return regex.Match([]byte(input)) // NOTE(review): regex.MatchString(input) gives the same answer without the []byte conversion
+}
+
+// RegexStringMatchWithCaptures implements the =~ DSL operator for a regex given as a string:
+// it compiles sregex and delegates to RegexCompiledMatchWithCaptures. For example, in
+//
+//	sub($a, "(..)_(...)", "\1:\2")
+//
+// the replacement string is an argument to sub, so the captures are confined to the
+// implementation of the sub function; similarly for gsub. But for the match operator,
+// people can do
+//
+//	if ($x =~ "(..)_(...)") {
+//	  ... other lines of code ...
+//	  $y = "\2:\1"
+//	}
+//
+// and the =~ callsite doesn't know if captures will be used or not. So this function always
+// returns the captures array (slot 0 is "\0", the whole match), which is then stored within
+// the CST state for use by statements after the =~.
+func RegexStringMatchWithCaptures(
+	input string,
+	sregex string,
+) (
+	matches bool,
+	capturesOneUp []string, // NOTE(review): despite the name, slot 0 holds "\0" (the whole match), slot 1 holds "\1", and so on
+) {
+	regex := CompileMillerRegexOrDie(sregex) // NOTE(review): presumably fatal on an invalid regex, per the OrDie name -- confirm
+	return RegexCompiledMatchWithCaptures(input, regex)
+}
+
+// RegexCompiledMatchWithCaptures is the implementation for the =~ operator.  Without
+// Miller-style regex captures this would be a simple one-line
+// regex.MatchString(input). However, we return the captures array for the
+// benefit of subsequent references to "\0".."\9": always ten slots, all "" when there is no match.
+func RegexCompiledMatchWithCaptures(
+	input string,
+	regex *regexp.Regexp,
+) (bool, []string) {
+	matrix := regex.FindAllSubmatchIndex([]byte(input), -1) // NOTE(review): only matrix[0] is read below, so finding every match (n = -1) is more work than needed; regex.FindStringSubmatchIndex(input) looks sufficient -- confirm
+	if matrix == nil || len(matrix) == 0 { // len(matrix) == 0 alone already covers the nil case
+		// No match: all ten captures are ""
+		return false, make([]string, 10)
+	}
+
+	// Slots for "\0" .. "\9"
+	captures := make([]string, 10)
+
+	// If there are multiple matches -- e.g. input is
+	//
+	//   "...ab_cde...fg_hij..."
+	//
+	// with regex
+	//
+	//   "(..)_(...)"
+	//
+	// -- then we only consider the first match: boolean return value is true
+	// (the input string matched the regex), and the captures array will map
+	// "\1" to "ab" and "\2" to "cde".
+	row := matrix[0]
+	n := len(row)
+
+	// Example return value from FindAllSubmatchIndex with input
+	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
+	//
+	// Matrix is [][]int{
+	//   []int{3, 9, 3, 5, 6, 9},
+	//   []int{12, 18, 12, 14, 15, 18},
+	// }
+	//
+	// As noted above we look at only the first row.
+	//
+	// * 3-9 is for the entire match "ab_cde"
+	// * 3-5 is for the first capture "ab"
+	// * 6-9 is for the second capture "cde"
+
+	di := 0
+	for si := 0; si < n && di <= 9; si += 2 { // si walks (start, end) pairs; di <= 9 caps output at ten slots, so groups beyond "\9" are dropped
+		start := row[si]
+		end := row[si+1]
+		if start >= 0 && end >= 0 { // a negative index means this group did not participate in the match; its slot stays ""
+			captures[di] = input[start:end]
+		}
+		di += 1
+	}
+
+	return true, captures
+}
+
+// MakeEmptyCaptures gives the initial CST state at the start of executing the DSL expression for
+// the current record. Even if '$x =~ "(..)_(...)"' set "\1" and "\2" on the previous record, each
+// record must start with a clean slate. This is in support of CST state, which `=~` semantics
+// requires. The result is nil, which InterpolateCaptures treats as "leave the string unchanged".
+func MakeEmptyCaptures() []string {
+	return nil
+}
+
+// ReplacementHasCaptures is used by the CST builder to see if a string literal is like "foo bar" or
+// "foo \1 bar". In the latter case it returns true plus the offsets matrix of the capture references,
+// which the caller retains; otherwise false and nil. This supports CST state, which `=~` requires.
+func ReplacementHasCaptures(
+	replacement string,
+) (
+	hasCaptures bool,
+	matrix [][]int, // one row per capture reference; InterpolateCaptures reads row[0]/row[1] as its start/end offsets in replacement
+) {
+	if captureDetector.MatchString(replacement) { // captureDetector and captureSplitter are package-level regexes defined outside this hunk
+		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+	} else {
+		return false, nil
+	}
+}
+
+// InterpolateCaptures substitutes the "\0".."\9" references in replacementString with the
+// corresponding entries of captures. If replacementMatrix or captures is nil, the
+// replacement string is returned unchanged.
+//
+// Example:
+//
+//   - Input $x is "ab_cde".
+//   - The DSL expression is:
+//
+//	if ($x =~ "(..)_(...)") {
+//	  ... other lines of code ...
+//	  $y = "\2:\1";
+//	}
+//
+//   - InterpolateCaptures is used on the evaluation of "\2:\1", so replacementString is "\2:\1".
+//   - replacementMatrix contains precomputed/cached offsets for the "\2" and "\1" substrings
+//     within "\2:\1".
+//   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
+//     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
+func InterpolateCaptures(
+	replacementString string,
+	replacementMatrix [][]int,
+	captures []string,
+) string {
+	if replacementMatrix == nil || captures == nil { // nothing to interpolate
+		return replacementString
+	}
+	var buffer bytes.Buffer
+
+	nonMatchStartIndex := 0
+
+	for _, row := range replacementMatrix {
+		start := row[0]
+		buffer.WriteString(replacementString[nonMatchStartIndex:row[0]]) // literal text before this "\N" reference
+
+		// Map "\0".."\9" to integer index 0..9: the byte after the backslash, minus '0'.
+		index := replacementString[start+1] - '0'
+		buffer.WriteString(captures[index]) // NOTE(review): assumes len(captures) > index (ten slots, as the matchers here return) -- confirm for all callers
+
+		nonMatchStartIndex = row[1] // resume copying just past this "\N" reference
+	}
+
+	buffer.WriteString(replacementString[nonMatchStartIndex:]) // trailing literal text after the last reference
+
+	return buffer.String()
+}
--- a/pkg/lib/regex_test.go
+++ b/pkg/lib/regex_test.go
@ -88,7 +88,7 @@ var dataForMatches = []tDataForMatches{

 func TestRegexReplacementHasCaptures(t *testing.T) {
 	for i, entry := range dataForHasCaptures {
-		actualHasCaptures, actualMatrix := RegexReplacementHasCaptures(entry.replacement)
+		actualHasCaptures, actualMatrix := ReplacementHasCaptures(entry.replacement)
 		if actualHasCaptures != entry.expectedHasCaptures {
 			t.Fatalf("case %d replacement \"%s\" expected %v got %v\n",
 				i, entry.replacement, entry.expectedHasCaptures, actualHasCaptures,
@ -104,7 +104,7 @@ func TestRegexReplacementHasCaptures(t *testing.T) {

 func TestRegexSub(t *testing.T) {
 	for i, entry := range dataForSub {
-		actualOutput := RegexSub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringSub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@ -115,7 +115,7 @@ func TestRegexSub(t *testing.T) {

 func TestRegexGsub(t *testing.T) {
 	for i, entry := range dataForGsub {
-		actualOutput := RegexGsub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringGsub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@ -126,7 +126,7 @@ func TestRegexGsub(t *testing.T) {

 func TestRegexMatches(t *testing.T) {
 	for i, entry := range dataForMatches {
-		actualOutput, actualCaptures := RegexMatches(entry.input, entry.sregex)
+		actualOutput, actualCaptures := RegexStringMatchWithCaptures(entry.input, entry.sregex)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" expected %v got %v\n",
 				i, entry.input, entry.sregex, entry.expectedOutput, actualOutput,
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@ -43,8 +43,8 @@ func NewEmptyState(options *cli.TOptions, strictMode bool) *State {

 		// OutputRecordsAndContexts is assigned after construction

-		// See lib.MakeEmptyRegexCaptures for context.
-		RegexCaptures: lib.MakeEmptyRegexCaptures(),
+		// See lib.MakeEmptyCaptures for context.
+		RegexCaptures: lib.MakeEmptyCaptures(),
 		Options:       options,

 		StrictMode: strictMode,
@ -57,5 +57,5 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyRegexCaptures()
+	state.RegexCaptures = lib.MakeEmptyCaptures()
 }
--- a/pkg/transformers/merge_fields.go
+++ b/pkg/transformers/merge_fields.go
@ -479,7 +479,7 @@ func (tr *TransformerMergeFields) transformByCollapsing(
 			matched = valueFieldNameRegex.MatchString(pe.Key)
 			if matched {
 				// TODO: comment re matrix
-				shortName = lib.RegexSubCompiled(valueFieldName, valueFieldNameRegex, "", nil)
+				shortName = lib.RegexCompiledSub(valueFieldName, valueFieldNameRegex, "", nil)
 				break
 			}
 		}
--- a/pkg/transformers/rename.go
+++ b/pkg/transformers/rename.go
@ -169,7 +169,7 @@ func NewTransformerRename(
 			regexString := pe.Key
 			regex := lib.CompileMillerRegexOrDie(regexString)
 			replacement := pe.Value.(string)
-			_, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(replacement)
+			_, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
 			regexAndReplacement := tRegexAndReplacement{
 				regex:                    regex,
 				replacement:              replacement,
@ -241,7 +241,7 @@ func (tr *TransformerRename) transformWithRegexes(
 						inrec.Rename(oldName, newName)
 					}
 				} else {
-					newName := lib.RegexSubCompiled(oldName, regex, replacement, replacementCaptureMatrix)
+					newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
 					if newName != oldName {
 						inrec.Rename(oldName, newName)
 					}