miller/pkg/bifs/regex.go
John Kerl 8c791f5466
Static-check fixes from @lespea #1657, batch 4/n (#1706)
* Static-check fixes from @lespea #1657, batch 2/n

* Static-check fixes from @lespea #1657, batch 3/n

* Static-check fixes from @lespea #1657, batch 4/n
2024-10-27 12:00:25 -04:00

245 lines
7.1 KiB
Go

package bifs
import (
"strings"
"github.com/johnkerl/miller/v6/pkg/lib"
"github.com/johnkerl/miller/v6/pkg/mlrval"
)
// BIF_ssub implements the ssub function: a plain string replacement with no
// regex handling and no escape-sequence processing. It delegates to
// bif_ssub_gssub with the replace-all flag turned off, so at most the first
// occurrence of input2 within input1 is replaced by input3.
func BIF_ssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
	const replaceEvery = false
	return bif_ssub_gssub(input1, input2, input3, replaceEvery, "ssub")
}
// BIF_gssub implements the gssub function: a plain string replacement with no
// regex handling and no escape-sequence processing. It delegates to
// bif_ssub_gssub with the replace-all flag turned on, so every occurrence of
// input2 within input1 is replaced by input3.
func BIF_gssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
	const replaceEvery = true
	return bif_ssub_gssub(input1, input2, input3, replaceEvery, "gssub")
}
// bif_ssub_gssub is the common implementation behind BIF_ssub and BIF_gssub.
//
// Validation happens in two passes, each in argument order: first, any input
// that is error-or-absent is returned unchanged; then, any input that is not
// string-or-void yields a not-a-string error labelled with funcname.
//
// Once all inputs pass, occurrences of input2 in input1 are replaced with
// input3: all of them when doAll is true, otherwise only the first.
func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool, funcname string) *mlrval.Mlrval {
	// Cases are evaluated top to bottom, so the precedence among the checks
	// is exactly the order in which they are listed.
	switch {
	case input1.IsErrorOrAbsent():
		return input1
	case input2.IsErrorOrAbsent():
		return input2
	case input3.IsErrorOrAbsent():
		return input3
	case !input1.IsStringOrVoid():
		return mlrval.FromNotStringError(funcname, input1)
	case !input2.IsStringOrVoid():
		return mlrval.FromNotStringError(funcname, input2)
	case !input3.IsStringOrVoid():
		return mlrval.FromNotStringError(funcname, input3)
	}

	// A negative count tells strings.Replace to replace without limit, which
	// is what strings.ReplaceAll does.
	count := 1
	if doAll {
		count = -1
	}

	haystack := input1.AcquireStringValue()
	needle := input2.AcquireStringValue()
	substitute := input3.AcquireStringValue()
	return mlrval.FromString(strings.Replace(haystack, needle, substitute, count))
}
// BIF_sub implements the sub function, with support for regexes and regex
// captures of the form "\1" .. "\9".
//
// Any error-or-absent input is returned unchanged (checked in argument
// order); after that, any input that is not string-or-void produces a
// not-a-string error labelled "sub". Otherwise the substitution itself is
// delegated to lib.RegexStringSub, with input1 as the subject, input2 as the
// regex, and input3 as the replacement.
func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
	// Cases are evaluated top to bottom, preserving check precedence.
	switch {
	case input1.IsErrorOrAbsent():
		return input1
	case input2.IsErrorOrAbsent():
		return input2
	case input3.IsErrorOrAbsent():
		return input3
	case !input1.IsStringOrVoid():
		return mlrval.FromNotStringError("sub", input1)
	case !input2.IsStringOrVoid():
		return mlrval.FromNotStringError("sub", input2)
	case !input3.IsStringOrVoid():
		return mlrval.FromNotStringError("sub", input3)
	}

	return mlrval.FromString(
		lib.RegexStringSub(
			input1.AcquireStringValue(),
			input2.AcquireStringValue(),
			input3.AcquireStringValue(),
		),
	)
}
// BIF_gsub implements the gsub function, with support for regexes and regex
// captures of the form "\1" .. "\9".
//
// Any error-or-absent input is returned unchanged (checked in argument
// order); after that, any input that is not string-or-void produces a
// not-a-string error labelled "gsub". Otherwise the substitution itself is
// delegated to lib.RegexStringGsub, with input1 as the subject, input2 as the
// regex, and input3 as the replacement.
func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
	// Cases are evaluated top to bottom, preserving check precedence.
	switch {
	case input1.IsErrorOrAbsent():
		return input1
	case input2.IsErrorOrAbsent():
		return input2
	case input3.IsErrorOrAbsent():
		return input3
	case !input1.IsStringOrVoid():
		return mlrval.FromNotStringError("gsub", input1)
	case !input2.IsStringOrVoid():
		return mlrval.FromNotStringError("gsub", input2)
	case !input3.IsStringOrVoid():
		return mlrval.FromNotStringError("gsub", input3)
	}

	return mlrval.FromString(
		lib.RegexStringGsub(
			input1.AcquireStringValue(),
			input2.AcquireStringValue(),
			input3.AcquireStringValue(),
		),
	)
}
// BIF_strmatch implements the strmatch function. It returns a boolean mlrval
// carrying the result of lib.RegexStringMatchSimple applied to the string
// form of input1 and the regex in input2.
//
// An input that fails IsLegit, or an input2 that is not string-or-void,
// produces a not-a-string error labelled "strmatch". Note that input1 is not
// required to be a string: it is formatted via its String method.
func BIF_strmatch(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	for _, arg := range []*mlrval.Mlrval{input1, input2} {
		if !arg.IsLegit() {
			return mlrval.FromNotStringError("strmatch", arg)
		}
	}

	subject := input1.String()
	if !input2.IsStringOrVoid() {
		return mlrval.FromNotStringError("strmatch", input2)
	}

	matched := lib.RegexStringMatchSimple(subject, input2.AcquireStringValue())
	return mlrval.FromBool(matched)
}
// BIF_strmatchx implements the strmatchx function. It matches the string
// form of input1 against the regex in input2 using
// lib.RegexStringMatchWithMapResults and returns a map describing the result.
//
// The returned map always has the key "matched" (boolean). When the helper
// reports at least one capture, the map additionally gets:
//   - "full_capture", "full_start", "full_end": element 0 of the captures,
//     starts, and ends slices respectively;
//   - "captures", "starts", "ends": arrays of the remaining elements, present
//     only when there is more than one capture.
//
// An input that fails IsLegit, or an input2 that is not string-or-void,
// produces a not-a-string error labelled "strmatchx".
func BIF_strmatchx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	for _, arg := range []*mlrval.Mlrval{input1, input2} {
		if !arg.IsLegit() {
			return mlrval.FromNotStringError("strmatchx", arg)
		}
	}

	subject := input1.String()
	if !input2.IsStringOrVoid() {
		return mlrval.FromNotStringError("strmatchx", input2)
	}

	matched, caps, begins, finishes := lib.RegexStringMatchWithMapResults(subject, input2.AcquireStringValue())

	out := mlrval.NewMlrmap()
	out.PutReference("matched", mlrval.FromBool(matched))

	// Nothing beyond the "matched" flag is reported without captures.
	if len(caps) == 0 {
		return mlrval.FromMap(out)
	}

	// In each slice, element 0 describes the full match and goes under its own
	// "full_*" key; slot 0 of the corresponding array is left unset and is
	// sliced away below.
	capVals := make([]*mlrval.Mlrval, len(caps))
	out.PutReference("full_capture", mlrval.FromString(caps[0]))
	for i := 1; i < len(caps); i++ {
		capVals[i] = mlrval.FromString(caps[i])
	}

	beginVals := make([]*mlrval.Mlrval, len(begins))
	for i, pos := range begins {
		val := mlrval.FromInt(int64(pos))
		if i == 0 {
			out.PutReference("full_start", val)
			continue
		}
		beginVals[i] = val
	}

	finishVals := make([]*mlrval.Mlrval, len(finishes))
	for i, pos := range finishes {
		val := mlrval.FromInt(int64(pos))
		if i == 0 {
			out.PutReference("full_end", val)
			continue
		}
		finishVals[i] = val
	}

	// Sub-captures are only emitted when there is something past element 0.
	if len(caps) > 1 {
		out.PutReference("captures", mlrval.FromArray(capVals[1:]))
		out.PutReference("starts", mlrval.FromArray(beginVals[1:]))
		out.PutReference("ends", mlrval.FromArray(finishVals[1:]))
	}

	return mlrval.FromMap(out)
}
// BIF_string_matches_regexp implements the =~ operator, with support for
// setting regex-captures for later expressions to access using "\1" .. "\9".
//
// An input that fails IsLegit is returned unchanged with nil captures
// (input1 is checked first). If input2 is not string-or-void, the result is a
// not-a-string error labelled "=~", again with nil captures. Otherwise the
// string form of input1 is matched against input2 via
// lib.RegexStringMatchWithCaptures, and the boolean outcome is returned along
// with the captures that helper produced.
func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Mlrval, captures []string) {
	for _, arg := range []*mlrval.Mlrval{input1, input2} {
		if !arg.IsLegit() {
			return arg, nil
		}
	}

	subject := input1.String()
	if !input2.IsStringOrVoid() {
		return mlrval.FromNotStringError("=~", input2), nil
	}

	matched, found := lib.RegexStringMatchWithCaptures(subject, input2.AcquireStringValue())
	return mlrval.FromBool(matched), found
}
// BIF_string_does_not_match_regexp implements the !=~ operator. It evaluates
// BIF_string_matches_regexp and negates a boolean result; any non-boolean
// result (error, absent, etc.) is passed through untouched. The captures
// returned by BIF_string_matches_regexp are forwarded in both cases.
func BIF_string_does_not_match_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Mlrval, captures []string) {
	result, found := BIF_string_matches_regexp(input1, input2)
	if !result.IsBool() {
		// Leave it as error, absent, etc.
		return result, found
	}
	return mlrval.FromBool(!result.AcquireBoolValue()), found
}
// BIF_regextract implements the regextract function. It compiles input2 as a
// regex via lib.CompileMillerRegexOrDie and returns the first portion of
// input1 that the regex matches, or absent when there is no match.
//
// Both inputs must be strings; otherwise a not-a-string error labelled
// "regextract" is returned for the first offending input.
func BIF_regextract(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	// The error label must name this function so that users see which call
	// rejected their input.
	if !input1.IsString() {
		return mlrval.FromNotStringError("regextract", input1)
	}
	if !input2.IsString() {
		return mlrval.FromNotStringError("regextract", input2)
	}

	regex := lib.CompileMillerRegexOrDie(input2.AcquireStringValue())
	subject := input1.AcquireStringValue()

	// span is nil on no match; otherwise it holds the match's [start, end)
	// offsets within subject.
	span := regex.FindStringIndex(subject)
	if span == nil {
		return mlrval.ABSENT
	}
	return mlrval.FromString(subject[span[0]:span[1]])
}
// BIF_regextract_or_else implements the regextract_or_else function. It
// compiles input2 as a regex via lib.CompileMillerRegexOrDie and returns the
// first portion of input1 that the regex matches; when there is no match,
// input3 is returned as the fallback.
//
// input1 and input2 must be strings; otherwise a not-a-string error labelled
// "regextract_or_else" is returned for the first offending input. input3 is
// not type-checked.
func BIF_regextract_or_else(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
	for _, arg := range []*mlrval.Mlrval{input1, input2} {
		if !arg.IsString() {
			return mlrval.FromNotStringError("regextract_or_else", arg)
		}
	}

	pattern := lib.CompileMillerRegexOrDie(input2.AcquireStringValue())
	subject := input1.AcquireStringValue()

	// span is nil on no match; otherwise it holds the match's [start, end)
	// offsets within subject.
	span := pattern.FindStringIndex(subject)
	if span == nil {
		return input3
	}
	return mlrval.FromString(subject[span[0]:span[1]])
}