Search: Add support for numerical keywords like "2nd" or "80s" #3447

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2024-10-25 22:46:14 +02:00
parent d32c9e9ef7
commit 833685251c
5 changed files with 112 additions and 13 deletions

View file

@ -27,23 +27,42 @@ func IsASCII(s string) bool {
return true
}
// IsNumeric reports whether s both starts and ends with an ASCII digit.
//
// Only the first and the last byte are inspected; anything in between is
// ignored. As a result "123", "2e4" and "2024-10-23" are numeric, while
// "80s", "123." and "ABC" are not. The empty string is never numeric.
func IsNumeric(s string) bool {
	if s == "" {
		return false
	}

	if s[0] < '0' || s[0] > '9' {
		return false
	}

	// For a single-character string the last byte is the first byte,
	// which has already been verified to be a digit.
	last := s[len(s)-1]

	return last >= '0' && last <= '9'
}
// IsNumeral reports whether s looks like a word describing a number, such as
// "1st", "2ND", "40th" or "80s".
//
// The string must be at least two bytes long, start with an ASCII digit, and
// end with a byte that is greater than or equal to 'A'. Plain numbers ("123"),
// numbers followed by punctuation below 'A' ("40."), and strings that do not
// start with a digit ("ABC", "-1.") are rejected.
//
// Note that the last-byte check is a simple byte comparison, so it also
// accepts the ASCII characters above 'Z' that are not letters (for example
// '_') as well as the bytes of multi-byte UTF-8 characters.
func IsNumeral(s string) bool {
	// A numeral needs at least one leading digit and one trailing character.
	if len(s) < 2 {
		return false
	}

	if s[0] < '0' || s[0] > '9' {
		return false
	}

	return s[len(s)-1] >= 'A'
}
// IsNumber tests if the string only contains ascii number characters.
@ -53,7 +72,7 @@ func IsNumber(s string) bool {
}
for i := 0; i < len(s); i++ {
if s[i] < 48 || s[i] > 57 {
if s[i] < '0' || s[i] > '9' {
return false
}
}
@ -68,7 +87,7 @@ func IsDateNumber(s string) bool {
}
for i := 0; i < len(s); i++ {
if (s[i] < 48 || s[i] > 57) && s[i] != '_' && s[i] != '-' {
if (s[i] < '0' || s[i] > '9') && s[i] != '_' && s[i] != '-' {
return false
}
}

View file

@ -45,6 +45,9 @@ func TestIs(t *testing.T) {
}
func TestIsASCII(t *testing.T) {
t.Run("1", func(t *testing.T) {
assert.True(t, IsASCII("1"))
})
t.Run("123", func(t *testing.T) {
assert.True(t, IsASCII("123"))
})
@ -66,12 +69,18 @@ func TestIsASCII(t *testing.T) {
t.Run("réseau", func(t *testing.T) {
assert.False(t, IsASCII("réseau"))
})
t.Run("80s", func(t *testing.T) {
assert.True(t, IsASCII("80s"))
})
}
func TestIsNumeric(t *testing.T) {
t.Run("Empty", func(t *testing.T) {
assert.False(t, IsNumeric(""))
})
t.Run("1", func(t *testing.T) {
assert.True(t, IsNumeric("1"))
})
t.Run("123", func(t *testing.T) {
assert.True(t, IsNumeric("123"))
})
@ -87,12 +96,69 @@ func TestIsNumeric(t *testing.T) {
t.Run("ABC", func(t *testing.T) {
assert.False(t, IsNumeric("ABC"))
})
t.Run("80s", func(t *testing.T) {
assert.False(t, IsNumeric("80s"))
})
t.Run("2e4", func(t *testing.T) {
assert.True(t, IsNumeric("2e4"))
})
t.Run("2e", func(t *testing.T) {
assert.False(t, IsNumeric("2e"))
})
}
// TestIsNumeral verifies that IsNumeral accepts digit-prefixed words such as
// "1st" or "80s" and rejects empty strings, plain numbers, dates, words
// without a leading digit, and numbers followed by a dot.
func TestIsNumeral(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		assert.False(t, IsNumeral(""))
	})
	t.Run("1", func(t *testing.T) {
		assert.False(t, IsNumeral("1"))
	})
	t.Run("123", func(t *testing.T) {
		assert.False(t, IsNumeral("123"))
	})
	t.Run("123.", func(t *testing.T) {
		assert.False(t, IsNumeral("123."))
	})
	t.Run("2024-10-23", func(t *testing.T) {
		assert.False(t, IsNumeral("2024-10-23"))
	})
	t.Run("20200102-204030", func(t *testing.T) {
		assert.False(t, IsNumeral("20200102-204030"))
	})
	t.Run("ABC", func(t *testing.T) {
		assert.False(t, IsNumeral("ABC"))
	})
	t.Run("1st", func(t *testing.T) {
		assert.True(t, IsNumeral("1st"))
	})
	// The input now matches the subtest name; it previously checked "1ND".
	t.Run("2ND", func(t *testing.T) {
		assert.True(t, IsNumeral("2ND"))
	})
	t.Run("40th", func(t *testing.T) {
		assert.True(t, IsNumeral("40th"))
	})
	t.Run("-1.", func(t *testing.T) {
		assert.False(t, IsNumeral("-1."))
	})
	t.Run("1.", func(t *testing.T) {
		assert.False(t, IsNumeral("1."))
	})
	t.Run("40.", func(t *testing.T) {
		assert.False(t, IsNumeral("40."))
	})
	t.Run("80s", func(t *testing.T) {
		assert.True(t, IsNumeral("80s"))
	})
}
func TestIsNumber(t *testing.T) {
t.Run("Empty", func(t *testing.T) {
assert.False(t, IsNumber(""))
})
t.Run("1", func(t *testing.T) {
assert.True(t, IsNumber("1"))
})
t.Run("123", func(t *testing.T) {
assert.True(t, IsNumber("123"))
})
@ -105,6 +171,9 @@ func TestIsNumber(t *testing.T) {
t.Run("ABC", func(t *testing.T) {
assert.False(t, IsNumber("ABC"))
})
t.Run("80s", func(t *testing.T) {
assert.False(t, IsNumber("80s"))
})
}
func TestIsDateNumber(t *testing.T) {

View file

@ -123,6 +123,9 @@ var SpecialWords = map[string]string{
"heif": "HEIF",
"heic": "HEIC",
"avif": "AVIF",
"jpeg": "JPEG",
"jpg": "JPG",
"png": "PNG",
"bmp": "BMP",
"gif": "GIF",
"dng": "DNG",

View file

@ -6,7 +6,7 @@ import (
"strings"
)
// KeywordsRegexp matches runs of one or more Unicode letters, ASCII digits,
// hyphens, and apostrophes. Digits are included so that numerical keywords
// like "2nd" or "80s" are matched as a single word.
var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\d\\-']{1,}")
// UnknownWord returns true if the string does not seem to be a real word.
func UnknownWord(s string) bool {
@ -36,7 +36,7 @@ func Words(s string) (results []string) {
for _, w := range KeywordsRegexp.FindAllString(s, -1) {
w = strings.Trim(w, "- '")
if w == "" || len(w) < 2 && IsLatin(w) {
if w == "" || len(w) < 2 && IsLatin(w) || IsNumeric(w) {
continue
}

View file

@ -208,25 +208,33 @@ func TestKeywords(t *testing.T) {
}
func TestUniqueWords(t *testing.T) {
t.Run("many", func(t *testing.T) {
t.Run("Many", func(t *testing.T) {
result := UniqueWords([]string{"lazy", "jpg", "Brown", "apple", "brown", "new-york", "JPG"})
assert.Equal(t, []string{"apple", "brown", "jpg", "lazy", "new-york"}, result)
})
t.Run("one", func(t *testing.T) {
t.Run("One", func(t *testing.T) {
result := UniqueWords([]string{"lazy"})
assert.Equal(t, []string{"lazy"}, result)
})
t.Run("Numerals", func(t *testing.T) {
result := UniqueWords([]string{"1st", "40.", "52nd", "ma'am", "80s"})
assert.Equal(t, []string{"1st", "40.", "52nd", "80s", "ma'am"}, result)
})
}
func TestUniqueKeywords(t *testing.T) {
t.Run("many", func(t *testing.T) {
t.Run("Many", func(t *testing.T) {
result := UniqueKeywords("lazy, Brown, apple, new-york, brown, ...")
assert.Equal(t, []string{"apple", "brown", "lazy", "new-york"}, result)
})
t.Run("one", func(t *testing.T) {
t.Run("One", func(t *testing.T) {
result := UniqueKeywords("")
assert.Equal(t, []string(nil), result)
})
t.Run("Numerals", func(t *testing.T) {
result := UniqueKeywords("1st, 40., 52nd, ma'am, 80s")
assert.Equal(t, []string{"1st", "52nd", "80s", "ma'am"}, result)
})
}
func TestRemoveFromWords(t *testing.T) {