Metadata: Improve titles generated from file and folder names #3447

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2024-10-25 21:21:30 +02:00
parent 303fab7a45
commit d32c9e9ef7
5 changed files with 153 additions and 16 deletions

View file

@ -142,7 +142,7 @@ func TestPhoto_UpdateTitle(t *testing.T) {
if err != nil {
t.Fatal(err)
}
assert.Equal(t, "Photo / Germany / 2016", m.PhotoTitle)
assert.Equal(t, "longlonglonglonglongcity / 2016", m.PhotoTitle)
})
t.Run("photo with location without loc name and short city", func(t *testing.T) {
m := PhotoFixtures.Get("Photo12")
@ -152,7 +152,7 @@ func TestPhoto_UpdateTitle(t *testing.T) {
if err != nil {
t.Fatal(err)
}
assert.Equal(t, "Photo / shortcity / 2016", m.PhotoTitle)
assert.Equal(t, "shortcity / Germany / 2016", m.PhotoTitle)
})
t.Run("no location original name", func(t *testing.T) {
m := PhotoFixtures.Get("19800101_000002_D640C559")
@ -188,7 +188,7 @@ func TestPhoto_UpdateTitle(t *testing.T) {
if len(m.SubjectNames()) > 0 {
assert.Equal(t, "Actress A / 1990", m.PhotoTitle)
} else {
assert.Equal(t, "Bridge / 1990", m.PhotoTitle)
assert.Equal(t, "Bridge1 / 1990", m.PhotoTitle)
}
})
t.Run("no location no labels no takenAt", func(t *testing.T) {
@ -199,7 +199,7 @@ func TestPhoto_UpdateTitle(t *testing.T) {
if err != nil {
t.Fatal(err)
}
assert.Equal(t, "Photo", m.PhotoTitle)
assert.Equal(t, "Photo20", m.PhotoTitle)
})
t.Run("OnePerson", func(t *testing.T) {
m := PhotoFixtures.Get("Photo10")

View file

@ -7,7 +7,7 @@ import (
"github.com/photoprism/photoprism/pkg/fs"
)
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':&+!?]{1,}|( [&+] )?")
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\d,':&+!?]{1,}[\\-]{0,}|( [&+] )?")
// FileTitle returns the string with the first characters of each word converted to uppercase.
func FileTitle(s string) string {
@ -23,7 +23,12 @@ func FileTitle(s string) string {
found := 0
for _, w := range words {
w = strings.ToLower(w)
// Ignore purely numeric values, such as those in timestamps.
if found == 0 && IsDateNumber(w) {
continue
} else if IsNumeric(w) {
continue
}
// Ignore ASCII strings < 3 characters at the beginning.
if IsASCII(w) && (len(w) < 3 && found == 0 || len(w) == 1) {
@ -39,7 +44,7 @@ func FileTitle(s string) string {
found++
if found > 10 {
if found > 16 {
break
}
}
@ -59,7 +64,7 @@ func FileTitle(s string) string {
title = strings.TrimSuffix(title, " "+w)
}
if len(title) <= 4 && IsASCII(title) {
if len(title) <= 4 && IsASCII(title) || IsDateNumber(title) {
return ""
}

View file

@ -32,10 +32,10 @@ func TestFileTitle(t *testing.T) {
assert.Equal(t, "Photo Lover", FileTitle("photo-lover"))
})
t.Run("nyc", func(t *testing.T) {
assert.Equal(t, "Bridge in, or by, NYC", FileTitle("BRIDGE in, or by, nyc"))
assert.Equal(t, "BRIDGE in, or by, NYC", FileTitle("BRIDGE in, or by, nyc"))
})
t.Run("apple", func(t *testing.T) {
assert.Equal(t, "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook", FileTitle("phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max"))
assert.Equal(t, "Phil Unveils iPhone, iPad, iPod, 'airpods', Airpod, AirPlay, iMac or MacBook Pro and Max", FileTitle("phil unveils iphone, ipad, ipod, 'airpods', airpod, airplay, imac or macbook 11 pro and max"))
})
t.Run("IMG_4568", func(t *testing.T) {
assert.Equal(t, "", FileTitle("IMG_4568"))
@ -53,7 +53,7 @@ func TestFileTitle(t *testing.T) {
assert.Equal(t, "Tim Robbins / TIFF", FileTitle("tim-robbins--tiff-2012_7999233420_o.jpg"))
})
t.Run("20200102-204030-Berlin-Germany-2020-3h4.jpg", func(t *testing.T) {
assert.Equal(t, "Berlin Germany", FileTitle("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
assert.Equal(t, "Berlin Germany 2020", FileTitle("20200102-204030-Berlin-Germany-2020-3h4.jpg"))
})
t.Run("changing-of-the-guard--buckingham-palace_7925318070_o.jpg", func(t *testing.T) {
assert.Equal(t, "Changing of the Guard / Buckingham Palace", FileTitle("changing-of-the-guard--buckingham-palace_7925318070_o.jpg"))
@ -79,10 +79,10 @@ func TestFileTitle(t *testing.T) {
assert.Equal(t, "Cyka Swappable Mag", FileTitle("Cyka - swappable mag (82405706) .jpg"))
})
t.Run("issue_361_d", func(t *testing.T) {
assert.Equal(t, "Dishwasher Friedrich the Smol", FileTitle("dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg"))
assert.Equal(t, "Dishwasher1910 Friedrich the Smol", FileTitle("dishwasher1910 - Friedrich the smol (82201574) 1ページ.jpg"))
})
t.Run("issue_361_e", func(t *testing.T) {
assert.Equal(t, "Eaycddvu Aafuur", FileTitle("EaycddvU0AAfuUR.jpg"))
assert.Equal(t, "EaycddvU0AAfuUR", FileTitle("EaycddvU0AAfuUR.jpg"))
})
t.Run("Eigene Bilder 1013/2007/oldies/neumühle", func(t *testing.T) {
// TODO: Normalize strings, see https://godoc.org/golang.org/x/text/unicode/norm
@ -92,9 +92,11 @@ func TestFileTitle(t *testing.T) {
assert.Equal(t, "Neumühle", FileTitle("Neumühle"))
})
t.Run("IQVG4929", func(t *testing.T) {
assert.Equal(t, "", FileTitle("IQVG4929.jpg"))
assert.Equal(t, "IQVG4929", FileTitle("IQVG4929.jpg"))
})
t.Run("IMG_1234", func(t *testing.T) {
assert.Equal(t, "", FileTitle("IMG_1234.jpg"))
})
t.Run("du,-ich,-er, Sie und es", func(t *testing.T) {
assert.Equal(t, "Du, Ich, Er, Sie und Es", FileTitle("du,-ich,-er, Sie und es"))
})
@ -114,7 +116,7 @@ func TestFileTitle(t *testing.T) {
assert.Equal(t, "Boston New Year's", FileTitle("boston new year's"))
})
t.Run("Screenshot", func(t *testing.T) {
assert.Equal(t, "Screenshot", FileTitle("Screenshot 2020-05-04 at 14:25:01.jpeg"))
assert.Equal(t, "Screenshot 2020 05", FileTitle("Screenshot 2020-05-04 at 14:25:01.jpeg"))
})
t.Run("HD", func(t *testing.T) {
assert.Equal(t, "Desktop Nebula HD Wallpapers", FileTitle("Desktop-Nebula-hd-Wallpapers.jpeg"))
@ -125,4 +127,25 @@ func TestFileTitle(t *testing.T) {
t.Run("ImgNonCommercialPics", func(t *testing.T) {
assert.Equal(t, "Non Commercial Pics", FileTitle("Img Non Commercial Pics"))
})
t.Run("Birthday", func(t *testing.T) {
assert.Equal(t, "40th Birthday in Berlin", FileTitle("2024-10-23 40th Birthday in Berlin.jpg"))
})
t.Run("February2nd", func(t *testing.T) {
assert.Equal(t, "February 2nd", FileTitle("2024-10-23 February 2nd.jpg"))
})
t.Run("Boeing737", func(t *testing.T) {
assert.Equal(t, "Boeing", FileTitle("Boeing 737.jpg"))
})
t.Run("Boeing747-8F", func(t *testing.T) {
assert.Equal(t, "Boeing 747 8F", FileTitle("Boeing 747-8F.jpg"))
})
t.Run("Boeing747-100SR", func(t *testing.T) {
assert.Equal(t, "Boeing 747 100SR", FileTitle("Boeing 747-100SR.jpg"))
})
t.Run("Apostrophe", func(t *testing.T) {
assert.Equal(t, "Ma'am", FileTitle("Ma'am"))
})
t.Run("Download", func(t *testing.T) {
assert.Equal(t, "Tourist Attraction Berlin", FileTitle("20170812-185131-Tourist-Attraction-Berlin-2017.jpg"))
})
}

View file

@ -27,6 +27,55 @@ func IsASCII(s string) bool {
return true
}
// IsNumeric reports whether s is at least two bytes long and both its first
// and its last byte are ASCII digits ('0'-'9').
//
// Bytes between the first and last position are not inspected, so values such
// as "2024-10-23" are accepted. Empty strings and single-character strings,
// including a single digit, yield false.
func IsNumeric(s string) bool {
	n := len(s)

	// Anything shorter than two bytes is never treated as numeric.
	if n < 2 {
		return false
	}

	isDigit := func(b byte) bool {
		return b >= '0' && b <= '9'
	}

	return isDigit(s[0]) && isDigit(s[n-1])
}
// IsNumber reports whether s is non-empty and consists exclusively of ASCII
// digits ('0'-'9'). Any other byte, including signs, separators, and the bytes
// of multi-byte UTF-8 characters, makes the result false.
func IsNumber(s string) bool {
	for _, b := range []byte(s) {
		if b < '0' || b > '9' {
			return false
		}
	}

	// The loop accepts an empty string vacuously, so reject it here.
	return s != ""
}
// IsDateNumber reports whether s is non-empty and every byte is either an
// ASCII digit ('0'-'9') or one of the delimiters '-' and '_'.
//
// No digit is required: a string made up solely of delimiters also returns
// true.
func IsDateNumber(s string) bool {
	if len(s) == 0 {
		return false
	}

	for _, b := range []byte(s) {
		switch {
		case b >= '0' && b <= '9':
			// Digit, keep scanning.
		case b == '_', b == '-':
			// Accepted delimiter, keep scanning.
		default:
			return false
		}
	}

	return true
}
// IsLatin reports whether the string only contains latin letters.
func IsLatin(s string) bool {
if s == "" {

View file

@ -68,6 +68,66 @@ func TestIsASCII(t *testing.T) {
})
}
// TestIsNumeric checks IsNumeric against empty, purely numeric, date-like,
// trailing-punctuation, and alphabetic inputs.
func TestIsNumeric(t *testing.T) {
	// Each case runs as its own subtest, in the order listed.
	cases := []struct {
		name  string
		input string
		want  bool
	}{
		{name: "Empty", input: "", want: false},
		{name: "123", input: "123", want: true},
		{name: "123.", input: "123.", want: false},
		{name: "2024-10-23", input: "2024-10-23", want: true},
		{name: "20200102-204030", input: "20200102-204030", want: true},
		{name: "ABC", input: "ABC", want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.want {
				assert.True(t, IsNumeric(tc.input))
			} else {
				assert.False(t, IsNumeric(tc.input))
			}
		})
	}
}
// TestIsNumber checks IsNumber against empty, digit-only, date-like,
// trailing-punctuation, and alphabetic inputs.
func TestIsNumber(t *testing.T) {
	// Each case runs as its own subtest, in the order listed.
	cases := []struct {
		name  string
		input string
		want  bool
	}{
		{name: "Empty", input: "", want: false},
		{name: "123", input: "123", want: true},
		{name: "123.", input: "123.", want: false},
		{name: "2024-10-23", input: "2024-10-23", want: false},
		{name: "ABC", input: "ABC", want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.want {
				assert.True(t, IsNumber(tc.input))
			} else {
				assert.False(t, IsNumber(tc.input))
			}
		})
	}
}
// TestIsDateNumber checks IsDateNumber against empty, digit-only, date-like,
// trailing-punctuation, and alphabetic inputs.
func TestIsDateNumber(t *testing.T) {
	// Each case runs as its own subtest, in the order listed.
	cases := []struct {
		name  string
		input string
		want  bool
	}{
		{name: "Empty", input: "", want: false},
		{name: "123", input: "123", want: true},
		{name: "123.", input: "123.", want: false},
		{name: "2024-10-23", input: "2024-10-23", want: true},
		{name: "20200102-204030", input: "20200102-204030", want: true},
		{name: "ABC", input: "ABC", want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.want {
				assert.True(t, IsDateNumber(tc.input))
			} else {
				assert.False(t, IsDateNumber(tc.input))
			}
		})
	}
}
func TestIsLatin(t *testing.T) {
t.Run("The quick brown fox.", func(t *testing.T) {
assert.False(t, IsLatin("The quick brown fox."))