mirror of
https://github.com/Dispatcharr/Dispatcharr.git
synced 2026-01-23 02:35:14 +00:00
Switched logging to match rest of application.
This commit is contained in:
parent
0843776b6b
commit
1508c2902e
1 changed file with 28 additions and 20 deletions
|
|
@ -4,11 +4,15 @@ import sys
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import sys
|
import logging
|
||||||
|
|
||||||
from rapidfuzz import fuzz
|
from rapidfuzz import fuzz
|
||||||
from sentence_transformers import util
|
from sentence_transformers import util
|
||||||
from sentence_transformers import SentenceTransformer as st
|
from sentence_transformers import SentenceTransformer as st
|
||||||
|
|
||||||
|
# Set up logger
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Load the sentence-transformers model once at the module level
|
# Load the sentence-transformers model once at the module level
|
||||||
SENTENCE_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
SENTENCE_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
||||||
MODEL_PATH = os.path.join("/app", "models", "all-MiniLM-L6-v2")
|
MODEL_PATH = os.path.join("/app", "models", "all-MiniLM-L6-v2")
|
||||||
|
|
@ -18,18 +22,15 @@ BEST_FUZZY_THRESHOLD = 85
|
||||||
LOWER_FUZZY_THRESHOLD = 40
|
LOWER_FUZZY_THRESHOLD = 40
|
||||||
EMBED_SIM_THRESHOLD = 0.65
|
EMBED_SIM_THRESHOLD = 0.65
|
||||||
|
|
||||||
def eprint(*args, **kwargs):
|
|
||||||
print(*args, file=sys.stderr, **kwargs)
|
|
||||||
|
|
||||||
def process_data(input_data):
|
def process_data(input_data):
|
||||||
os.makedirs(MODEL_PATH, exist_ok=True)
|
os.makedirs(MODEL_PATH, exist_ok=True)
|
||||||
|
|
||||||
# If not present locally, download:
|
# If not present locally, download:
|
||||||
if not os.path.exists(os.path.join(MODEL_PATH, "config.json")):
|
if not os.path.exists(os.path.join(MODEL_PATH, "config.json")):
|
||||||
eprint(f"Local model not found in {MODEL_PATH}; downloading from {SENTENCE_MODEL_NAME}...")
|
logger.info(f"Local model not found in {MODEL_PATH}; downloading from {SENTENCE_MODEL_NAME}...")
|
||||||
st_model = st(SENTENCE_MODEL_NAME, cache_folder=MODEL_PATH)
|
st_model = st(SENTENCE_MODEL_NAME, cache_folder=MODEL_PATH)
|
||||||
else:
|
else:
|
||||||
eprint(f"Loading local model from {MODEL_PATH}")
|
logger.info(f"Loading local model from {MODEL_PATH}")
|
||||||
st_model = st(MODEL_PATH)
|
st_model = st(MODEL_PATH)
|
||||||
|
|
||||||
channels = input_data["channels"]
|
channels = input_data["channels"]
|
||||||
|
|
@ -59,7 +60,7 @@ def process_data(input_data):
|
||||||
# Add to matched_channels list so it's counted in the total
|
# Add to matched_channels list so it's counted in the total
|
||||||
matched_channels.append((chan['id'], fallback_name, epg_by_tvg_id["tvg_id"]))
|
matched_channels.append((chan['id'], fallback_name, epg_by_tvg_id["tvg_id"]))
|
||||||
|
|
||||||
eprint(f"Channel {chan['id']} '{fallback_name}' => EPG found by tvg_id={epg_by_tvg_id['tvg_id']}")
|
logger.info(f"Channel {chan['id']} '{fallback_name}' => EPG found by tvg_id={epg_by_tvg_id['tvg_id']}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If channel has a tvg_id that doesn't exist in EPGData, do direct check.
|
# If channel has a tvg_id that doesn't exist in EPGData, do direct check.
|
||||||
|
|
@ -67,15 +68,14 @@ def process_data(input_data):
|
||||||
if chan["tvg_id"]:
|
if chan["tvg_id"]:
|
||||||
epg_match = [epg["id"] for epg in epg_data if epg["tvg_id"] == chan["tvg_id"]]
|
epg_match = [epg["id"] for epg in epg_data if epg["tvg_id"] == chan["tvg_id"]]
|
||||||
if epg_match:
|
if epg_match:
|
||||||
# Fix: Access the first element directly since epg_match contains the IDs themselves
|
chan["epg_data_id"] = epg_match[0]
|
||||||
chan["epg_data_id"] = epg_match[0] # Directly use the integer ID
|
logger.info(f"Channel {chan['id']} '{chan['name']}' => EPG found by tvg_id={chan['tvg_id']}")
|
||||||
eprint(f"Channel {chan['id']} '{chan['name']}' => EPG found by tvg_id={chan['tvg_id']}")
|
|
||||||
channels_to_update.append(chan)
|
channels_to_update.append(chan)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# C) Perform name-based fuzzy matching
|
# C) Perform name-based fuzzy matching
|
||||||
if not chan["norm_chan"]:
|
if not chan["norm_chan"]:
|
||||||
eprint(f"Channel {chan['id']} '{chan['name']}' => empty after normalization, skipping")
|
logger.debug(f"Channel {chan['id']} '{chan['name']}' => empty after normalization, skipping")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
best_score = 0
|
best_score = 0
|
||||||
|
|
@ -99,7 +99,7 @@ def process_data(input_data):
|
||||||
bonus = 15
|
bonus = 15
|
||||||
score = base_score + bonus
|
score = base_score + bonus
|
||||||
|
|
||||||
eprint(
|
logger.debug(
|
||||||
f"Channel {chan['id']} '{fallback_name}' => EPG row {row['id']}: "
|
f"Channel {chan['id']} '{fallback_name}' => EPG row {row['id']}: "
|
||||||
f"name='{row['name']}', norm_name='{row['norm_name']}', "
|
f"name='{row['name']}', norm_name='{row['norm_name']}', "
|
||||||
f"combined_text='{combined_text}', dot_regions={dot_regions}, "
|
f"combined_text='{combined_text}', dot_regions={dot_regions}, "
|
||||||
|
|
@ -112,7 +112,7 @@ def process_data(input_data):
|
||||||
|
|
||||||
# If no best match was found, skip
|
# If no best match was found, skip
|
||||||
if not best_epg:
|
if not best_epg:
|
||||||
eprint(f"Channel {chan['id']} '{fallback_name}' => no EPG match at all.")
|
logger.debug(f"Channel {chan['id']} '{fallback_name}' => no EPG match at all.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If best_score is above BEST_FUZZY_THRESHOLD => direct accept
|
# If best_score is above BEST_FUZZY_THRESHOLD => direct accept
|
||||||
|
|
@ -121,7 +121,7 @@ def process_data(input_data):
|
||||||
channels_to_update.append(chan)
|
channels_to_update.append(chan)
|
||||||
|
|
||||||
matched_channels.append((chan['id'], fallback_name, best_epg["tvg_id"]))
|
matched_channels.append((chan['id'], fallback_name, best_epg["tvg_id"]))
|
||||||
eprint(
|
logger.info(
|
||||||
f"Channel {chan['id']} '{fallback_name}' => matched tvg_id={best_epg['tvg_id']} "
|
f"Channel {chan['id']} '{fallback_name}' => matched tvg_id={best_epg['tvg_id']} "
|
||||||
f"(score={best_score})"
|
f"(score={best_score})"
|
||||||
)
|
)
|
||||||
|
|
@ -138,27 +138,35 @@ def process_data(input_data):
|
||||||
channels_to_update.append(chan)
|
channels_to_update.append(chan)
|
||||||
|
|
||||||
matched_channels.append((chan['id'], fallback_name, matched_epg["tvg_id"]))
|
matched_channels.append((chan['id'], fallback_name, matched_epg["tvg_id"]))
|
||||||
eprint(
|
logger.info(
|
||||||
f"Channel {chan['id']} '{fallback_name}' => matched EPG tvg_id={matched_epg['tvg_id']} "
|
f"Channel {chan['id']} '{fallback_name}' => matched EPG tvg_id={matched_epg['tvg_id']} "
|
||||||
f"(fuzzy={best_score}, cos-sim={top_value:.2f})"
|
f"(fuzzy={best_score}, cos-sim={top_value:.2f})"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
eprint(
|
logger.info(
|
||||||
f"Channel {chan['id']} '{fallback_name}' => fuzzy={best_score}, "
|
f"Channel {chan['id']} '{fallback_name}' => fuzzy={best_score}, "
|
||||||
f"cos-sim={top_value:.2f} < {EMBED_SIM_THRESHOLD}, skipping"
|
f"cos-sim={top_value:.2f} < {EMBED_SIM_THRESHOLD}, skipping"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
eprint(
|
# No good match found - fuzzy score is too low
|
||||||
f"Channel {chan['id']} '{fallback_name}' => fuzzy={best_score} < "
|
logger.info(
|
||||||
f"{LOWER_FUZZY_THRESHOLD}, skipping"
|
f"Channel {chan['id']} '{fallback_name}' => best fuzzy match score={best_score} < {LOWER_FUZZY_THRESHOLD}, skipping"
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"channels_to_update": channels_to_update,
|
"channels_to_update": channels_to_update,
|
||||||
"matched_channels": matched_channels,
|
"matched_channels": matched_channels
|
||||||
}
|
}
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
# Configure logging
|
||||||
|
logging_level = os.environ.get('DISPATCHARR_LOG_LEVEL', 'INFO')
|
||||||
|
logging.basicConfig(
|
||||||
|
level=getattr(logging, logging_level),
|
||||||
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||||
|
stream=sys.stderr
|
||||||
|
)
|
||||||
|
|
||||||
# Read input data from a file
|
# Read input data from a file
|
||||||
input_file_path = sys.argv[1]
|
input_file_path = sys.argv[1]
|
||||||
with open(input_file_path, 'r') as f:
|
with open(input_file_path, 'r') as f:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue