Enhancement: Add system event logging and viewer with M3U/EPG endpoint caching

System Event Logging:
- Add SystemEvent model with 15 event types tracking channel operations, client connections, M3U/EPG activities, and buffering events
- Log detailed metrics for M3U/EPG refresh operations (streams/programs created/updated/deleted)
- Track M3U/EPG downloads with client information (IP address, user agent, profile, channel count)
- Record channel lifecycle events (start, stop, reconnect) with stream and client details
- Monitor client connections/disconnections and buffering events with stream metadata

Event Viewer UI:
- Add SystemEvents component with real-time updates via WebSocket
- Implement pagination, filtering by event type, and configurable auto-refresh
- Display events with color-coded badges and type-specific icons
- Integrate event viewer into Stats page with modal display
- Add event management settings (retention period, refresh rate)

M3U/EPG Endpoint Optimizations:
- Implement content caching with 5-minute TTL to reduce duplicate processing
- Add client-based event deduplication (2-second window) using IP and user agent hashing
- Support HEAD requests for efficient preflight checks
- Cache streamed EPG responses while maintaining streaming behavior for first request
This commit is contained in:
SergeantPanda 2025-11-20 17:41:06 -06:00
parent 204a5a0c76
commit 89a23164ff
18 changed files with 1022 additions and 67 deletions

View file

@ -34,6 +34,10 @@ class ClientManager:
self.heartbeat_interval = ConfigHelper.get('CLIENT_HEARTBEAT_INTERVAL', 10)
self.last_heartbeat_time = {}
# Get ProxyServer instance for ownership checks
from .server import ProxyServer
self.proxy_server = ProxyServer.get_instance()
# Start heartbeat thread for local clients
self._start_heartbeat_thread()
self._registered_clients = set() # Track already registered client IDs
@ -337,16 +341,30 @@ class ClientManager:
self._notify_owner_of_activity()
# Publish client disconnected event
event_data = json.dumps({
"event": EventType.CLIENT_DISCONNECTED, # Use constant instead of string
"channel_id": self.channel_id,
"client_id": client_id,
"worker_id": self.worker_id or "unknown",
"timestamp": time.time(),
"remaining_clients": remaining
})
self.redis_client.publish(RedisKeys.events_channel(self.channel_id), event_data)
# Check if we're the owner - if so, handle locally; if not, publish event
am_i_owner = self.proxy_server and self.proxy_server.am_i_owner(self.channel_id)
if am_i_owner:
# We're the owner - handle the disconnect directly
logger.debug(f"Owner handling CLIENT_DISCONNECTED for client {client_id} locally (not publishing)")
if remaining == 0:
# Trigger shutdown check directly via ProxyServer method
logger.debug(f"No clients left - triggering immediate shutdown check")
# Spawn greenlet to avoid blocking
import gevent
gevent.spawn(self.proxy_server.handle_client_disconnect, self.channel_id)
else:
# We're not the owner - publish event so owner can handle it
logger.debug(f"Non-owner publishing CLIENT_DISCONNECTED event for client {client_id} on channel {self.channel_id} from worker {self.worker_id}")
event_data = json.dumps({
"event": EventType.CLIENT_DISCONNECTED,
"channel_id": self.channel_id,
"client_id": client_id,
"worker_id": self.worker_id or "unknown",
"timestamp": time.time(),
"remaining_clients": remaining
})
self.redis_client.publish(RedisKeys.events_channel(self.channel_id), event_data)
# Trigger channel stats update via WebSocket
self._trigger_stats_update()

View file

@ -19,7 +19,7 @@ import gevent # Add gevent import
from typing import Dict, Optional, Set
from apps.proxy.config import TSConfig as Config
from apps.channels.models import Channel, Stream
from core.utils import RedisClient
from core.utils import RedisClient, log_system_event
from redis.exceptions import ConnectionError, TimeoutError
from .stream_manager import StreamManager
from .stream_buffer import StreamBuffer
@ -194,35 +194,11 @@ class ProxyServer:
self.redis_client.delete(disconnect_key)
elif event_type == EventType.CLIENT_DISCONNECTED:
logger.debug(f"Owner received {EventType.CLIENT_DISCONNECTED} event for channel {channel_id}")
# Check if any clients remain
if channel_id in self.client_managers:
# VERIFY REDIS CLIENT COUNT DIRECTLY
client_set_key = RedisKeys.clients(channel_id)
total = self.redis_client.scard(client_set_key) or 0
if total == 0:
logger.debug(f"No clients left after disconnect event - stopping channel {channel_id}")
# Set the disconnect timer for other workers to see
disconnect_key = RedisKeys.last_client_disconnect(channel_id)
self.redis_client.setex(disconnect_key, 60, str(time.time()))
# Get configured shutdown delay or default
shutdown_delay = ConfigHelper.channel_shutdown_delay()
if shutdown_delay > 0:
logger.info(f"Waiting {shutdown_delay}s before stopping channel...")
gevent.sleep(shutdown_delay) # REPLACE: time.sleep(shutdown_delay)
# Re-check client count before stopping
total = self.redis_client.scard(client_set_key) or 0
if total > 0:
logger.info(f"New clients connected during shutdown delay - aborting shutdown")
self.redis_client.delete(disconnect_key)
return
# Stop the channel directly
self.stop_channel(channel_id)
client_id = data.get("client_id")
worker_id = data.get("worker_id")
logger.debug(f"Owner received {EventType.CLIENT_DISCONNECTED} event for channel {channel_id}, client {client_id} from worker {worker_id}")
# Delegate to dedicated method
self.handle_client_disconnect(channel_id)
elif event_type == EventType.STREAM_SWITCH:
@ -646,6 +622,29 @@ class ProxyServer:
logger.info(f"Created StreamManager for channel {channel_id} with stream ID {channel_stream_id}")
self.stream_managers[channel_id] = stream_manager
# Log channel start event
try:
channel_obj = Channel.objects.get(uuid=channel_id)
# Get stream name if stream_id is available
stream_name = None
if channel_stream_id:
try:
stream_obj = Stream.objects.get(id=channel_stream_id)
stream_name = stream_obj.name
except Exception:
pass
log_system_event(
'channel_start',
channel_id=channel_id,
channel_name=channel_obj.name,
stream_name=stream_name,
stream_id=channel_stream_id
)
except Exception as e:
logger.error(f"Could not log channel start event: {e}")
# Create client manager with channel_id, redis_client AND worker_id (only if not already exists)
if channel_id not in self.client_managers:
client_manager = ClientManager(
@ -800,6 +799,44 @@ class ProxyServer:
logger.error(f"Error cleaning zombie channel {channel_id}: {e}", exc_info=True)
return False
def handle_client_disconnect(self, channel_id):
"""
Handle client disconnect event - check if channel should shut down.
Can be called directly by owner or via PubSub from non-owner workers.
"""
if channel_id not in self.client_managers:
return
try:
# VERIFY REDIS CLIENT COUNT DIRECTLY
client_set_key = RedisKeys.clients(channel_id)
total = self.redis_client.scard(client_set_key) or 0
if total == 0:
logger.debug(f"No clients left after disconnect event - stopping channel {channel_id}")
# Set the disconnect timer for other workers to see
disconnect_key = RedisKeys.last_client_disconnect(channel_id)
self.redis_client.setex(disconnect_key, 60, str(time.time()))
# Get configured shutdown delay or default
shutdown_delay = ConfigHelper.channel_shutdown_delay()
if shutdown_delay > 0:
logger.info(f"Waiting {shutdown_delay}s before stopping channel...")
gevent.sleep(shutdown_delay)
# Re-check client count before stopping
total = self.redis_client.scard(client_set_key) or 0
if total > 0:
logger.info(f"New clients connected during shutdown delay - aborting shutdown")
self.redis_client.delete(disconnect_key)
return
# Stop the channel directly
self.stop_channel(channel_id)
except Exception as e:
logger.error(f"Error handling client disconnect for channel {channel_id}: {e}")
def stop_channel(self, channel_id):
"""Stop a channel with proper ownership handling"""
try:
@ -847,6 +884,41 @@ class ProxyServer:
self.release_ownership(channel_id)
logger.info(f"Released ownership of channel {channel_id}")
# Log channel stop event (after cleanup, before releasing ownership section ends)
try:
channel_obj = Channel.objects.get(uuid=channel_id)
# Calculate runtime and get total bytes from metadata
runtime = None
total_bytes = None
if self.redis_client:
metadata_key = RedisKeys.channel_metadata(channel_id)
metadata = self.redis_client.hgetall(metadata_key)
if metadata:
# Calculate runtime from init_time
if b'init_time' in metadata:
try:
init_time = float(metadata[b'init_time'].decode('utf-8'))
runtime = round(time.time() - init_time, 2)
except Exception:
pass
# Get total bytes transferred
if b'total_bytes' in metadata:
try:
total_bytes = int(metadata[b'total_bytes'].decode('utf-8'))
except Exception:
pass
log_system_event(
'channel_stop',
channel_id=channel_id,
channel_name=channel_obj.name,
runtime=runtime,
total_bytes=total_bytes
)
except Exception as e:
logger.error(f"Could not log channel stop event: {e}")
# Always clean up local resources - WITH SAFE CHECKS
if channel_id in self.stream_managers:
del self.stream_managers[channel_id]
@ -968,6 +1040,13 @@ class ProxyServer:
# If in connecting or waiting_for_clients state, check grace period
if channel_state in [ChannelState.CONNECTING, ChannelState.WAITING_FOR_CLIENTS]:
# Check if channel is already stopping
if self.redis_client:
stop_key = RedisKeys.channel_stopping(channel_id)
if self.redis_client.exists(stop_key):
logger.debug(f"Channel {channel_id} is already stopping - skipping monitor shutdown")
continue
# Get connection_ready_time from metadata (indicates if channel reached ready state)
connection_ready_time = None
if metadata and b'connection_ready_time' in metadata:
@ -1048,6 +1127,13 @@ class ProxyServer:
logger.info(f"Channel {channel_id} activated with {total_clients} clients after grace period")
# If active and no clients, start normal shutdown procedure
elif channel_state not in [ChannelState.CONNECTING, ChannelState.WAITING_FOR_CLIENTS] and total_clients == 0:
# Check if channel is already stopping
if self.redis_client:
stop_key = RedisKeys.channel_stopping(channel_id)
if self.redis_client.exists(stop_key):
logger.debug(f"Channel {channel_id} is already stopping - skipping monitor shutdown")
continue
# Check if there's a pending no-clients timeout
disconnect_key = RedisKeys.last_client_disconnect(channel_id)
disconnect_time = None

View file

@ -14,6 +14,7 @@ from ..server import ProxyServer
from ..redis_keys import RedisKeys
from ..constants import EventType, ChannelState, ChannelMetadataField
from ..url_utils import get_stream_info_for_switch
from core.utils import log_system_event
logger = logging.getLogger("ts_proxy")
@ -598,7 +599,7 @@ class ChannelService:
def _update_stream_stats_in_db(stream_id, **stats):
"""Update stream stats in database"""
from django.db import connection
try:
from apps.channels.models import Stream
from django.utils import timezone
@ -624,7 +625,7 @@ class ChannelService:
except Exception as e:
logger.error(f"Error updating stream stats in database for stream {stream_id}: {e}")
return False
finally:
# Always close database connection after update
try:
@ -700,6 +701,7 @@ class ChannelService:
RedisKeys.events_channel(channel_id),
json.dumps(switch_request)
)
return True
@staticmethod

View file

@ -8,6 +8,8 @@ import logging
import threading
import gevent # Add this import at the top of your file
from apps.proxy.config import TSConfig as Config
from apps.channels.models import Channel
from core.utils import log_system_event
from .server import ProxyServer
from .utils import create_ts_packet, get_logger
from .redis_keys import RedisKeys
@ -88,6 +90,20 @@ class StreamGenerator:
if not self._setup_streaming():
return
# Log client connect event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'client_connect',
channel_id=self.channel_id,
channel_name=channel_obj.name,
client_ip=self.client_ip,
client_id=self.client_id,
user_agent=self.client_user_agent[:100] if self.client_user_agent else None
)
except Exception as e:
logger.error(f"Could not log client connect event: {e}")
# Main streaming loop
for chunk in self._stream_data_generator():
yield chunk
@ -439,6 +455,22 @@ class StreamGenerator:
total_clients = client_manager.get_total_client_count()
logger.info(f"[{self.client_id}] Disconnected after {elapsed:.2f}s (local: {local_clients}, total: {total_clients})")
# Log client disconnect event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'client_disconnect',
channel_id=self.channel_id,
channel_name=channel_obj.name,
client_ip=self.client_ip,
client_id=self.client_id,
user_agent=self.client_user_agent[:100] if self.client_user_agent else None,
duration=round(elapsed, 2),
bytes_sent=self.bytes_sent
)
except Exception as e:
logger.error(f"Could not log client disconnect event: {e}")
# Schedule channel shutdown if no clients left
if not stream_released: # Only if we haven't already released the stream
self._schedule_channel_shutdown_if_needed(local_clients)

View file

@ -16,6 +16,7 @@ from apps.proxy.config import TSConfig as Config
from apps.channels.models import Channel, Stream
from apps.m3u.models import M3UAccount, M3UAccountProfile
from core.models import UserAgent, CoreSettings
from core.utils import log_system_event
from .stream_buffer import StreamBuffer
from .utils import detect_stream_type, get_logger
from .redis_keys import RedisKeys
@ -260,6 +261,20 @@ class StreamManager:
# Store connection start time to measure success duration
connection_start_time = time.time()
# Log reconnection event if this is a retry (not first attempt)
if self.retry_count > 0:
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_reconnect',
channel_id=self.channel_id,
channel_name=channel_obj.name,
attempt=self.retry_count + 1,
max_attempts=self.max_retries
)
except Exception as e:
logger.error(f"Could not log reconnection event: {e}")
# Successfully connected - read stream data until disconnect/error
self._process_stream_data()
# If we get here, the connection was closed/failed
@ -289,6 +304,20 @@ class StreamManager:
if self.retry_count >= self.max_retries:
url_failed = True
logger.warning(f"Maximum retry attempts ({self.max_retries}) reached for URL: {self.url} for channel: {self.channel_id}")
# Log connection error event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_error',
channel_id=self.channel_id,
channel_name=channel_obj.name,
error_type='connection_failed',
url=self.url[:100] if self.url else None,
attempts=self.max_retries
)
except Exception as e:
logger.error(f"Could not log connection error event: {e}")
else:
# Wait with exponential backoff before retrying
timeout = min(.25 * self.retry_count, 3) # Cap at 3 seconds
@ -302,6 +331,21 @@ class StreamManager:
if self.retry_count >= self.max_retries:
url_failed = True
# Log connection error event with exception details
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_error',
channel_id=self.channel_id,
channel_name=channel_obj.name,
error_type='connection_exception',
error_message=str(e)[:200],
url=self.url[:100] if self.url else None,
attempts=self.max_retries
)
except Exception as log_error:
logger.error(f"Could not log connection error event: {log_error}")
else:
# Wait with exponential backoff before retrying
timeout = min(.25 * self.retry_count, 3) # Cap at 3 seconds
@ -702,6 +746,19 @@ class StreamManager:
# Reset buffering state
self.buffering = False
self.buffering_start_time = None
# Log failover event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_failover',
channel_id=self.channel_id,
channel_name=channel_obj.name,
reason='buffering_timeout',
duration=buffering_duration
)
except Exception as e:
logger.error(f"Could not log failover event: {e}")
else:
logger.error(f"Failed to switch to next stream for channel {self.channel_id} after buffering timeout")
else:
@ -709,6 +766,19 @@ class StreamManager:
self.buffering = True
self.buffering_start_time = time.time()
logger.warning(f"Buffering started for channel {self.channel_id} - speed: {ffmpeg_speed}x")
# Log system event for buffering
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_buffering',
channel_id=self.channel_id,
channel_name=channel_obj.name,
speed=ffmpeg_speed
)
except Exception as e:
logger.error(f"Could not log buffering event: {e}")
# Log buffering warning
logger.debug(f"FFmpeg speed on channel {self.channel_id} is below {self.buffering_speed} ({ffmpeg_speed}x) - buffering detected")
# Set channel state to buffering
@ -1004,6 +1074,19 @@ class StreamManager:
except Exception as e:
logger.warning(f"Failed to reset buffer position: {e}")
# Log stream switch event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'stream_switch',
channel_id=self.channel_id,
channel_name=channel_obj.name,
new_url=new_url[:100] if new_url else None,
stream_id=stream_id
)
except Exception as e:
logger.error(f"Could not log stream switch event: {e}")
return True
except Exception as e:
logger.error(f"Error during URL update for channel {self.channel_id}: {e}", exc_info=True)
@ -1122,6 +1205,19 @@ class StreamManager:
if connection_result:
self.connection_start_time = time.time()
logger.info(f"Reconnect successful for channel {self.channel_id}")
# Log reconnection event
try:
channel_obj = Channel.objects.get(uuid=self.channel_id)
log_system_event(
'channel_reconnect',
channel_id=self.channel_id,
channel_name=channel_obj.name,
reason='health_monitor'
)
except Exception as e:
logger.error(f"Could not log reconnection event: {e}")
return True
else:
logger.warning(f"Reconnect failed for channel {self.channel_id}")
@ -1199,25 +1295,17 @@ class StreamManager:
logger.debug(f"Error closing socket for channel {self.channel_id}: {e}")
pass
# Enhanced transcode process cleanup with more aggressive termination
# Enhanced transcode process cleanup with immediate termination
if self.transcode_process:
try:
# First try polite termination
logger.debug(f"Terminating transcode process for channel {self.channel_id}")
self.transcode_process.terminate()
logger.debug(f"Killing transcode process for channel {self.channel_id}")
self.transcode_process.kill()
# Give it a short time to terminate gracefully
# Give it a very short time to die
try:
self.transcode_process.wait(timeout=1.0)
self.transcode_process.wait(timeout=0.5)
except subprocess.TimeoutExpired:
# If it doesn't terminate quickly, kill it
logger.warning(f"Transcode process didn't terminate within timeout, killing forcefully for channel {self.channel_id}")
self.transcode_process.kill()
try:
self.transcode_process.wait(timeout=1.0)
except subprocess.TimeoutExpired:
logger.error(f"Failed to kill transcode process even with force for channel {self.channel_id}")
logger.error(f"Failed to kill transcode process even with force for channel {self.channel_id}")
except Exception as e:
logger.debug(f"Error terminating transcode process for channel {self.channel_id}: {e}")