Enhancement: Add system event logging and viewer with M3U/EPG endpoint caching

System Event Logging: - Add SystemEvent model with 15 event types tracking channel operations, client connections, M3U/EPG activities, and buffering events - Log detailed metrics for M3U/EPG refresh operations (streams/programs created/updated/deleted) - Track M3U/EPG downloads with client information (IP address, user agent, profile, channel count) - Record channel lifecycle events (start, stop, reconnect) with stream and client details - Monitor client connections/disconnections and buffering events with stream metadata Event Viewer UI: - Add SystemEvents component with real-time updates via WebSocket - Implement pagination, filtering by event type, and configurable auto-refresh - Display events with color-coded badges and type-specific icons - Integrate event viewer into Stats page with modal display - Add event management settings (retention period, refresh rate) M3U/EPG Endpoint Optimizations: - Implement content caching with 5-minute TTL to reduce duplicate processing - Add client-based event deduplication (2-second window) using IP and user agent hashing - Support HEAD requests for efficient preflight checks - Cache streamed EPG responses while maintaining streaming behavior for first request
2026-01-23 02:35:14 +00:00 · 2025-11-20 17:41:06 -06:00 · 2025-11-20 17:41:06 -06:00 · 89a23164ff
commit 89a23164ff
parent 204a5a0c76
18 changed files with 1022 additions and 67 deletions
--- a/apps/proxy/ts_proxy/client_manager.py
+++ b/apps/proxy/ts_proxy/client_manager.py
@ -34,6 +34,10 @@ class ClientManager:
        self.heartbeat_interval = ConfigHelper.get('CLIENT_HEARTBEAT_INTERVAL', 10)
        self.last_heartbeat_time = {}

+        # Get ProxyServer instance for ownership checks
+        from .server import ProxyServer
+        self.proxy_server = ProxyServer.get_instance()
+
        # Start heartbeat thread for local clients
        self._start_heartbeat_thread()
        self._registered_clients = set()  # Track already registered client IDs
@ -337,16 +341,30 @@ class ClientManager:

                self._notify_owner_of_activity()

-                # Publish client disconnected event
-                event_data = json.dumps({
-                    "event": EventType.CLIENT_DISCONNECTED,  # Use constant instead of string
-                    "channel_id": self.channel_id,
-                    "client_id": client_id,
-                    "worker_id": self.worker_id or "unknown",
-                    "timestamp": time.time(),
-                    "remaining_clients": remaining
-                })
-                self.redis_client.publish(RedisKeys.events_channel(self.channel_id), event_data)
+                # Check if we're the owner - if so, handle locally; if not, publish event
+                am_i_owner = self.proxy_server and self.proxy_server.am_i_owner(self.channel_id)
+
+                if am_i_owner:
+                    # We're the owner - handle the disconnect directly
+                    logger.debug(f"Owner handling CLIENT_DISCONNECTED for client {client_id} locally (not publishing)")
+                    if remaining == 0:
+                        # Trigger shutdown check directly via ProxyServer method
+                        logger.debug(f"No clients left - triggering immediate shutdown check")
+                        # Spawn greenlet to avoid blocking
+                        import gevent
+                        gevent.spawn(self.proxy_server.handle_client_disconnect, self.channel_id)
+                else:
+                    # We're not the owner - publish event so owner can handle it
+                    logger.debug(f"Non-owner publishing CLIENT_DISCONNECTED event for client {client_id} on channel {self.channel_id} from worker {self.worker_id}")
+                    event_data = json.dumps({
+                        "event": EventType.CLIENT_DISCONNECTED,
+                        "channel_id": self.channel_id,
+                        "client_id": client_id,
+                        "worker_id": self.worker_id or "unknown",
+                        "timestamp": time.time(),
+                        "remaining_clients": remaining
+                    })
+                    self.redis_client.publish(RedisKeys.events_channel(self.channel_id), event_data)

                # Trigger channel stats update via WebSocket
                self._trigger_stats_update()
--- a/apps/proxy/ts_proxy/server.py
+++ b/apps/proxy/ts_proxy/server.py
@ -19,7 +19,7 @@ import gevent  # Add gevent import
 from typing import Dict, Optional, Set
 from apps.proxy.config import TSConfig as Config
 from apps.channels.models import Channel, Stream
-from core.utils import RedisClient
+from core.utils import RedisClient, log_system_event
 from redis.exceptions import ConnectionError, TimeoutError
 from .stream_manager import StreamManager
 from .stream_buffer import StreamBuffer
@ -194,35 +194,11 @@ class ProxyServer:
                                        self.redis_client.delete(disconnect_key)

                                    elif event_type == EventType.CLIENT_DISCONNECTED:
-                                        logger.debug(f"Owner received {EventType.CLIENT_DISCONNECTED} event for channel {channel_id}")
-                                        # Check if any clients remain
-                                        if channel_id in self.client_managers:
-                                            # VERIFY REDIS CLIENT COUNT DIRECTLY
-                                            client_set_key = RedisKeys.clients(channel_id)
-                                            total = self.redis_client.scard(client_set_key) or 0
-
-                                            if total == 0:
-                                                logger.debug(f"No clients left after disconnect event - stopping channel {channel_id}")
-                                                # Set the disconnect timer for other workers to see
-                                                disconnect_key = RedisKeys.last_client_disconnect(channel_id)
-                                                self.redis_client.setex(disconnect_key, 60, str(time.time()))
-
-                                                # Get configured shutdown delay or default
-                                                shutdown_delay = ConfigHelper.channel_shutdown_delay()
-
-                                                if shutdown_delay > 0:
-                                                    logger.info(f"Waiting {shutdown_delay}s before stopping channel...")
-                                                    gevent.sleep(shutdown_delay)  # REPLACE: time.sleep(shutdown_delay)
-
-                                                    # Re-check client count before stopping
-                                                    total = self.redis_client.scard(client_set_key) or 0
-                                                    if total > 0:
-                                                        logger.info(f"New clients connected during shutdown delay - aborting shutdown")
-                                                        self.redis_client.delete(disconnect_key)
-                                                        return
-
-                                                # Stop the channel directly
-                                                self.stop_channel(channel_id)
+                                        client_id = data.get("client_id")
+                                        worker_id = data.get("worker_id")
+                                        logger.debug(f"Owner received {EventType.CLIENT_DISCONNECTED} event for channel {channel_id}, client {client_id} from worker {worker_id}")
+                                        # Delegate to dedicated method
+                                        self.handle_client_disconnect(channel_id)


                                    elif event_type == EventType.STREAM_SWITCH:
@ -646,6 +622,29 @@ class ProxyServer:
            logger.info(f"Created StreamManager for channel {channel_id} with stream ID {channel_stream_id}")
            self.stream_managers[channel_id] = stream_manager

+            # Log channel start event
+            try:
+                channel_obj = Channel.objects.get(uuid=channel_id)
+
+                # Get stream name if stream_id is available
+                stream_name = None
+                if channel_stream_id:
+                    try:
+                        stream_obj = Stream.objects.get(id=channel_stream_id)
+                        stream_name = stream_obj.name
+                    except Exception:
+                        pass
+
+                log_system_event(
+                    'channel_start',
+                    channel_id=channel_id,
+                    channel_name=channel_obj.name,
+                    stream_name=stream_name,
+                    stream_id=channel_stream_id
+                )
+            except Exception as e:
+                logger.error(f"Could not log channel start event: {e}")
+
            # Create client manager with channel_id, redis_client AND worker_id (only if not already exists)
            if channel_id not in self.client_managers:
                client_manager = ClientManager(
@ -800,6 +799,44 @@ class ProxyServer:
            logger.error(f"Error cleaning zombie channel {channel_id}: {e}", exc_info=True)
            return False

+    def handle_client_disconnect(self, channel_id):
+        """
+        Handle client disconnect event - check if channel should shut down.
+        Can be called directly by owner or via PubSub from non-owner workers.
+        """
+        if channel_id not in self.client_managers:
+            return
+
+        try:
+            # VERIFY REDIS CLIENT COUNT DIRECTLY
+            client_set_key = RedisKeys.clients(channel_id)
+            total = self.redis_client.scard(client_set_key) or 0
+
+            if total == 0:
+                logger.debug(f"No clients left after disconnect event - stopping channel {channel_id}")
+                # Set the disconnect timer for other workers to see
+                disconnect_key = RedisKeys.last_client_disconnect(channel_id)
+                self.redis_client.setex(disconnect_key, 60, str(time.time()))
+
+                # Get configured shutdown delay or default
+                shutdown_delay = ConfigHelper.channel_shutdown_delay()
+
+                if shutdown_delay > 0:
+                    logger.info(f"Waiting {shutdown_delay}s before stopping channel...")
+                    gevent.sleep(shutdown_delay)
+
+                    # Re-check client count before stopping
+                    total = self.redis_client.scard(client_set_key) or 0
+                    if total > 0:
+                        logger.info(f"New clients connected during shutdown delay - aborting shutdown")
+                        self.redis_client.delete(disconnect_key)
+                        return
+
+                # Stop the channel directly
+                self.stop_channel(channel_id)
+        except Exception as e:
+            logger.error(f"Error handling client disconnect for channel {channel_id}: {e}")
+
    def stop_channel(self, channel_id):
        """Stop a channel with proper ownership handling"""
        try:
@ -847,6 +884,41 @@ class ProxyServer:
                self.release_ownership(channel_id)
                logger.info(f"Released ownership of channel {channel_id}")

+                # Log channel stop event (after cleanup, before releasing ownership section ends)
+                try:
+                    channel_obj = Channel.objects.get(uuid=channel_id)
+
+                    # Calculate runtime and get total bytes from metadata
+                    runtime = None
+                    total_bytes = None
+                    if self.redis_client:
+                        metadata_key = RedisKeys.channel_metadata(channel_id)
+                        metadata = self.redis_client.hgetall(metadata_key)
+                        if metadata:
+                            # Calculate runtime from init_time
+                            if b'init_time' in metadata:
+                                try:
+                                    init_time = float(metadata[b'init_time'].decode('utf-8'))
+                                    runtime = round(time.time() - init_time, 2)
+                                except Exception:
+                                    pass
+                            # Get total bytes transferred
+                            if b'total_bytes' in metadata:
+                                try:
+                                    total_bytes = int(metadata[b'total_bytes'].decode('utf-8'))
+                                except Exception:
+                                    pass
+
+                    log_system_event(
+                        'channel_stop',
+                        channel_id=channel_id,
+                        channel_name=channel_obj.name,
+                        runtime=runtime,
+                        total_bytes=total_bytes
+                    )
+                except Exception as e:
+                    logger.error(f"Could not log channel stop event: {e}")
+
            # Always clean up local resources - WITH SAFE CHECKS
            if channel_id in self.stream_managers:
                del self.stream_managers[channel_id]
@ -968,6 +1040,13 @@ class ProxyServer:

                            # If in connecting or waiting_for_clients state, check grace period
                            if channel_state in [ChannelState.CONNECTING, ChannelState.WAITING_FOR_CLIENTS]:
+                                # Check if channel is already stopping
+                                if self.redis_client:
+                                    stop_key = RedisKeys.channel_stopping(channel_id)
+                                    if self.redis_client.exists(stop_key):
+                                        logger.debug(f"Channel {channel_id} is already stopping - skipping monitor shutdown")
+                                        continue
+
                                # Get connection_ready_time from metadata (indicates if channel reached ready state)
                                connection_ready_time = None
                                if metadata and b'connection_ready_time' in metadata:
@ -1048,6 +1127,13 @@ class ProxyServer:
                                            logger.info(f"Channel {channel_id} activated with {total_clients} clients after grace period")
                            # If active and no clients, start normal shutdown procedure
                            elif channel_state not in [ChannelState.CONNECTING, ChannelState.WAITING_FOR_CLIENTS] and total_clients == 0:
+                                # Check if channel is already stopping
+                                if self.redis_client:
+                                    stop_key = RedisKeys.channel_stopping(channel_id)
+                                    if self.redis_client.exists(stop_key):
+                                        logger.debug(f"Channel {channel_id} is already stopping - skipping monitor shutdown")
+                                        continue
+
                                # Check if there's a pending no-clients timeout
                                disconnect_key = RedisKeys.last_client_disconnect(channel_id)
                                disconnect_time = None
--- a/apps/proxy/ts_proxy/services/channel_service.py
+++ b/apps/proxy/ts_proxy/services/channel_service.py
@ -14,6 +14,7 @@ from ..server import ProxyServer
 from ..redis_keys import RedisKeys
 from ..constants import EventType, ChannelState, ChannelMetadataField
 from ..url_utils import get_stream_info_for_switch
+from core.utils import log_system_event

 logger = logging.getLogger("ts_proxy")

@ -598,7 +599,7 @@ class ChannelService:
    def _update_stream_stats_in_db(stream_id, **stats):
        """Update stream stats in database"""
        from django.db import connection
-        
+
        try:
            from apps.channels.models import Stream
            from django.utils import timezone
@ -624,7 +625,7 @@ class ChannelService:
        except Exception as e:
            logger.error(f"Error updating stream stats in database for stream {stream_id}: {e}")
            return False
-            
+
        finally:
            # Always close database connection after update
            try:
@ -700,6 +701,7 @@ class ChannelService:
            RedisKeys.events_channel(channel_id),
            json.dumps(switch_request)
        )
+
        return True

    @staticmethod
--- a/apps/proxy/ts_proxy/stream_generator.py
+++ b/apps/proxy/ts_proxy/stream_generator.py
@ -8,6 +8,8 @@ import logging
 import threading
 import gevent  # Add this import at the top of your file
 from apps.proxy.config import TSConfig as Config
+from apps.channels.models import Channel
+from core.utils import log_system_event
 from .server import ProxyServer
 from .utils import create_ts_packet, get_logger
 from .redis_keys import RedisKeys
@ -88,6 +90,20 @@ class StreamGenerator:
            if not self._setup_streaming():
                return

+            # Log client connect event
+            try:
+                channel_obj = Channel.objects.get(uuid=self.channel_id)
+                log_system_event(
+                    'client_connect',
+                    channel_id=self.channel_id,
+                    channel_name=channel_obj.name,
+                    client_ip=self.client_ip,
+                    client_id=self.client_id,
+                    user_agent=self.client_user_agent[:100] if self.client_user_agent else None
+                )
+            except Exception as e:
+                logger.error(f"Could not log client connect event: {e}")
+
            # Main streaming loop
            for chunk in self._stream_data_generator():
                yield chunk
@ -439,6 +455,22 @@ class StreamGenerator:
            total_clients = client_manager.get_total_client_count()
            logger.info(f"[{self.client_id}] Disconnected after {elapsed:.2f}s (local: {local_clients}, total: {total_clients})")

+            # Log client disconnect event
+            try:
+                channel_obj = Channel.objects.get(uuid=self.channel_id)
+                log_system_event(
+                    'client_disconnect',
+                    channel_id=self.channel_id,
+                    channel_name=channel_obj.name,
+                    client_ip=self.client_ip,
+                    client_id=self.client_id,
+                    user_agent=self.client_user_agent[:100] if self.client_user_agent else None,
+                    duration=round(elapsed, 2),
+                    bytes_sent=self.bytes_sent
+                )
+            except Exception as e:
+                logger.error(f"Could not log client disconnect event: {e}")
+
            # Schedule channel shutdown if no clients left
            if not stream_released:  # Only if we haven't already released the stream
                self._schedule_channel_shutdown_if_needed(local_clients)
--- a/apps/proxy/ts_proxy/stream_manager.py
+++ b/apps/proxy/ts_proxy/stream_manager.py
@ -16,6 +16,7 @@ from apps.proxy.config import TSConfig as Config
 from apps.channels.models import Channel, Stream
 from apps.m3u.models import M3UAccount, M3UAccountProfile
 from core.models import UserAgent, CoreSettings
+from core.utils import log_system_event
 from .stream_buffer import StreamBuffer
 from .utils import detect_stream_type, get_logger
 from .redis_keys import RedisKeys
@ -260,6 +261,20 @@ class StreamManager:
                            # Store connection start time to measure success duration
                            connection_start_time = time.time()

+                            # Log reconnection event if this is a retry (not first attempt)
+                            if self.retry_count > 0:
+                                try:
+                                    channel_obj = Channel.objects.get(uuid=self.channel_id)
+                                    log_system_event(
+                                        'channel_reconnect',
+                                        channel_id=self.channel_id,
+                                        channel_name=channel_obj.name,
+                                        attempt=self.retry_count + 1,
+                                        max_attempts=self.max_retries
+                                    )
+                                except Exception as e:
+                                    logger.error(f"Could not log reconnection event: {e}")
+
                            # Successfully connected - read stream data until disconnect/error
                            self._process_stream_data()
                            # If we get here, the connection was closed/failed
@ -289,6 +304,20 @@ class StreamManager:
                        if self.retry_count >= self.max_retries:
                            url_failed = True
                            logger.warning(f"Maximum retry attempts ({self.max_retries}) reached for URL: {self.url} for channel: {self.channel_id}")
+
+                            # Log connection error event
+                            try:
+                                channel_obj = Channel.objects.get(uuid=self.channel_id)
+                                log_system_event(
+                                    'channel_error',
+                                    channel_id=self.channel_id,
+                                    channel_name=channel_obj.name,
+                                    error_type='connection_failed',
+                                    url=self.url[:100] if self.url else None,
+                                    attempts=self.max_retries
+                                )
+                            except Exception as e:
+                                logger.error(f"Could not log connection error event: {e}")
                        else:
                            # Wait with exponential backoff before retrying
                            timeout = min(.25 * self.retry_count, 3)  # Cap at 3 seconds
@ -302,6 +331,21 @@ class StreamManager:

                        if self.retry_count >= self.max_retries:
                            url_failed = True
+
+                            # Log connection error event with exception details
+                            try:
+                                channel_obj = Channel.objects.get(uuid=self.channel_id)
+                                log_system_event(
+                                    'channel_error',
+                                    channel_id=self.channel_id,
+                                    channel_name=channel_obj.name,
+                                    error_type='connection_exception',
+                                    error_message=str(e)[:200],
+                                    url=self.url[:100] if self.url else None,
+                                    attempts=self.max_retries
+                                )
+                            except Exception as log_error:
+                                logger.error(f"Could not log connection error event: {log_error}")
                        else:
                            # Wait with exponential backoff before retrying
                            timeout = min(.25 * self.retry_count, 3)  # Cap at 3 seconds
@ -702,6 +746,19 @@ class StreamManager:
                                # Reset buffering state
                                self.buffering = False
                                self.buffering_start_time = None
+
+                                # Log failover event
+                                try:
+                                    channel_obj = Channel.objects.get(uuid=self.channel_id)
+                                    log_system_event(
+                                        'channel_failover',
+                                        channel_id=self.channel_id,
+                                        channel_name=channel_obj.name,
+                                        reason='buffering_timeout',
+                                        duration=buffering_duration
+                                    )
+                                except Exception as e:
+                                    logger.error(f"Could not log failover event: {e}")
                            else:
                                logger.error(f"Failed to switch to next stream for channel {self.channel_id} after buffering timeout")
                else:
@ -709,6 +766,19 @@ class StreamManager:
                    self.buffering = True
                    self.buffering_start_time = time.time()
                    logger.warning(f"Buffering started for channel {self.channel_id} - speed: {ffmpeg_speed}x")
+
+                    # Log system event for buffering
+                    try:
+                        channel_obj = Channel.objects.get(uuid=self.channel_id)
+                        log_system_event(
+                            'channel_buffering',
+                            channel_id=self.channel_id,
+                            channel_name=channel_obj.name,
+                            speed=ffmpeg_speed
+                        )
+                    except Exception as e:
+                        logger.error(f"Could not log buffering event: {e}")
+
                # Log buffering warning
                logger.debug(f"FFmpeg speed on channel {self.channel_id} is below {self.buffering_speed} ({ffmpeg_speed}x) - buffering detected")
                # Set channel state to buffering
@ -1004,6 +1074,19 @@ class StreamManager:
                except Exception as e:
                    logger.warning(f"Failed to reset buffer position: {e}")

+            # Log stream switch event
+            try:
+                channel_obj = Channel.objects.get(uuid=self.channel_id)
+                log_system_event(
+                    'stream_switch',
+                    channel_id=self.channel_id,
+                    channel_name=channel_obj.name,
+                    new_url=new_url[:100] if new_url else None,
+                    stream_id=stream_id
+                )
+            except Exception as e:
+                logger.error(f"Could not log stream switch event: {e}")
+
            return True
        except Exception as e:
            logger.error(f"Error during URL update for channel {self.channel_id}: {e}", exc_info=True)
@ -1122,6 +1205,19 @@ class StreamManager:
                if connection_result:
                    self.connection_start_time = time.time()
                    logger.info(f"Reconnect successful for channel {self.channel_id}")
+
+                    # Log reconnection event
+                    try:
+                        channel_obj = Channel.objects.get(uuid=self.channel_id)
+                        log_system_event(
+                            'channel_reconnect',
+                            channel_id=self.channel_id,
+                            channel_name=channel_obj.name,
+                            reason='health_monitor'
+                        )
+                    except Exception as e:
+                        logger.error(f"Could not log reconnection event: {e}")
+
                    return True
                else:
                    logger.warning(f"Reconnect failed for channel {self.channel_id}")
@ -1199,25 +1295,17 @@ class StreamManager:
                logger.debug(f"Error closing socket for channel {self.channel_id}: {e}")
                pass

-        # Enhanced transcode process cleanup with more aggressive termination
+        # Enhanced transcode process cleanup with immediate termination
        if self.transcode_process:
            try:
-                # First try polite termination
-                logger.debug(f"Terminating transcode process for channel {self.channel_id}")
-                self.transcode_process.terminate()
+                logger.debug(f"Killing transcode process for channel {self.channel_id}")
+                self.transcode_process.kill()

-                # Give it a short time to terminate gracefully
+                # Give it a very short time to die
                try:
-                    self.transcode_process.wait(timeout=1.0)
+                    self.transcode_process.wait(timeout=0.5)
                except subprocess.TimeoutExpired:
-                    # If it doesn't terminate quickly, kill it
-                    logger.warning(f"Transcode process didn't terminate within timeout, killing forcefully for channel {self.channel_id}")
-                    self.transcode_process.kill()
-
-                    try:
-                        self.transcode_process.wait(timeout=1.0)
-                    except subprocess.TimeoutExpired:
-                        logger.error(f"Failed to kill transcode process even with force for channel {self.channel_id}")
+                    logger.error(f"Failed to kill transcode process even with force for channel {self.channel_id}")
            except Exception as e:
                logger.debug(f"Error terminating transcode process for channel {self.channel_id}: {e}")