package db

import (
	"runtime"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/stretchr/testify/assert"
)

const (
	fiveHundred = 500 * time.Millisecond
	oneHundred  = 100 * time.Millisecond
	fifty       = 50 * time.Millisecond
)
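
// Typical lifecycle of the collector under test, sketched from the calls
// exercised in the tests below (deleteFunc, nodeID, and expiry are
// stand-ins, not fixed names):
//
//	gc := NewEphemeralGarbageCollector(deleteFunc)
//	go gc.Start()
//	defer gc.Close()
//	gc.Schedule(nodeID, expiry)
//	gc.Cancel(nodeID)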

// TestEphemeralGarbageCollectorGoRoutineLeak checks for goroutine leaks in
// EphemeralGarbageCollector. It creates a new EphemeralGarbageCollector,
// schedules several nodes for deletion with a short expiry, verifies that the
// nodes are deleted once the expiry passes, and then checks for leaked
// goroutines after the garbage collector is closed.
func TestEphemeralGarbageCollectorGoRoutineLeak(t *testing.T) {
	// Count goroutines at the start.
	initialGoroutines := runtime.NumGoroutine()
	t.Logf("Initial number of goroutines: %d", initialGoroutines)

	// Basic deletion tracking mechanism.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
		deletionWg  sync.WaitGroup
	)
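
	// deleteFunc records each deletion and releases one WaitGroup slot per node.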
	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
		deletionWg.Done()
	}

	// Start the GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)
	go gc.Start()

	// Schedule several nodes for deletion with a short expiry.
	const (
		expiry   = fifty
		numNodes = 100
	)

	// Set up the wait group for the expected deletions.
	deletionWg.Add(numNodes)

	for i := 1; i <= numNodes; i++ {
		gc.Schedule(types.NodeID(i), expiry) //nolint:gosec
	}

	// Wait for all scheduled deletions to complete.
	deletionWg.Wait()

	// Check that all nodes were deleted.
	deleteMutex.Lock()
	assert.Len(t, deletedIDs, numNodes, "Not all nodes were deleted")
	deleteMutex.Unlock()

	// Schedule and immediately cancel to exercise the cancellation path.
	for i := numNodes + 1; i <= numNodes*2; i++ {
		nodeID := types.NodeID(i) //nolint:gosec
		gc.Schedule(nodeID, time.Hour)
		gc.Cancel(nodeID)
	}

	// Close the GC.
	gc.Close()

	// Wait for goroutines to clean up and verify there are no leaks.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		finalGoroutines := runtime.NumGoroutine()
		// NB: allow for a small number of extra goroutines created by the test itself.
		assert.LessOrEqual(c, finalGoroutines, initialGoroutines+5,
			"There are significantly more goroutines after GC usage, which suggests a leak")
	}, time.Second, 10*time.Millisecond, "goroutines should clean up after GC close")
	t.Logf("Final number of goroutines: %d", runtime.NumGoroutine())
}

// TestEphemeralGarbageCollectorReschedule tests rescheduling of nodes in
// EphemeralGarbageCollector. It schedules a node for deletion with a long
// expiry, reschedules it with a shorter one, and verifies that the node is
// deleted exactly once.
func TestEphemeralGarbageCollectorReschedule(t *testing.T) {
	// Deletion tracking mechanism.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
	)
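
	// Buffered (size 1) so the delete callback never blocks on notifying,
	// even if the test has not reached the receive yet.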
	deletionNotifier := make(chan types.NodeID, 1)

	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
		deletionNotifier <- nodeID
	}

	// Start the GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)
	go gc.Start()
	defer gc.Close()

	const (
		shortExpiry = fifty
		longExpiry  = 1 * time.Hour
	)

	nodeID := types.NodeID(1)

	// Schedule the node for deletion with the long expiry.
	gc.Schedule(nodeID, longExpiry)
	// Reschedule the same node with the shorter expiry.
	gc.Schedule(nodeID, shortExpiry)

	// Wait for the deletion notification, with a timeout.
	select {
	case deletedNodeID := <-deletionNotifier:
		assert.Equal(t, nodeID, deletedNodeID, "The correct node should be deleted")
	case <-time.After(time.Second):
		t.Fatal("Timed out waiting for node deletion")
	}

	// Verify that the node was deleted exactly once.
	deleteMutex.Lock()
	assert.Len(t, deletedIDs, 1, "Node should be deleted exactly once")
	assert.Equal(t, nodeID, deletedIDs[0], "The correct node should be deleted")
	deleteMutex.Unlock()
}

// TestEphemeralGarbageCollectorCancelAndReschedule tests cancellation and
// rescheduling of nodes in EphemeralGarbageCollector. It schedules a node for
// deletion, cancels it, reschedules it, and verifies that the node is deleted
// exactly once.
func TestEphemeralGarbageCollectorCancelAndReschedule(t *testing.T) {
	// Deletion tracking mechanism.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
	)
	deletionNotifier := make(chan types.NodeID, 1)

	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
		deletionNotifier <- nodeID
	}

	// Start the GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)
	go gc.Start()
	defer gc.Close()

	nodeID := types.NodeID(1)
	const expiry = fifty

	// Schedule the node for deletion.
	gc.Schedule(nodeID, expiry)
	// Cancel the scheduled deletion.
	gc.Cancel(nodeID)

	// Use a timeout to verify that no deletion occurred.
	select {
	case <-deletionNotifier:
		t.Fatal("Node was deleted after cancellation")
	case <-time.After(expiry * 2): // A negative test still needs a timeout.
		// This is expected: no deletion should occur.
	}

	deleteMutex.Lock()
	assert.Empty(t, deletedIDs, "Node should not be deleted after cancellation")
	deleteMutex.Unlock()

	// Reschedule the node.
	gc.Schedule(nodeID, expiry)

	// Wait for the deletion, with a timeout.
	select {
	case deletedNodeID := <-deletionNotifier:
		// Verify that the correct node was deleted.
		assert.Equal(t, nodeID, deletedNodeID, "The correct node should be deleted")
	case <-time.After(time.Second): // Longer timeout as a safety net.
		t.Fatal("Timed out waiting for node deletion")
	}

	// Verify the final state.
	deleteMutex.Lock()
	assert.Len(t, deletedIDs, 1, "Node should be deleted after rescheduling")
	assert.Equal(t, nodeID, deletedIDs[0], "The correct node should be deleted")
	deleteMutex.Unlock()
}

// TestEphemeralGarbageCollectorCloseBeforeTimerFires verifies that closing the
// EphemeralGarbageCollector before a scheduled timer fires prevents the
// deletion: it schedules a node, closes the GC, and checks that the node is
// never deleted.
func TestEphemeralGarbageCollectorCloseBeforeTimerFires(t *testing.T) {
	// Deletion tracking.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
	)
	deletionNotifier := make(chan types.NodeID, 1)

	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
		deletionNotifier <- nodeID
	}

	// Start the GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)
	go gc.Start()

	const (
		longExpiry = 1 * time.Hour
		shortWait  = fifty * 2
	)

	// Schedule a node for deletion with a long expiry.
	gc.Schedule(types.NodeID(1), longExpiry)

	// Close the GC before the timer fires.
	gc.Close()

	// Verify that no deletion occurred within a reasonable time.
	select {
	case <-deletionNotifier:
		t.Fatal("Node was deleted after GC was closed, which should not happen")
	case <-time.After(shortWait):
		// Expected: no deletion should occur.
	}

	// Verify that no deletion occurred.
	deleteMutex.Lock()
	assert.Empty(t, deletedIDs, "No node should be deleted when GC is closed before timer fires")
	deleteMutex.Unlock()
}

// TestEphemeralGarbageCollectorScheduleAfterClose verifies that calling
// Schedule after Close is a no-op and doesn't cause any panics, goroutine
// leaks, or other issues.
func TestEphemeralGarbageCollectorScheduleAfterClose(t *testing.T) {
	// Count initial goroutines to check for leaks.
	initialGoroutines := runtime.NumGoroutine()
	t.Logf("Initial number of goroutines: %d", initialGoroutines)

	// Deletion tracking.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
	)
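
	// The channel is closed, not sent on: a second deletion would panic on
	// the double close, so an unexpected extra delete cannot go unnoticed.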
	nodeDeleted := make(chan struct{})

	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
		close(nodeDeleted) // Signal that the deletion happened.
	}

	// Start a new GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)

	// Use a WaitGroup to ensure the GC goroutine has started.
	var startWg sync.WaitGroup
	startWg.Add(1)
	go func() {
		startWg.Done() // Signal that the goroutine has started.
		gc.Start()
	}()
	startWg.Wait() // Wait for the GC goroutine to start.

	// Close the GC right away.
	gc.Close()

	// Now try to schedule a node for deletion with a very short expiry.
	// If Schedule incorrectly created a timer, it would fire quickly.
	nodeID := types.NodeID(1)
	gc.Schedule(nodeID, 1*time.Millisecond)

	// Check that no node was deleted (which shouldn't happen), using a
	// timeout to wait for the potential deletion.
	select {
	case <-nodeDeleted:
		t.Fatal("Node was deleted after GC was closed, which should not happen")
	case <-time.After(fiveHundred):
		// This is the expected path: no deletion should occur.
	}

	// Check that no node was deleted.
	deleteMutex.Lock()
	nodesDeleted := len(deletedIDs)
	deleteMutex.Unlock()
	assert.Equal(t, 0, nodesDeleted, "No nodes should be deleted when Schedule is called after Close")

	// Check for goroutine leaks after the GC is fully closed.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		finalGoroutines := runtime.NumGoroutine()
		// Allow for small fluctuations in goroutine count from the test's own routines.
		assert.LessOrEqual(c, finalGoroutines, initialGoroutines+2,
			"There should be no significant goroutine leaks when Schedule is called after Close")
	}, time.Second, 10*time.Millisecond, "goroutines should clean up after GC close")
	t.Logf("Final number of goroutines: %d", runtime.NumGoroutine())
}

// TestEphemeralGarbageCollectorConcurrentScheduleAndClose tests the behavior
// of the garbage collector when Schedule and Close are called concurrently
// from multiple goroutines.
func TestEphemeralGarbageCollectorConcurrentScheduleAndClose(t *testing.T) {
	// Count initial goroutines.
	initialGoroutines := runtime.NumGoroutine()
	t.Logf("Initial number of goroutines: %d", initialGoroutines)

	// Deletion tracking mechanism.
	var (
		deletedIDs  []types.NodeID
		deleteMutex sync.Mutex
	)
	deleteFunc := func(nodeID types.NodeID) {
		deleteMutex.Lock()
		deletedIDs = append(deletedIDs, nodeID)
		deleteMutex.Unlock()
	}

	// Start the GC.
	gc := NewEphemeralGarbageCollector(deleteFunc)
	go gc.Start()

	// Number of concurrent scheduling goroutines and per-scheduler limits.
	const (
		numSchedulers     = 10
		nodesPerScheduler = 50
		closeAfterNodes   = 25 // Close the GC after this many nodes per scheduler.
	)

	// Use a WaitGroup to wait for all scheduling goroutines to finish.
	var wg sync.WaitGroup
	wg.Add(numSchedulers + 1) // +1 for the closer goroutine.

	// Create a stopper channel to signal scheduling goroutines to stop.
	stopScheduling := make(chan struct{})

	// Track how many nodes have been scheduled.
	var scheduledCount int64
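
	// scheduledCount is shared with the closer goroutine below, so it is
	// only accessed through the atomic package.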

	// Launch goroutines that continuously schedule nodes.
	for schedulerIndex := range numSchedulers {
		go func(schedulerID int) {
			defer wg.Done()
			baseNodeID := schedulerID * nodesPerScheduler

			// Keep scheduling nodes until signaled to stop.
			for j := range nodesPerScheduler {
				select {
				case <-stopScheduling:
					return
				default:
					nodeID := types.NodeID(baseNodeID + j + 1) //nolint:gosec
					gc.Schedule(nodeID, 1*time.Hour) // Long expiry so it doesn't fire during the test.
					atomic.AddInt64(&scheduledCount, 1)

					// Yield to other goroutines to introduce variability.
					runtime.Gosched()
				}
			}
		}(schedulerIndex)
	}

	// Close the garbage collector after enough nodes have been scheduled.
	go func() {
		defer wg.Done()

		// Wait until enough nodes have been scheduled.
		for atomic.LoadInt64(&scheduledCount) < int64(numSchedulers*closeAfterNodes) {
			runtime.Gosched()
		}

		// Close the GC.
		gc.Close()

		// Signal the schedulers to stop.
		close(stopScheduling)
	}()

	// Wait for all goroutines to complete.
	wg.Wait()

	// Check for leaks using EventuallyWithT.
	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		finalGoroutines := runtime.NumGoroutine()
		// Allow a small amount of goroutine-count variation caused by the test itself.
		assert.LessOrEqual(c, finalGoroutines, initialGoroutines+5,
			"There should be no significant goroutine leaks during concurrent Schedule and Close operations")
	}, time.Second, 10*time.Millisecond, "goroutines should clean up")
	t.Logf("Final number of goroutines: %d", runtime.NumGoroutine())
}