Mirror of https://github.com/juanfont/headscale.git
hscontrol: extract magic numbers to named constants
Define named constants for magic numbers to improve code clarity:
- Batcher timeouts and intervals (initialMapSendTimeout, etc.)
- Work channel multiplier
- OIDC provider init timeout
- CSRF token length
- SQLite WAL autocheckpoint default
parent f969745db4
commit 1462847878
6 changed files with 44 additions and 15 deletions
@@ -67,6 +67,9 @@ var (
 	)
 )
 
+// oidcProviderInitTimeout is the timeout for initializing the OIDC provider.
+const oidcProviderInitTimeout = 30 * time.Second
+
 var (
 	debugDeadlock        = envknob.Bool("HEADSCALE_DEBUG_DEADLOCK")
 	debugDeadlockTimeout = envknob.RegisterDuration("HEADSCALE_DEBUG_DEADLOCK_TIMEOUT")

@@ -161,7 +164,7 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
 	authProvider = NewAuthProviderWeb(cfg.ServerURL)
 	if cfg.OIDC.Issuer != "" {
-		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		ctx, cancel := context.WithTimeout(context.Background(), oidcProviderInitTimeout)
 		defer cancel()
 
 		oidcProvider, err := NewAuthProviderOIDC(

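For context, the hunk above bounds OIDC provider initialization with a context deadline. Below is a minimal, self-contained sketch of that pattern; initProvider is a hypothetical stand-in for NewAuthProviderOIDC, not headscale's actual code.

package main

import (
	"context"
	"fmt"
	"time"
)

// oidcProviderInitTimeout mirrors the constant added in the hunk above.
const oidcProviderInitTimeout = 30 * time.Second

// initProvider is a hypothetical stand-in for NewAuthProviderOIDC: it either
// finishes quickly or reports the context's deadline error.
func initProvider(ctx context.Context) error {
	select {
	case <-time.After(50 * time.Millisecond): // pretend the provider came up fast
		return nil
	case <-ctx.Done():
		return ctx.Err() // context.DeadlineExceeded once the timeout elapses
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), oidcProviderInitTimeout)
	defer cancel()

	if err := initProvider(ctx); err != nil {
		fmt.Println("OIDC provider init failed:", err)
		return
	}
	fmt.Println("OIDC provider initialized")
}
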
@@ -22,6 +22,9 @@ var (
 const (
 	// DefaultBusyTimeout is the default busy timeout in milliseconds.
 	DefaultBusyTimeout = 10000
+	// DefaultWALAutocheckpoint is the default WAL autocheckpoint value (number of pages).
+	// SQLite default is 1000 pages.
+	DefaultWALAutocheckpoint = 1000
 )
 
 // JournalMode represents SQLite journal_mode pragma values.

@@ -310,7 +313,7 @@ func Default(path string) *Config {
 		BusyTimeout:       DefaultBusyTimeout,
 		JournalMode:       JournalModeWAL,
 		AutoVacuum:        AutoVacuumIncremental,
-		WALAutocheckpoint: 1000,
+		WALAutocheckpoint: DefaultWALAutocheckpoint,
 		Synchronous:       SynchronousNormal,
 		ForeignKeys:       true,
 		TxLock:            TxLockImmediate,

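wal_autocheckpoint controls after how many WAL pages SQLite checkpoints the write-ahead log back into the main database file; 1000 pages is SQLite's built-in default, which the new constant names. The sketch below sets the pragma and reads it back; the mattn/go-sqlite3 driver and the DSN are assumptions for illustration, not how headscale opens its database.

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/mattn/go-sqlite3" // driver choice is an assumption for this sketch
)

func main() {
	db, err := sql.Open("sqlite3", "file:example.db?_journal_mode=WAL")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// 1000 pages is SQLite's built-in default; the constant above just names it.
	if _, err := db.Exec("PRAGMA wal_autocheckpoint = 1000"); err != nil {
		log.Fatal(err)
	}

	var pages int
	if err := db.QueryRow("PRAGMA wal_autocheckpoint").Scan(&pages); err != nil {
		log.Fatal(err)
	}
	fmt.Println("wal_autocheckpoint:", pages) // 1000
}
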
@@ -22,6 +22,10 @@ var (
 	ErrNodeConnectionNil = errors.New("nodeConnection is nil")
 )
 
+// workChannelMultiplier is the multiplier for work channel capacity based on worker count.
+// The size is arbitrary chosen, the sizing should be revisited.
+const workChannelMultiplier = 200
+
 var mapResponseGenerated = promauto.NewCounterVec(prometheus.CounterOpts{
 	Namespace: "headscale",
 	Name:      "mapresponse_generated_total",

@@ -49,8 +53,7 @@ func NewBatcher(batchTime time.Duration, workers int, mapper *mapper) *LockFreeBatcher {
 		workers: workers,
 		tick:    time.NewTicker(batchTime),
 
-		// The size of this channel is arbitrary chosen, the sizing should be revisited.
-		workCh: make(chan work, workers*200),
+		workCh:         make(chan work, workers*workChannelMultiplier),
 		nodes:          xsync.NewMap[types.NodeID, *multiChannelNodeConn](),
 		connected:      xsync.NewMap[types.NodeID, *time.Time](),
 		pendingChanges: xsync.NewMap[types.NodeID, []change.Change](),

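The hunk above replaces the inline workers*200 with the named multiplier. A minimal sketch of the underlying idea: a buffered work channel sized from the worker count, so producers only block (backpressure) once the buffer fills. The work struct and worker loop are simplified stand-ins, not headscale's batcher.

package main

import (
	"fmt"
	"sync"
)

// workChannelMultiplier mirrors the constant above; the capacity only bounds how
// much work can queue before senders block, providing backpressure.
const workChannelMultiplier = 200

type work struct{ id int }

func main() {
	workers := 4
	workCh := make(chan work, workers*workChannelMultiplier)

	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for item := range workCh {
				_ = item // a real worker would generate a map response here
			}
		}()
	}

	for i := 0; i < 10; i++ {
		workCh <- work{id: i}
	}
	close(workCh)
	wg.Wait()
	fmt.Println("all queued work processed")
}
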
@@ -25,6 +25,25 @@ var (
 	ErrConnectionTimeout = errors.New("connection timeout sending to channel (likely stale connection)")
 )
 
+// Batcher configuration constants.
+const (
+	// initialMapSendTimeout is the timeout for sending the initial map response to a new connection.
+	initialMapSendTimeout = 5 * time.Second
+
+	// offlineNodeCleanupThreshold is how long a node must be offline before it's cleaned up.
+	offlineNodeCleanupThreshold = 15 * time.Minute
+
+	// offlineNodeCleanupInterval is the interval between cleanup runs.
+	offlineNodeCleanupInterval = 5 * time.Minute
+
+	// connectionSendTimeout is the timeout for detecting stale connections.
+	// Kept short to quickly detect Docker containers that are forcefully terminated.
+	connectionSendTimeout = 50 * time.Millisecond
+
+	// connectionIDBytes is the number of random bytes used for connection IDs.
+	connectionIDBytes = 8
+)
+
 // LockFreeBatcher uses atomic operations and concurrent maps to eliminate mutex contention.
 type LockFreeBatcher struct {
 	tick *time.Ticker

@@ -94,9 +113,9 @@ func (b *LockFreeBatcher) AddNode(id types.NodeID, c chan<- *tailcfg.MapResponse
 	select {
 	case c <- initialMap:
 		// Success
-	case <-time.After(5 * time.Second):
+	case <-time.After(initialMapSendTimeout):
 		log.Error().Uint64("node.id", id.Uint64()).Err(ErrInitialMapTimeout).Msg("Initial map send timeout")
-		log.Debug().Caller().Uint64("node.id", id.Uint64()).Dur("timeout.duration", 5*time.Second).
+		log.Debug().Caller().Uint64("node.id", id.Uint64()).Dur("timeout.duration", initialMapSendTimeout).
 			Msg("Initial map send timed out because channel was blocked or receiver not ready")
 		nodeConn.removeConnectionByChannel(c)

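The AddNode change above swaps the literal 5-second timeout for initialMapSendTimeout in a select that bounds the channel send. Below is a minimal sketch of that bounded-send pattern with a generic channel and a short timeout so the example finishes quickly; the names are illustrative, not headscale's.

package main

import (
	"errors"
	"fmt"
	"time"
)

var errSendTimeout = errors.New("send timed out: receiver not ready")

// trySend shows the bounded-send pattern: give up if the receiver does not drain
// the channel within the timeout instead of blocking forever. Headscale passes
// initialMapSendTimeout (5s) here; the example uses a short value so it runs fast.
func trySend(c chan<- string, msg string, timeout time.Duration) error {
	select {
	case c <- msg:
		return nil
	case <-time.After(timeout):
		return errSendTimeout
	}
}

func main() {
	c := make(chan string) // unbuffered and never read, so the timeout path triggers
	fmt.Println(trySend(c, "initial map", 10*time.Millisecond))
}
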
@@ -187,7 +206,7 @@ func (b *LockFreeBatcher) doWork() {
 	}
 
 	// Create a cleanup ticker for removing truly disconnected nodes
-	cleanupTicker := time.NewTicker(5 * time.Minute)
+	cleanupTicker := time.NewTicker(offlineNodeCleanupInterval)
 	defer cleanupTicker.Stop()
 
 	for {

@@ -395,14 +414,13 @@ func (b *LockFreeBatcher) processBatchedChanges() {
 // cleanupOfflineNodes removes nodes that have been offline for too long to prevent memory leaks.
 // TODO(kradalby): reevaluate if we want to keep this.
 func (b *LockFreeBatcher) cleanupOfflineNodes() {
-	cleanupThreshold := 15 * time.Minute
 	now := time.Now()
 
 	var nodesToCleanup []types.NodeID
 
 	// Find nodes that have been offline for too long
 	b.connected.Range(func(nodeID types.NodeID, disconnectTime *time.Time) bool {
-		if disconnectTime != nil && now.Sub(*disconnectTime) > cleanupThreshold {
+		if disconnectTime != nil && now.Sub(*disconnectTime) > offlineNodeCleanupThreshold {
 			// Double-check the node doesn't have active connections
 			if nodeConn, exists := b.nodes.Load(nodeID); exists {
 				if !nodeConn.hasActiveConnections() {

@@ -417,7 +435,7 @@ func (b *LockFreeBatcher) cleanupOfflineNodes() {
 	// Clean up the identified nodes
 	for _, nodeID := range nodesToCleanup {
 		log.Info().Uint64("node.id", nodeID.Uint64()).
-			Dur("offline_duration", cleanupThreshold).
+			Dur("offline_duration", offlineNodeCleanupThreshold).
 			Msg("Cleaning up node that has been offline for too long")
 
 		b.nodes.Delete(nodeID)

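cleanupOfflineNodes now compares disconnect times against offlineNodeCleanupThreshold and is driven every offlineNodeCleanupInterval. A simplified single-pass sketch of that idea, using a plain map keyed by node ID instead of headscale's xsync maps:

package main

import (
	"fmt"
	"time"
)

// Mirror the constants above; node IDs and the disconnect map are simplified stand-ins.
const (
	offlineNodeCleanupThreshold = 15 * time.Minute
	offlineNodeCleanupInterval  = 5 * time.Minute // in headscale this drives a time.Ticker
)

func main() {
	// disconnectedAt records when each node went offline.
	disconnectedAt := map[uint64]time.Time{
		1: time.Now().Add(-20 * time.Minute), // offline long enough to be cleaned up
		2: time.Now().Add(-2 * time.Minute),  // still within the threshold
	}

	// One cleanup pass; headscale repeats this every offlineNodeCleanupInterval.
	now := time.Now()
	for id, t := range disconnectedAt {
		if now.Sub(t) > offlineNodeCleanupThreshold {
			delete(disconnectedAt, id)
			fmt.Println("cleaned up node", id)
		}
	}
	fmt.Println("nodes still tracked:", len(disconnectedAt))
}
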
@@ -532,7 +550,7 @@ type multiChannelNodeConn struct {
 
 // generateConnectionID generates a unique connection identifier.
 func generateConnectionID() string {
-	bytes := make([]byte, 8)
+	bytes := make([]byte, connectionIDBytes)
 	_, _ = rand.Read(bytes)
 
 	return hex.EncodeToString(bytes)

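generateConnectionID reads connectionIDBytes (8) random bytes and hex-encodes them, giving a 16-character identifier. A runnable sketch of the same helper:

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// connectionIDBytes mirrors the constant above: 8 random bytes hex-encode to a
// 16-character identifier.
const connectionIDBytes = 8

func generateConnectionID() string {
	bytes := make([]byte, connectionIDBytes)
	_, _ = rand.Read(bytes) // crypto/rand; error ignored as in the original
	return hex.EncodeToString(bytes)
}

func main() {
	fmt.Println(generateConnectionID()) // e.g. 3f9c1a7be2d04c55
}
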
@@ -711,7 +729,7 @@ func (entry *connectionEntry) send(data *tailcfg.MapResponse) error {
 		// Update last used timestamp on successful send
 		entry.lastUsed.Store(time.Now().Unix())
 		return nil
-	case <-time.After(50 * time.Millisecond):
+	case <-time.After(connectionSendTimeout):
 		// Connection is likely stale - client isn't reading from channel
 		// This catches the case where Docker containers are killed but channels remain open
 		return fmt.Errorf("%w: connection %s", ErrConnectionTimeout, entry.id)

@@ -28,6 +28,7 @@ const (
 	defaultOAuthOptionsCount = 3
 	registerCacheExpiration  = time.Minute * 15
 	registerCacheCleanup     = time.Minute * 20
+	csrfTokenLength          = 64
 )
 
 var (

@@ -614,7 +615,7 @@ func getCookieName(baseName, value string) string {
 }
 
 func setCSRFCookie(w http.ResponseWriter, r *http.Request, name string) (string, error) {
-	val, err := util.GenerateRandomStringURLSafe(64)
+	val, err := util.GenerateRandomStringURLSafe(csrfTokenLength)
 	if err != nil {
 		return val, err
 	}

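setCSRFCookie now asks for csrfTokenLength (64) units of randomness. The sketch below assumes the common pattern of reading n random bytes and encoding them with URL-safe base64; the local helper is an illustration, and headscale's util.GenerateRandomStringURLSafe may differ in detail.

package main

import (
	"crypto/rand"
	"encoding/base64"
	"fmt"
)

// csrfTokenLength mirrors the constant above.
const csrfTokenLength = 64

// generateRandomStringURLSafe is an illustrative local helper, assuming the
// common "n random bytes, URL-safe base64" pattern; not headscale's implementation.
func generateRandomStringURLSafe(n int) (string, error) {
	b := make([]byte, n)
	if _, err := rand.Read(b); err != nil {
		return "", err
	}
	return base64.RawURLEncoding.EncodeToString(b), nil
}

func main() {
	val, err := generateRandomStringURLSafe(csrfTokenLength)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(val), val) // 64 random bytes encode to 86 URL-safe characters
}
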
@@ -29,7 +29,8 @@ const (
 	PKCEMethodPlain string = "plain"
 	PKCEMethodS256  string = "S256"
 
-	defaultNodeStoreBatchSize = 100
+	defaultNodeStoreBatchSize = 100
+	defaultWALAutocheckpoint  = 1000 // SQLite default
 )
 
 var (

@@ -380,7 +381,7 @@ func LoadConfig(path string, isFile bool) error {
 	viper.SetDefault("database.postgres.conn_max_idle_time_secs", 3600)
 
 	viper.SetDefault("database.sqlite.write_ahead_log", true)
-	viper.SetDefault("database.sqlite.wal_autocheckpoint", 1000) // SQLite default
+	viper.SetDefault("database.sqlite.wal_autocheckpoint", defaultWALAutocheckpoint)
 
 	viper.SetDefault("oidc.scope", []string{oidc.ScopeOpenID, "profile", "email"})
 	viper.SetDefault("oidc.only_start_if_oidc_is_available", true)

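LoadConfig registers the new constant as a viper default. A small sketch of how viper.SetDefault interacts with explicitly set values; the key mirrors the one above, the rest is illustrative:

package main

import (
	"fmt"

	"github.com/spf13/viper"
)

// defaultWALAutocheckpoint mirrors the constant above.
const defaultWALAutocheckpoint = 1000

func main() {
	// A default only applies while the key is absent from config files,
	// environment variables, flags, and explicit Set calls.
	viper.SetDefault("database.sqlite.wal_autocheckpoint", defaultWALAutocheckpoint)
	fmt.Println(viper.GetInt("database.sqlite.wal_autocheckpoint")) // 1000

	viper.Set("database.sqlite.wal_autocheckpoint", 2000) // an explicit value overrides the default
	fmt.Println(viper.GetInt("database.sqlite.wal_autocheckpoint")) // 2000
}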