Why do we need samples?

In an ideal scenario, the emulator generates a video frame and an audio chunk at its internal frame rate. For example, if the emulator runs a game at 60 FPS, it produces roughly 16 ms of audio and one video frame on each tick (each call of the run function). All of this data then has to be sent to the user's browser, which becomes tricky with WebRTC audio.

For high-quality sound, the WebRTC standard effectively gives us only Opus-encoded audio. The Opus encoder and the decoder (the audio player in the browser) have a limitation: they operate only on fixed-size audio frames, i.e. predefined chunks of 5, 10, 20, 40, or 60 ms.
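For reference, turning a frame duration into a sample count is plain rate-times-duration arithmetic, which is what the frame helper in the diff below appears to compute. A minimal standalone sketch (the function name and the 48 kHz rate are only illustrative, not taken from the project):

package main

import "fmt"

// samplesPerFrame returns how many mono PCM samples one frame holds
// for a given sample rate (Hz) and frame duration (ms).
func samplesPerFrame(hz int, ms float32) int {
    return int(float32(hz) * ms / 1000)
}

func main() {
    for _, ms := range []float32{5, 10, 20, 40, 60} {
        // At 48 kHz: 5 ms -> 240 samples, 10 ms -> 480, ..., 60 ms -> 2880.
        fmt.Printf("%v ms = %d samples at 48 kHz\n", ms, samplesPerFrame(48000, ms))
    }
}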

Because of this limitation, part of a tick's audio often cannot be packed into whole frames right away and has to wait for the next tick. If we have 16 ms of audio and a single fixed 10 ms frame, we send 10 ms immediately; the remaining 6 ms needs 4 more ms from the next tick before it can go out, so that tail of audio constantly lags behind the video it belongs to.

To mitigate this, we can use the smallest frame size, 5 ms, as the buffer. That decreases the latency to 1 ms, but we then send 3 packets for every 16 ms of audio.
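A quick back-of-the-envelope sketch of that trade-off (the numbers are purely illustrative): for one 16 ms tick, count how many whole frames of a given size can be sent immediately and how much audio is left waiting.

package main

import "fmt"

func main() {
    const tickMs = 16.0 // audio produced per emulator tick at ~60 FPS

    for _, frameMs := range []float64{5, 10, 20} {
        packets := int(tickMs / frameMs)              // whole frames sent right away
        leftover := tickMs - float64(packets)*frameMs // audio that waits for the next tick
        fmt.Printf("%2.0f ms frames: %d packet(s), %.0f ms left waiting\n", frameMs, packets, leftover)
    }
}

With 10 ms frames we send 1 packet and 6 ms waits; with 5 ms frames we send 3 packets and only 1 ms waits.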

A slightly better way is to keep several buffers of different sizes and dynamically pick whichever one fits the incoming audio best, minimizing the number of network packets sent to users.

The frames option essentially accomplishes that: in the configuration we can list one or several Opus frame sizes to buffer the audio into and choose from. They should be listed from the largest to the smallest. And that's it.
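The choose function itself isn't shown in this diff, so the following standalone sketch only illustrates one plausible selection rule: starting from the largest bucket, pick the first one that the remaining samples can completely fill, otherwise fall back to the smallest. All names and the rule are assumptions for illustration, not the project's actual code.

package main

import "fmt"

// bucketSketch stands in for the buckets from the diff: a fixed-size chunk
// of samples corresponding to one Opus frame duration.
type bucketSketch struct {
    size int     // samples the bucket holds
    ms   float32 // frame duration in ms
}

// pick returns the index of the largest bucket that `remaining` samples can
// completely fill; if none fits, it returns the smallest (last) bucket.
// Buckets are assumed to be ordered from largest to smallest.
func pick(buckets []bucketSketch, remaining int) int {
    for i, b := range buckets {
        if remaining >= b.size {
            return i
        }
    }
    return len(buckets) - 1
}

func main() {
    // 48 kHz buckets for 10 ms and 5 ms frames: 480 and 240 samples.
    buckets := []bucketSketch{{480, 10}, {240, 5}}
    fmt.Println(pick(buckets, 768)) // 0: the 10 ms bucket can be filled
    fmt.Println(pick(buckets, 96))  // 1: neither fills completely, take the smallest
}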
Sergey Stepanov 2024-12-13 18:57:25 +03:00
parent ed3b195b26
commit 89ae98b035
2 changed files with 18 additions and 12 deletions


@@ -12,14 +12,14 @@ type buffer struct {
     frameHz []int
     raw     samples
-    buckets []Bucket
-    cur     *Bucket
+    buckets []bucket
+    cur     *bucket
 }
-type Bucket struct {
+type bucket struct {
     mem samples
     ms  float32
-    lv  int
+    p   int
     dst int
 }
@@ -37,10 +37,14 @@ func newBuffer(frames []float32, hz int) (*buffer, error) {
     }
     buf.raw = make(samples, s)
+    if len(buf.raw) == 0 {
+        return nil, errors.New("seems those params are bad and the buffer is 0")
+    }
     next := 0
     for _, f := range frames {
         s := frame(hz, f)
-        buf.buckets = append(buf.buckets, Bucket{
+        buf.buckets = append(buf.buckets, bucket{
             mem: buf.raw[next : next+s],
             ms:  f,
         })
@@ -62,7 +66,7 @@ func (b *buffer) choose(l int) {
 func (b *buffer) resample(hz int) {
     b.stretch = true
     for i := range b.buckets {
-        b.buckets[i].dst = frame(hz, float32(b.buckets[i].ms))
+        b.buckets[i].dst = frame(hz, b.buckets[i].ms)
     }
 }
@@ -76,20 +80,21 @@ func (b *buffer) resample(hz int) {
 // by the length of the written data.
 // In the first case, we won't call the callback, but it will be called every time
 // when the internal buffer overflows until all samples are read.
+// It will choose between multiple internal buffers to fit remaining samples.
 func (b *buffer) write(s samples, onFull func(samples, float32)) (r int) {
     for r < len(s) {
         buf := b.cur
-        w := copy(buf.mem[buf.lv:], s[r:])
+        w := copy(buf.mem[buf.p:], s[r:])
         r += w
-        buf.lv += w
-        if buf.lv == len(buf.mem) {
+        buf.p += w
+        if buf.p == len(buf.mem) {
             if b.stretch {
                 onFull(buf.mem.stretch(buf.dst), buf.ms)
             } else {
                 onFull(buf.mem, buf.ms)
             }
             b.choose(len(s) - r)
-            b.cur.lv = 0
+            b.cur.p = 0
         }
     }
     return
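To make the control flow of write above easier to follow, here is a self-contained toy version of the same copy-and-flush loop. It uses a single bucket, a plain []int16 instead of the samples type, and no resampling; everything here is illustrative, not the project's code.

package main

import "fmt"

// toyBuffer mimics the idea of the buffer above: copy incoming samples into
// a fixed-size bucket and hand it to a callback every time it fills up.
type toyBuffer struct {
    mem []int16 // the bucket's storage
    p   int     // fill position, like bucket.p in the diff
}

func (b *toyBuffer) write(s []int16, onFull func([]int16)) (r int) {
    for r < len(s) {
        w := copy(b.mem[b.p:], s[r:])
        r += w
        b.p += w
        if b.p == len(b.mem) {
            onFull(b.mem) // bucket is full: pass it on (e.g., to the encoder)
            b.p = 0       // reuse the bucket for the next chunk
        }
    }
    return
}

func main() {
    buf := &toyBuffer{mem: make([]int16, 480)} // one 10 ms bucket at 48 kHz
    in := make([]int16, 768)                   // 16 ms of mono audio at 48 kHz
    buf.write(in, func(frame []int16) { fmt.Println("flushed", len(frame), "samples") })
    fmt.Println("still waiting in the bucket:", buf.p) // 288 samples
}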


@@ -2,13 +2,14 @@ package media
 import (
     "fmt"
+    "sync"
+    "time"
     "github.com/giongto35/cloud-game/v3/pkg/config"
     "github.com/giongto35/cloud-game/v3/pkg/encoder"
     "github.com/giongto35/cloud-game/v3/pkg/encoder/opus"
     "github.com/giongto35/cloud-game/v3/pkg/logger"
     "github.com/giongto35/cloud-game/v3/pkg/worker/caged/app"
-    "sync"
-    "time"
 )
 const (