Added aggregated metrics and Grafana dashboard

This commit is contained in:
Vittorio Palmisano 2021-03-15 19:26:51 +01:00
parent 530c9ffc11
commit 3551186506
11 changed files with 1967 additions and 245 deletions

3
.gitignore vendored
View file

@ -11,4 +11,5 @@ node_modules/
!/server/certs/mediasoup-demo.localhost.*
.vscode
/app/public/config/*.pem
yarn.lock
yarn.lock
yarn-error.log

View file

@ -9,7 +9,7 @@ chmod +x /usr/local/bin/docker-compose
Starting:
```sh
docker-compose up --build -d
CURRENT_USER=$UID:$GID docker-compose up --build -d
docker-compose logs -f edumeet
```

View file

@ -399,7 +399,7 @@ module.exports =
// Mediasoup settings
mediasoup :
{
numWorkers : Object.keys(os.cpus()).length,
numWorkers : 2, //Object.keys(os.cpus()).length,
// mediasoup Worker settings.
worker :
{
@ -499,7 +499,9 @@ module.exports =
// listen : 'localhost', // exporter listens on this address
numeric : false, // show numeric IP addresses
port : 8889, // allocated port
quiet : false // include fewer labels
quiet : false, // include fewer labels
// aggregated metrics options
period : 5 // update period (seconds)
}
};

View file

@ -0,0 +1,6 @@
- name: 'default'
org_id: 1
folder: ''
type: 'file'
options:
folder: '/var/lib/grafana/dashboards'

File diff suppressed because it is too large Load diff

View file

@ -14,13 +14,11 @@ services:
build: ./edumeet
container_name: edumeet
restart: unless-stopped
user: "${UID}:${GID}"
user: "${CURRENT_USER}"
volumes:
- ${PWD}/..:/edumeet
- ${PWD}/config/edumeet-server-config.js:/edumeet/server/config/config.js:ro
- ${PWD}/config/edumeet-app-config.js:/edumeet/app/public/config/config.js:ro
environment:
- DEBUG="edumeet*,mediasoup*"
network_mode: "host"
extra_hosts:
redis: 172.22.0.2
@ -47,6 +45,8 @@ services:
- ${PWD}/config/nginx.conf:/etc/nginx/conf.d/default.conf:ro
extra_hosts:
edumeet: 172.22.0.1
depends_on:
- edumeet
prometheus:
image: prom/prometheus:latest
@ -96,6 +96,8 @@ services:
- 9091:3000
volumes:
- ./config/grafana-prometheus-datasource.yml:/etc/grafana/provisioning/datasources/prometheus.yml
- ./config/grafana-dashboards.yml:/etc/grafana/provisioning/dashboards/all.yml
- ./config/grafana-dashboards:/var/lib/grafana/dashboards
- ./data/grafana:/var/lib/grafana
environment:
- GF_SECURITY_ADMIN_USER=admin

View file

@ -1,8 +1,9 @@
FROM node:14-slim
FROM node:lts-buster-slim
RUN apt-get update && \
apt-get install -y git build-essential python && \
apt-get install -y git build-essential python pkg-config libssl-dev && \
apt-get clean
WORKDIR /edumeet
ENV DEBUG=edumeet*,mediasoup*
RUN npm install -g nodemon && \
npm install -g concurrently
CMD concurrently --names "server,app" "cd server && yarn && nodemon server.js" "cd app && yarn && yarn start"

View file

@ -0,0 +1,247 @@
const promClient = require('prom-client');
const pidusage = require('pidusage');
const Stats = require('fast-stats').Stats;
const Logger = require('../Logger');
const logger = new Logger('metrics:aggregated');
//
module.exports = function(workers, config)
{
const register = new promClient.Registry();
promClient.collectDefaultMetrics({ prefix: 'mediasoup_', register });
const mediasoupStats = {};
const formatStats = (s) =>
{
return {
length: s.length || 0,
sum: s.sum || 0,
mean: s.amean() || 0,
stddev: s.stddev() || 0,
p25: s.percentile(25) || 0,
min: s.min || 0,
max: s.max || 0,
};
};
const collectStats = async () =>
{
logger.info('collectStats');
let workers_cpu = new Stats();
let workers_memory = new Stats();
let video_bitrates_in = new Stats();
let video_bitrates_out = new Stats();
let audio_bitrates_in = new Stats();
let audio_bitrates_out = new Stats();
let packets_counts_in = new Stats();
let packets_losts_in = new Stats();
let round_trip_times_out = new Stats();
let packets_counts_out = new Stats();
let packets_losts_out = new Stats();
let spatial_layers_out = new Stats();
let temporal_layers_out = new Stats();
try {
// iterate workers
for (const worker of workers.values())
{
// worker process stats
const workerStats = await pidusage(worker._pid);
workers_cpu.push(workerStats.cpu / 100);
workers_memory.push(workerStats.memory);
// iterate routers
for (const router of worker._routers.values())
{
// iterate transports
for (const transport of router._transports.values())
{
/* let stats = [];
try
{
stats = await transport.getStats();
}
catch(err)
{
logger.error('transport.getStats error:', err.message);
continue;
}
for (const s of stats)
{
if (s.type !== 'webrtc-transport'){
continue;
}
} */
// iterate producers
for (const producer of transport._producers.values())
{
let stats = [];
try
{
stats = await producer.getStats();
}
catch(err)
{
logger.error('producer.getStats error:', err.message);
continue;
}
for (const s of stats)
{
if (s.type !== 'inbound-rtp')
{
continue;
}
if (s.kind === 'video')
{
video_bitrates_in.push(s.bitrate);
}
else if (s.kind === 'audio')
{
audio_bitrates_in.push(s.bitrate);
}
packets_counts_in.push(s.packetCount || 0);
packets_losts_in.push(s.packetsLost || 0);
}
}
// iterate consumers
for (const consumer of transport._consumers.values())
{
if (consumer.type === 'pipe')
{
continue;
}
let stats = [];
try
{
stats = await consumer.getStats();
}
catch(err)
{
logger.error('consumer.getStats error:', err.message);
continue;
}
for (const s of stats)
{
if(s.type !== 'outbound-rtp'){
continue;
}
if (s.kind === 'video')
{
video_bitrates_out.push(s.bitrate || 0);
spatial_layers_out.push(consumer.currentLayers ? consumer.currentLayers.spatialLayer : 0);
temporal_layers_out.push(consumer.currentLayers ? consumer.currentLayers.temporalLayer : 0);
}
else if(s.kind === 'audio')
{
audio_bitrates_out.push(s.bitrate || 0);
}
round_trip_times_out.push(s.roundTripTime || 0);
packets_counts_out.push(s.packetCount || 0);
packets_losts_out.push(s.packetsLost || 0);
}
}
}
}
}
Object.assign(mediasoupStats, {
workers_cpu: formatStats(workers_cpu),
workers_memory: formatStats(workers_memory),
video_bitrates_in: formatStats(video_bitrates_in),
video_bitrates_out: formatStats(video_bitrates_out),
audio_bitrates_in: formatStats(audio_bitrates_in),
audio_bitrates_out: formatStats(audio_bitrates_out),
round_trip_times_out: formatStats(round_trip_times_out),
packets_counts_in: formatStats(packets_counts_in),
packets_losts_in: formatStats(packets_losts_in),
packets_counts_out: formatStats(packets_counts_out),
packets_losts_out: formatStats(packets_losts_out),
spatial_layers_out: formatStats(spatial_layers_out),
temporal_layers_out: formatStats(temporal_layers_out),
});
}
catch(err)
{
logger.error('collectStats error:', err.message);
}
setTimeout(collectStats, config.period * 1000);
}
collectStats();
// mediasoup metrics
[
{ name: 'workers_cpu', statName: 'workers_cpu', statValue: 'sum' },
{ name: 'workers_memory', statName: 'workers_memory', statValue: 'sum' },
{ name: 'audio_in_count', statName: 'audio_bitrates_in', statValue: 'length' },
{ name: 'audio_bitrates_in_sum', statName: 'audio_bitrates_in', statValue: 'sum' },
{ name: 'audio_bitrates_in_mean', statName: 'audio_bitrates_in', statValue: 'mean' },
{ name: 'audio_bitrates_in_min', statName: 'audio_bitrates_in', statValue: 'min' },
{ name: 'audio_bitrates_in_max', statName: 'audio_bitrates_in', statValue: 'max' },
{ name: 'audio_bitrates_in_p25', statName: 'audio_bitrates_in', statValue: 'p25' },
{ name: 'video_in_count', statName: 'video_bitrates_in', statValue: 'length' },
{ name: 'video_bitrates_in_sum', statName: 'video_bitrates_in', statValue: 'sum' },
{ name: 'video_bitrates_in_mean', statName: 'video_bitrates_in', statValue: 'mean' },
{ name: 'video_bitrates_in_min', statName: 'video_bitrates_in', statValue: 'min' },
{ name: 'video_bitrates_in_max', statName: 'video_bitrates_in', statValue: 'max' },
{ name: 'video_bitrates_in_p25', statName: 'video_bitrates_in', statValue: 'p25' },
{ name: 'audio_out_count', statName: 'audio_bitrates_out', statValue: 'length' },
{ name: 'audio_bitrates_out_sum', statName: 'audio_bitrates_out', statValue: 'sum' },
{ name: 'audio_bitrates_out_mean', statName: 'audio_bitrates_out', statValue: 'mean' },
{ name: 'audio_bitrates_out_min', statName: 'audio_bitrates_out', statValue: 'min' },
{ name: 'audio_bitrates_out_max', statName: 'audio_bitrates_out', statValue: 'max' },
{ name: 'audio_bitrates_out_p25', statName: 'audio_bitrates_out', statValue: 'p25' },
{ name: 'video_out_count', statName: 'video_bitrates_out', statValue: 'length' },
{ name: 'video_bitrates_out_sum', statName: 'video_bitrates_out', statValue: 'sum' },
{ name: 'video_bitrates_out_mean', statName: 'video_bitrates_out', statValue: 'mean' },
{ name: 'video_bitrates_out_min', statName: 'video_bitrates_out', statValue: 'min' },
{ name: 'video_bitrates_out_max', statName: 'video_bitrates_out', statValue: 'max' },
{ name: 'video_bitrates_out_p25', statName: 'video_bitrates_out', statValue: 'p25' },
{ name: 'spatial_layers_out_mean', statName: 'spatial_layers_out', statValue: 'mean' },
{ name: 'spatial_layers_out_min', statName: 'spatial_layers_out', statValue: 'min' },
{ name: 'spatial_layers_out_max', statName: 'spatial_layers_out', statValue: 'max' },
{ name: 'spatial_layers_out_p25', statName: 'spatial_layers_out', statValue: 'p25' },
{ name: 'temporal_layers_out_mean', statName: 'temporal_layers_out', statValue: 'mean' },
{ name: 'temporal_layers_out_min', statName: 'temporal_layers_out', statValue: 'min' },
{ name: 'temporal_layers_out_max', statName: 'temporal_layers_out', statValue: 'max' },
{ name: 'temporal_layers_out_p25', statName: 'temporal_layers_out', statValue: 'p25' },
{ name: 'round_trip_times_out_mean', statName: 'round_trip_times_out', statValue: 'mean' },
{ name: 'round_trip_times_out_min', statName: 'round_trip_times_out', statValue: 'min' },
{ name: 'round_trip_times_out_max', statName: 'round_trip_times_out', statValue: 'max' },
{ name: 'round_trip_times_out_p25', statName: 'round_trip_times_out', statValue: 'p25' },
].forEach(({ name, statName, statValue }) => {
new promClient.Gauge({
name: `mediasoup_${name}`,
help: `MediaSoup ${name}`,
labelNames: [],
registers: [ register ],
collect()
{
this.set({}, mediasoupStats[statName][statValue]);
}
});
});
return register;
}

View file

@ -0,0 +1,235 @@
const { Resolver } = require('dns').promises;
const prom = require('prom-client');
const Logger = require('../Logger');
const logger = new Logger('metrics:default');
const resolver = new Resolver();
// Label names declared on every per-producer / per-consumer metric created
// in this module.
const labelNames = [
	'pid',
	'room_id',
	'peer_id',
	'display_name',
	'user_agent',
	'transport_id',
	'proto',
	'local_addr',
	'remote_addr',
	'id',
	'kind',
	'codec',
	'type'
];

// Stats fields exported as metrics: the key is the field name read from each
// getStats() entry, metricType is the prom-client class to instantiate and
// unit (optional) is appended to the metric name.
const metadata = {
	byteCount : { metricType: prom.Counter, unit: 'bytes' },
	score     : { metricType: prom.Gauge }
};
/**
 * Populates `registry` with per-room, per-producer and per-consumer metrics.
 *
 * Creates the `edumeet_rooms` / `edumeet_peers` gauges plus one metric per
 * `metadata` entry for the 'consumer' and 'producer' subsystems, then walks
 * every worker -> router -> transport and records each getStats() entry.
 * Transports whose ICE state is not 'completed' are skipped.
 *
 * `rooms`, `peers` and `config` used to be free variables of an enclosing
 * function; in this module they were undeclared, so every call failed with a
 * ReferenceError. They are now optional trailing parameters so existing
 * `(workers, registry)` calls keep working; callers should pass the real
 * values, otherwise no room/peer can be matched and the first producer or
 * consumer visited raises 'cannot find common labels'.
 *
 * @param {Map} workers - pid -> worker; `worker._routers` is iterated.
 * @param {object} registry - prom-client registry the metrics register on.
 * @param {Map} [rooms] - roomId -> room (reads `room._peers`).
 * @param {Map} [peers] - peerId -> peer (reads `_displayName`, `_socket`,
 *   `_producers`, `_consumers`).
 * @param {object} [config] - exporter options: `deidentify`, `numeric`, `quiet`.
 * @returns {Promise<void>}
 * @throws {Error} when a producer/consumer cannot be matched to a peer.
 */
module.exports = async function(workers, registry, rooms = new Map(), peers = new Map(), config = {})
{
	const namespace = 'mediasoup';

	// Creates one metric per `metadata` entry for the given subsystem, named
	// mediasoup_<subsystem>_<snake_case_key>[_<unit>].
	const newMetrics = function(subsystem)
	{
		const metrics = new Map();

		for (const [ key, { metricType, unit } ] of Object.entries(metadata))
		{
			const name = key.split(/(?=[A-Z])/).join('_')
				.toLowerCase();

			let fullName = `${namespace}_${subsystem}_${name}`;

			if (unit)
			{
				fullName += `_${unit}`;
			}

			metrics.set(key, new metricType({
				name       : fullName,
				help       : `${subsystem}.${key}`,
				labelNames : labelNames,
				registers  : [ registry ]
			}));
		}

		return metrics;
	};

	// Finds the first peer accepted by `fn` and returns the labels shared by
	// all stats of the producer/consumer `both`.
	// NOTE(review): the inner loop scans all peers for every room, so roomId
	// is simply the first room key - confirm whether peers should be looked
	// up per room instead.
	const commonLabels = function(both, fn)
	{
		for (const roomId of rooms.keys())
		{
			for (const [ peerId, peer ] of peers)
			{
				if (fn(peer))
				{
					const displayName = peer._displayName;
					const userAgent = peer._socket.client.request.headers['user-agent'];
					const kind = both.kind;
					const codec = both.rtpParameters.codecs[0].mimeType.split('/')[1];

					return { roomId, peerId, displayName, userAgent, kind, codec };
				}
			}
		}

		throw new Error('cannot find common labels');
	};

	// Formats "<host>:<port>": masked when config.deidentify, numeric when
	// config.numeric, otherwise reverse-resolved (falling back to the IP).
	const addr = async function(ip, port)
	{
		if (config.deidentify)
		{
			// Replaces all but the last two dot-separated groups with 'xx'.
			const a = ip.split('.');

			for (let i = 0; i < a.length - 2; i++)
			{
				a[i] = 'xx';
			}

			return `${a.join('.')}:${port}`;
		}

		if (config.numeric)
		{
			return `${ip}:${port}`;
		}

		let host = ip;

		try
		{
			const names = await resolver.reverse(ip);

			host = names[0];
		}
		catch (err)
		{
			logger.error(`reverse DNS query failed: ${ip} ${err.code}`);
		}

		return `${host}:${port}`;
	};

	// Blanks a label value when config.quiet is set.
	const quiet = function(s)
	{
		return config.quiet ? '' : s;
	};

	// Records value `v` on metric `m` according to the metric type declared
	// in `metadata`.
	const setValue = function(key, m, labels, v)
	{
		logger.debug(`setValue key=${key} v=${v}`);

		// A stats entry may lack this field. Skip it rather than hand a
		// non-number to prom-client (Gauge.set rejects it; Counter.inc would
		// count a missing value as +1).
		if (typeof v !== 'number')
		{
			return;
		}

		switch (metadata[key].metricType)
		{
			case prom.Counter:
				m.inc(labels, v);
				break;
			case prom.Gauge:
				m.set(labels, v);
				break;
			default:
				throw new Error(`unexpected metric: ${m}`);
		}
	};

	// Records every stats entry of one producer/consumer on `metrics`,
	// skipping entries whose type equals `skippedType` (if given).
	const exportStats = async function(metrics, entity, entityId, peerOwns, shared, skippedType)
	{
		// Resolved before getStats() so an unmatched entity fails early.
		const { roomId, peerId, displayName, userAgent, kind, codec } =
			commonLabels(entity, peerOwns);

		const entries = await entity.getStats();

		for (const x of entries)
		{
			if (skippedType !== undefined && x.type === skippedType)
			{
				continue;
			}

			const labels =
			{
				'pid'          : shared.pid,
				'room_id'      : roomId,
				'peer_id'      : peerId,
				'display_name' : displayName,
				'user_agent'   : userAgent,
				'transport_id' : quiet(shared.transportId),
				'proto'        : shared.proto,
				'local_addr'   : shared.localAddr,
				'remote_addr'  : shared.remoteAddr,
				'id'           : quiet(entityId),
				'kind'         : kind,
				'codec'        : codec,
				'type'         : x.type
			};

			for (const [ key, m ] of metrics)
			{
				setValue(key, m, labels, x[key]);
			}
		}
	};

	logger.debug('collect');

	const mRooms = new prom.Gauge({ name: 'edumeet_rooms', help: '#rooms', registers: [ registry ] });

	mRooms.set(rooms.size);

	const mPeers = new prom.Gauge({ name: 'edumeet_peers', help: '#peers', labelNames: [ 'room_id' ], registers: [ registry ] });

	for (const [ roomId, room ] of rooms)
	{
		// NOTE(review): assumes room._peers is a plain object keyed by peer id.
		mPeers.labels(roomId).set(Object.keys(room._peers).length);
	}

	const mConsumer = newMetrics('consumer');
	const mProducer = newMetrics('producer');

	for (const [ pid, worker ] of workers)
	{
		logger.debug(`visiting worker ${pid}`);

		for (const router of worker._routers)
		{
			logger.debug(`visiting router ${router.id}`);

			for (const [ transportId, transport ] of router._transports)
			{
				logger.debug(`visiting transport ${transportId}`);

				const transportJson = await transport.dump();

				if (transportJson.iceState !== 'completed')
				{
					logger.debug(`skipping transport ${transportId}: ${transportJson.iceState}`);
					continue;
				}

				const iceSelectedTuple = transportJson.iceSelectedTuple;
				const shared = {
					pid,
					transportId,
					proto      : iceSelectedTuple.protocol,
					localAddr  : await addr(iceSelectedTuple.localIp,
						iceSelectedTuple.localPort),
					remoteAddr : await addr(iceSelectedTuple.remoteIp,
						iceSelectedTuple.remotePort)
				};

				for (const [ producerId, producer ] of transport._producers)
				{
					logger.debug(`visiting producer ${producerId}`);

					await exportStats(mProducer, producer, producerId,
						(peer) => peer._producers.has(producerId), shared);
				}

				for (const [ consumerId, consumer ] of transport._consumers)
				{
					logger.debug(`visiting consumer ${consumerId}`);

					// 'inbound-rtp' entries in consumer stats are not exported here.
					await exportStats(mConsumer, consumer, consumerId,
						(peer) => peer._consumers.has(consumerId), shared, 'inbound-rtp');
				}
			}
		}
	}
};

View file

@ -1,244 +1,16 @@
const { Resolver } = require('dns').promises;
const express = require('express');
const mediasoup = require('mediasoup');
const prom = require('prom-client');
const promClient = require('prom-client');
const Logger = require('./Logger');
const collectDefaultMetrics = require('./metrics/default');
const RegisterAggregated = require('./metrics/aggregated');
const logger = new Logger('prom');
const resolver = new Resolver();
const logger = new Logger('promClient');
const workers = new Map();
const labelNames = [
'pid', 'room_id', 'peer_id', 'display_name', 'user_agent', 'transport_id',
'proto', 'local_addr', 'remote_addr', 'id', 'kind', 'codec', 'type'
];
const metadata = {
'byteCount' : { metricType: prom.Counter, unit: 'bytes' },
'score' : { metricType: prom.Gauge }
};
module.exports = async function(rooms, peers, config)
{
const collect = async function(registry)
{
const newMetrics = function(subsystem)
{
const namespace = 'mediasoup';
const metrics = new Map();
for (const key in metadata)
{
if (Object.prototype.hasOwnProperty.call(metadata, key))
{
const value = metadata[key];
const name = key.split(/(?=[A-Z])/).join('_')
.toLowerCase();
const unit = value.unit;
const metricType = value.metricType;
let s = `${namespace}_${subsystem}_${name}`;
if (unit)
{
s += `_${unit}`;
}
const m = new metricType({
name : s, help : `${subsystem}.${key}`, labelNames : labelNames, registers : [ registry ] });
metrics.set(key, m);
}
}
return metrics;
};
const commonLabels = function(both, fn)
{
for (const roomId of rooms.keys())
{
for (const [ peerId, peer ] of peers)
{
if (fn(peer))
{
const displayName = peer._displayName;
const userAgent = peer._socket.client.request.headers['user-agent'];
const kind = both.kind;
const codec = both.rtpParameters.codecs[0].mimeType.split('/')[1];
return { roomId, peerId, displayName, userAgent, kind, codec };
}
}
}
throw new Error('cannot find common labels');
};
const addr = async function(ip, port)
{
if (config.deidentify)
{
const a = ip.split('.');
for (let i = 0; i < a.length - 2; i++)
{
a[i] = 'xx';
}
return `${a.join('.')}:${port}`;
}
else if (config.numeric)
{
return `${ip}:${port}`;
}
else
{
try
{
const a = await resolver.reverse(ip);
ip = a[0];
}
catch (err)
{
logger.error(`reverse DNS query failed: ${ip} ${err.code}`);
}
return `${ip}:${port}`;
}
};
const quiet = function(s)
{
return config.quiet ? '' : s;
};
const setValue = function(key, m, labels, v)
{
logger.debug(`setValue key=${key} v=${v}`);
switch (metadata[key].metricType)
{
case prom.Counter:
m.inc(labels, v);
break;
case prom.Gauge:
m.set(labels, v);
break;
default:
throw new Error(`unexpected metric: ${m}`);
}
};
logger.debug('collect');
const mRooms = new prom.Gauge({ name: 'edumeet_rooms', help: '#rooms', registers: [ registry ] });
mRooms.set(rooms.size);
const mPeers = new prom.Gauge({ name: 'edumeet_peers', help: '#peers', labelNames: [ 'room_id' ], registers: [ registry ] });
for (const [ roomId, room ] of rooms)
{
mPeers.labels(roomId).set(Object.keys(room._peers).length);
}
const mConsumer = newMetrics('consumer');
const mProducer = newMetrics('producer');
for (const [ pid, worker ] of workers)
{
logger.debug(`visiting worker ${pid}`);
for (const router of worker._routers)
{
logger.debug(`visiting router ${router.id}`);
for (const [ transportId, transport ] of router._transports)
{
logger.debug(`visiting transport ${transportId}`);
const transportJson = await transport.dump();
if (transportJson.iceState != 'completed')
{
logger.debug(`skipping transport ${transportId}}: ${transportJson.iceState}`);
continue;
}
const iceSelectedTuple = transportJson.iceSelectedTuple;
const proto = iceSelectedTuple.protocol;
const localAddr = await addr(iceSelectedTuple.localIp,
iceSelectedTuple.localPort);
const remoteAddr = await addr(iceSelectedTuple.remoteIp,
iceSelectedTuple.remotePort);
for (const [ producerId, producer ] of transport._producers)
{
logger.debug(`visiting producer ${producerId}`);
const { roomId, peerId, displayName, userAgent, kind, codec } =
commonLabels(producer, (peer) => peer._producers.has(producerId));
const a = await producer.getStats();
for (const x of a)
{
const type = x.type;
const labels = {
'pid' : pid,
'room_id' : roomId,
'peer_id' : peerId,
'display_name' : displayName,
'user_agent' : userAgent,
'transport_id' : quiet(transportId),
'proto' : proto,
'local_addr' : localAddr,
'remote_addr' : remoteAddr,
'id' : quiet(producerId),
'kind' : kind,
'codec' : codec,
'type' : type
};
for (const [ key, m ] of mProducer)
{
setValue(key, m, labels, x[key]);
}
}
}
for (const [ consumerId, consumer ] of transport._consumers)
{
logger.debug(`visiting consumer ${consumerId}`);
const { roomId, peerId, displayName, userAgent, kind, codec } =
commonLabels(consumer, (peer) => peer._consumers.has(consumerId));
const a = await consumer.getStats();
for (const x of a)
{
if (x.type == 'inbound-rtp')
{
continue;
}
const type = x.type;
const labels =
{
'pid' : pid,
'room_id' : roomId,
'peer_id' : peerId,
'display_name' : displayName,
'user_agent' : userAgent,
'transport_id' : quiet(transportId),
'proto' : proto,
'local_addr' : localAddr,
'remote_addr' : remoteAddr,
'id' : quiet(consumerId),
'kind' : kind,
'codec' : codec,
'type' : type
};
for (const [ key, m ] of mConsumer)
{
setValue(key, m, labels, x[key]);
}
}
}
}
}
}
};
try
{
logger.debug(`config.deidentify=${config.deidentify}`);
@ -260,17 +32,32 @@ module.exports = async function(rooms, peers, config)
const app = express();
app.get('/metrics', async (req, res) =>
// default register
app.get('/', async (req, res) =>
{
logger.debug(`GET ${req.originalUrl}`);
const registry = new prom.Registry();
const registry = new promClient.Registry();
await collect(registry);
await collectDefaultMetrics(workers, registry);
res.set('Content-Type', registry.contentType);
const data = await registry.metrics();
res.end(data);
});
// aggregated register
const registerAggregated = RegisterAggregated(workers, config);
app.get('/metrics', async (req, res) =>
{
logger.debug(`GET ${req.originalUrl}`);
res.set('Content-Type', registerAggregated.contentType);
const data = await registerAggregated.metrics();
res.end(data);
});
const server = app.listen(config.port || 8889,
config.listen || undefined, () =>
{

View file

@ -32,6 +32,7 @@
"express": "^4.17.1",
"express-session": "^1.17.0",
"express-socket.io-session": "^1.3.5",
"fast-stats": "^0.0.6",
"helmet": "^3.21.2",
"ims-lti": "^3.0.2",
"jsonwebtoken": "^8.5.1",
@ -41,7 +42,7 @@
"passport-local": "^1.0.0",
"passport-lti": "0.0.7",
"passport-saml": "^1.3.5",
"pidusage": "^2.0.17",
"pidusage": "^2.0.21",
"prom-client": "^13.1.0",
"redis": "^2.8.0",
"socket.io": "^2.4.0",