From 5cb3395694e1a77df70656a958c0815dee724798 Mon Sep 17 00:00:00 2001
From: alex
Date: Fri, 7 Nov 2025 21:30:39 +0100
Subject: [PATCH] cleaned bottom references

---
 docker-compose/METRICS.md                     |  12 +-
 docker-compose/README.md                      |  34 +--
 .../docker-compose-timescale.yml.ko           |  61 -----
 .../dashboards/bottom-overview.json           |   4 +-
 .../provisioning/dashboards/dashboards.yml    |   2 +-
 .../{bottom_rules.yml => symon_rules.yml}     |   2 +-
 docker-compose/test-stack.sh                  |   4 +-
 ...ses-example.toml => processes-example.toml |   4 +-
 src/collector.rs                              | 237 +++++++++++++-----
 src/config.rs                                 |  10 +
 src/exporter.rs                               |  77 ++++++
 ...-example.toml => symon-config-example.toml |   0
 symon.toml                                    |   8 +-
 13 files changed, 302 insertions(+), 153 deletions(-)
 delete mode 100644 docker-compose/docker-compose-timescale.yml.ko
 rename docker-compose/rules/{bottom_rules.yml => symon_rules.yml} (93%)
 rename docker-compose/processes-example.toml => processes-example.toml (94%)
 rename docker-compose/symon-config-example.toml => symon-config-example.toml (100%)

diff --git a/docker-compose/METRICS.md b/docker-compose/METRICS.md
index 266eafd..8db82b0 100644
--- a/docker-compose/METRICS.md
+++ b/docker-compose/METRICS.md
@@ -1,6 +1,6 @@
-# Bottom OpenTelemetry Metrics Reference
+# Symon OpenTelemetry Metrics Reference
 
-This document lists all metrics exported by Bottom when running with the `opentelemetry` feature enabled.
+This document lists all metrics exported by Symon when running with the `opentelemetry` feature enabled.
 
 ## System Metrics
 
@@ -106,7 +106,7 @@ sum(system_process_memory_usage_bytes{name=~".*chrome.*"})
 
 ## Recording Rules
 
-The following recording rules are pre-configured in Prometheus (see `rules/bottom_rules.yml`):
+The following recording rules are pre-configured in Prometheus (see `rules/symon_rules.yml`):
 
 | Rule Name | Expression | Description |
 |-----------|------------|-------------|
@@ -166,7 +166,7 @@ system_network_rx_bytes_rate > 10485760
 
 ```yaml
 groups:
-  - name: bottom_alerts
+  - name: symon_alerts
     interval: 30s
     rules:
       - alert: HighCPUUsage
@@ -208,8 +208,8 @@ groups:
 | `sensor` | Temperature | Temperature sensor name |
 | `name` | Process metrics | Process name |
 | `pid` | Process metrics | Process ID |
-| `exported_job` | All | Always "bottom-system-monitor" |
-| `otel_scope_name` | All | Always "bottom-system-monitor" |
 
+| `exported_job` | All | Always "symon-system-monitor" |
+| `otel_scope_name` | All | Always "symon-system-monitor" |
 ## Data Retention

diff --git a/docker-compose/README.md b/docker-compose/README.md
index 1bdc293..045c01c 100644
--- a/docker-compose/README.md
+++ b/docker-compose/README.md
@@ -1,17 +1,17 @@
-# Bottom OpenTelemetry Docker Compose Setup
+# Symon OpenTelemetry Docker Compose Setup
 
-This directory contains a Docker Compose setup for running an observability stack to monitor Bottom with OpenTelemetry.
+This directory contains a Docker Compose setup for running an observability stack to monitor Symon with OpenTelemetry.
 
 ## Architecture
 
 The stack includes:
 
-1. **OpenTelemetry Collector** - Receives metrics from Bottom via OTLP protocol
+1. **OpenTelemetry Collector** - Receives metrics from Symon via OTLP protocol
 2. **Prometheus** - Scrapes and stores metrics from the OTEL Collector
 3. **Grafana** - Visualizes metrics from Prometheus
 
 ```
-Bottom (with --headless flag)
+Symon (with --headless flag)
     ↓ (OTLP/gRPC on port 4317)
 OpenTelemetry Collector
     ↓ (Prometheus scrape on port 8889)
@@ -34,7 +34,7 @@ This will start:
 - Prometheus on port 9090
 - Grafana on port 3000
 
-### 2. Build Bottom with OpenTelemetry support
+### 2. Build Symon with OpenTelemetry support
Build Bottom with OpenTelemetry support +### 2. Build Symon with OpenTelemetry support ```bash cd .. @@ -43,13 +43,13 @@ cargo build --release --features opentelemetry ### 3. Create a configuration file -Create a `bottom-config.toml` file: +Create a `Symon-config.toml` file: ```toml [opentelemetry] enabled = true endpoint = "http://localhost:4317" -service_name = "bottom-system-monitor" +service_name = "Symon-system-monitor" export_interval_ms = 5000 [opentelemetry.metrics] @@ -62,10 +62,10 @@ temperature = true gpu = true ``` -### 4. Run Bottom in headless mode +### 4. Run Symon in headless mode ```bash -./target/release/btm --config bottom-config.toml --headless +./target/release/btm --config Symon-config.toml --headless ``` Or without config file: @@ -93,11 +93,11 @@ Configures the OpenTelemetry Collector to: Configures Prometheus to: - Scrape metrics from the OTEL Collector every 10 seconds -- Load alerting rules from `rules/bottom_rules.yml` +- Load alerting rules from `rules/Symon_rules.yml` -### rules/bottom_rules.yml +### rules/Symon_rules.yml -Contains Prometheus recording rules for Bottom metrics, including: +Contains Prometheus recording rules for Symon metrics, including: - Recent process CPU usage metrics - Recent process memory usage metrics @@ -132,11 +132,11 @@ topk(10, system_process_memory_usage_bytes) Grafana is automatically configured with: - **Prometheus data source** (http://prometheus:9090) - pre-configured -- **Bottom System Overview dashboard** - pre-loaded +- **Symon System Overview dashboard** - pre-loaded To access: 1. Go to http://localhost:3000 (username: `admin`, password: `admin`) -2. Navigate to Dashboards → Browse → "Bottom System Overview" +2. Navigate to Dashboards → Browse → "Symon System Overview" The dashboard includes: - CPU usage by core @@ -160,7 +160,7 @@ docker-compose down -v ## Troubleshooting -### Bottom not sending metrics +### Symon not sending metrics Check the OTEL Collector logs: ```bash @@ -178,7 +178,7 @@ You should see messages about receiving metrics. 1. Verify Prometheus data source is configured correctly 2. Check that Prometheus has data by querying directly -3. Ensure your time range in Grafana includes when Bottom was running +3. Ensure your time range in Grafana includes when Symon was running ## Advanced Configuration @@ -188,7 +188,7 @@ A TimescaleDB configuration file is available as `docker-compose-timescale.yml.k ### Custom Prometheus Rules -Edit `rules/bottom_rules.yml` to add custom recording or alerting rules. +Edit `rules/Symon_rules.yml` to add custom recording or alerting rules. 
 ### OTEL Collector Sampling
 
diff --git a/docker-compose/docker-compose-timescale.yml.ko b/docker-compose/docker-compose-timescale.yml.ko
deleted file mode 100644
index 05347ca..0000000
--- a/docker-compose/docker-compose-timescale.yml.ko
+++ /dev/null
@@ -1,61 +0,0 @@
-services:
-  timescaledb:
-    image: timescale/timescaledb-ha:pg15
-    environment:
-      POSTGRES_PASSWORD: password
-      POSTGRES_DB: promscale
-      POSTGRES_USER: postgres
-    ports:
-      - "5432:5432"
-    volumes:
-      - timescale_data:/var/lib/postgresql/data
-
-  promscale:
-    image: timescale/promscale:latest
-    ports:
-      - "9201:9201"
-    depends_on:
-      - timescaledb
-    environment:
-      PROMSCALE_DB_URI: postgres://postgres:password@timescaledb:5432/promscale?sslmode=disable
-      PROMSCALE_STARTUP_INSTALL_EXTENSIONS: "true"
-    restart: on-failure
-
-  otel-collector:
-    image: otel/opentelemetry-collector-contrib:latest
-    container_name: otel-collector
-    command: ["--config=/etc/otel-collector-config.yml"]
-    volumes:
-      - ./otel-collector-config.yml:/etc/otel-collector-config.yml
-
-    ports:
-      - "4317:4317"
-
-  prometheus:
-    image: prom/prometheus:latest
-    container_name: prometheus
-    volumes:
-      - ./prometheus.yml:/etc/prometheus/prometheus.yml
-      - ./rules:/etc/prometheus/rules
-    ports:
-      - "9090:9090" # Prometheus web UI
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-    depends_on:
-      - otel-collector
-
-  grafana:
-    image: grafana/grafana:latest
-    ports:
-      - "3000:3000"
-    environment:
-      - GF_SECURITY_ADMIN_PASSWORD=admin
-      - GF_SECURITY_ADMIN_USER=admin
-    volumes:
-      - grafana-storage:/var/lib/grafana
-    depends_on:
-      - prometheus
-
-volumes:
-  grafana-storage:
-  timescale_data:
\ No newline at end of file

diff --git a/docker-compose/grafana/provisioning/dashboards/bottom-overview.json b/docker-compose/grafana/provisioning/dashboards/bottom-overview.json
index 6010bc6..2753af5 100644
--- a/docker-compose/grafana/provisioning/dashboards/bottom-overview.json
+++ b/docker-compose/grafana/provisioning/dashboards/bottom-overview.json
@@ -1,6 +1,6 @@
 {
-  "title": "Bottom System Overview",
-  "uid": "bottom-overview",
+  "title": "Symon System Overview",
+  "uid": "symon-overview",
   "timezone": "browser",
   "schemaVersion": 16,
   "refresh": "5s",

diff --git a/docker-compose/grafana/provisioning/dashboards/dashboards.yml b/docker-compose/grafana/provisioning/dashboards/dashboards.yml
index 2b5c0b3..b539d38 100644
--- a/docker-compose/grafana/provisioning/dashboards/dashboards.yml
+++ b/docker-compose/grafana/provisioning/dashboards/dashboards.yml
@@ -1,7 +1,7 @@
 apiVersion: 1
 
 providers:
-  - name: 'Bottom Dashboards'
+  - name: 'Symon Dashboards'
     orgId: 1
     folder: ''
     type: file

diff --git a/docker-compose/rules/bottom_rules.yml b/docker-compose/rules/symon_rules.yml
similarity index 93%
rename from docker-compose/rules/bottom_rules.yml
rename to docker-compose/rules/symon_rules.yml
index 4250345..16ceb96 100644
--- a/docker-compose/rules/bottom_rules.yml
+++ b/docker-compose/rules/symon_rules.yml
@@ -1,5 +1,5 @@
 groups:
-  - name: bottom_process_metrics
+  - name: symon_process_metrics
     interval: 30s
     rules:
       - record: system_process_cpu_usage_percent:recent

diff --git a/docker-compose/test-stack.sh b/docker-compose/test-stack.sh
index 3db3037..d90d409 100755
--- a/docker-compose/test-stack.sh
+++ b/docker-compose/test-stack.sh
@@ -3,7 +3,7 @@
 
 set -e
 
-echo "🔍 Testing Bottom OpenTelemetry Stack..."
+echo "🔍 Testing Symon OpenTelemetry Stack..."
echo "" # Colors @@ -75,6 +75,6 @@ echo " - Grafana: http://localhost:3000 (admin/admin)" echo " - OTEL Collector metrics: http://localhost:8889/metrics" echo "" echo "💡 Next steps:" -echo " 1. Build bottom with: cargo build --release --features opentelemetry" +echo " 1. Build Symon with: cargo build --release --features opentelemetry" echo " 2. Run in headless mode: ./target/release/btm --headless" echo " 3. Check metrics in Prometheus: http://localhost:9090/graph" diff --git a/docker-compose/processes-example.toml b/processes-example.toml similarity index 94% rename from docker-compose/processes-example.toml rename to processes-example.toml index 89851df..c4f266b 100644 --- a/docker-compose/processes-example.toml +++ b/processes-example.toml @@ -1,8 +1,8 @@ # Example process filter configuration file -# This file can be included from the main bottom config to keep +# This file can be included from the main symon config to keep # server-specific process lists separate. # -# Usage in bottom-config.toml: +# Usage in symon-config.toml: # [opentelemetry.metrics.process_filter] # include = "processes.toml" diff --git a/src/collector.rs b/src/collector.rs index 4de3bbc..13e16f6 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -2,7 +2,7 @@ use crate::config::MetricsConfig; use anyhow::Result; use std::collections::HashMap; use std::time::Instant; -use sysinfo::{CpuRefreshKind, Disks, Networks, RefreshKind, System}; +use sysinfo::{Disks, Networks, ProcessesToUpdate, RefreshKind, System}; /// System metrics collected at a point in time #[derive(Debug, Clone)] @@ -13,6 +13,8 @@ pub struct SystemMetrics { pub disk: Option>, pub processes: Option>, pub temperature: Option>, + pub load_avg: Option, + pub disk_io: Option>, } #[derive(Debug, Clone)] @@ -34,6 +36,10 @@ pub struct NetworkMetric { pub interface_name: String, pub rx_bytes_per_sec: u64, pub tx_bytes_per_sec: u64, + pub rx_packets_per_sec: u64, + pub tx_packets_per_sec: u64, + pub rx_errors_per_sec: u64, + pub tx_errors_per_sec: u64, } #[derive(Debug, Clone)] @@ -58,6 +64,22 @@ pub struct TemperatureMetric { pub temperature_celsius: f32, } +#[derive(Debug, Clone)] +pub struct LoadAvgMetric { + pub load1: f64, + pub load5: f64, + pub load15: f64, +} + +#[derive(Debug, Clone)] +pub struct DiskIoMetric { + pub device_name: String, + pub read_bytes_per_sec: u64, + pub write_bytes_per_sec: u64, + pub read_ops_per_sec: u64, + pub write_ops_per_sec: u64, +} + /// Collector for system metrics pub struct MetricsCollector { system: System, @@ -65,16 +87,35 @@ pub struct MetricsCollector { disks: Disks, config: MetricsConfig, // Network rate calculation state - last_network_stats: HashMap, // interface -> (rx_bytes, tx_bytes) + last_network_stats: HashMap, last_network_time: Option, + // Disk I/O rate calculation state + last_disk_io_stats: HashMap, + last_disk_io_time: Option, +} + +#[derive(Debug, Clone)] +struct NetworkStats { + rx_bytes: u64, + tx_bytes: u64, + rx_packets: u64, + tx_packets: u64, + rx_errors: u64, + tx_errors: u64, +} + +#[derive(Debug, Clone)] +struct DiskIoStats { + read_bytes: u64, + write_bytes: u64, + read_count: u64, + write_count: u64, } impl MetricsCollector { pub fn new(config: MetricsConfig) -> Self { - let refresh_kind = RefreshKind::new() - .with_cpu(CpuRefreshKind::everything()) - .with_memory(sysinfo::MemoryRefreshKind::everything()) - .with_processes(sysinfo::ProcessRefreshKind::everything()); + // Initialize with minimal data - we'll refresh on-demand + let refresh_kind = RefreshKind::new(); Self { system: 
@@ -83,14 +124,35 @@ impl MetricsCollector {
             config,
             last_network_stats: HashMap::new(),
             last_network_time: None,
+            last_disk_io_stats: HashMap::new(),
+            last_disk_io_time: None,
         }
     }
 
     /// Collect all enabled metrics
     pub fn collect(&mut self) -> Result<SystemMetrics> {
-        // Refresh system info
-        self.system.refresh_all();
-        self.networks.refresh();
+        // Refresh only what's needed based on enabled metrics
+        if self.config.cpu {
+            self.system.refresh_cpu_all();
+        }
+
+        if self.config.memory {
+            self.system.refresh_memory();
+        }
+
+        if self.config.processes {
+            self.system.refresh_processes(ProcessesToUpdate::All);
+        }
+
+        if self.config.network {
+            self.networks.refresh();
+        }
+
+        if self.config.disk {
+            self.disks.refresh();
+        }
+
+        // Note: Temperature metrics are currently not implemented
 
         Ok(SystemMetrics {
             cpu: if self.config.cpu {
@@ -123,6 +185,16 @@ impl MetricsCollector {
             } else {
                 None
             },
+            load_avg: if self.config.load_avg {
+                Some(self.collect_load_avg())
+            } else {
+                None
+            },
+            disk_io: if self.config.disk_io {
+                Some(self.collect_disk_io())
+            } else {
+                None
+            },
         })
     }
 
@@ -160,7 +232,14 @@ impl MetricsCollector {
         for (interface_name, data) in self.networks.iter() {
             self.last_network_stats.insert(
                 interface_name.to_string(),
-                (data.total_received(), data.total_transmitted()),
+                NetworkStats {
+                    rx_bytes: data.total_received(),
+                    tx_bytes: data.total_transmitted(),
+                    rx_packets: data.total_packets_received(),
+                    tx_packets: data.total_packets_transmitted(),
+                    rx_errors: data.total_errors_on_received(),
+                    tx_errors: data.total_errors_on_transmitted(),
+                },
             );
         }
         return metrics; // Return empty on first run
@@ -171,40 +250,43 @@ impl MetricsCollector {
 
         // Calculate rates for each interface
         for (interface_name, data) in self.networks.iter() {
-            let rx_total = data.total_received();
-            let tx_total = data.total_transmitted();
-
-            if let Some(&(last_rx, last_tx)) = self.last_network_stats.get(interface_name.as_str()) {
-                // Calculate bytes per second
-                let rx_bytes_per_sec = if rx_total >= last_rx {
-                    ((rx_total - last_rx) as f64 / time_delta_secs) as u64
-                } else {
-                    0 // Counter wrapped or interface reset
-                };
-
-                let tx_bytes_per_sec = if tx_total >= last_tx {
-                    ((tx_total - last_tx) as f64 / time_delta_secs) as u64
-                } else {
-                    0 // Counter wrapped or interface reset
-                };
+            let current_stats = NetworkStats {
+                rx_bytes: data.total_received(),
+                tx_bytes: data.total_transmitted(),
+                rx_packets: data.total_packets_received(),
+                tx_packets: data.total_packets_transmitted(),
+                rx_errors: data.total_errors_on_received(),
+                tx_errors: data.total_errors_on_transmitted(),
+            };
 
+            if let Some(last_stats) = self.last_network_stats.get(interface_name.as_str()) {
+                // Calculate rates per second
                 metrics.push(NetworkMetric {
                     interface_name: interface_name.to_string(),
-                    rx_bytes_per_sec,
-                    tx_bytes_per_sec,
+                    rx_bytes_per_sec: Self::calculate_rate(current_stats.rx_bytes, last_stats.rx_bytes, time_delta_secs),
+                    tx_bytes_per_sec: Self::calculate_rate(current_stats.tx_bytes, last_stats.tx_bytes, time_delta_secs),
+                    rx_packets_per_sec: Self::calculate_rate(current_stats.rx_packets, last_stats.rx_packets, time_delta_secs),
+                    tx_packets_per_sec: Self::calculate_rate(current_stats.tx_packets, last_stats.tx_packets, time_delta_secs),
+                    rx_errors_per_sec: Self::calculate_rate(current_stats.rx_errors, last_stats.rx_errors, time_delta_secs),
+                    tx_errors_per_sec: Self::calculate_rate(current_stats.tx_errors, last_stats.tx_errors, time_delta_secs),
                 });
             }
 
             // Update last stats
-            self.last_network_stats.insert(
-                interface_name.to_string(),
-                (rx_total, tx_total),
-            );
+            self.last_network_stats.insert(interface_name.to_string(), current_stats);
         }
 
         metrics
     }
 
+    fn calculate_rate(current: u64, last: u64, time_delta: f64) -> u64 {
+        if current >= last {
+            ((current - last) as f64 / time_delta) as u64
+        } else {
+            0 // Counter wrapped or interface reset
+        }
+    }
+
     fn collect_disk(&self) -> Vec<DiskMetric> {
         self.disks
             .iter()
@@ -225,41 +307,63 @@ impl MetricsCollector {
     fn collect_processes(&self) -> Vec<ProcessMetric> {
         let filter = self.config.process_filter.as_ref();
+        let max_processes = filter.map(|f| f.max_processes).unwrap_or(10);
 
-        let mut processes: Vec<ProcessMetric> = self
-            .system
-            .processes()
-            .iter()
-            .filter(|(_, process)| {
-                if let Some(filter_config) = filter {
-                    filter_config.should_include_process(
-                        process.name().to_string_lossy().as_ref(),
-                        process.pid().as_u32(),
-                    )
-                } else {
-                    true
-                }
-            })
-            .map(|(_, process)| ProcessMetric {
-                pid: process.pid().as_u32(),
-                name: process.name().to_string_lossy().to_string(),
-                cpu_usage_percent: process.cpu_usage(),
-                memory_bytes: process.memory(),
-            })
-            .collect();
+        // Pre-allocate with expected capacity
+        let mut processes: Vec<ProcessMetric> = Vec::with_capacity(max_processes);
+
+        // Collect only processes that pass the filter
+        for (_, process) in self.system.processes().iter() {
+            // Skip if filter rejects this process
+            if let Some(filter_config) = filter {
+                let process_name = process.name().to_string_lossy();
+                if !filter_config.should_include_process(process_name.as_ref(), process.pid().as_u32()) {
+                    continue;
+                }
+            }
 
-        // Sort by CPU usage and limit to top N (configurable)
+            let cpu_usage = process.cpu_usage();
+
+            // If we haven't reached max_processes yet, just add it
+            if processes.len() < max_processes {
+                processes.push(ProcessMetric {
+                    pid: process.pid().as_u32(),
+                    name: process.name().to_string_lossy().to_string(),
+                    cpu_usage_percent: cpu_usage,
+                    memory_bytes: process.memory(),
+                });
+            } else {
+                // Find the process with minimum CPU usage in our list
+                if let Some(min_idx) = processes
+                    .iter()
+                    .enumerate()
+                    .min_by(|(_, a), (_, b)| {
+                        a.cpu_usage_percent
+                            .partial_cmp(&b.cpu_usage_percent)
+                            .unwrap_or(std::cmp::Ordering::Equal)
+                    })
+                    .map(|(idx, _)| idx)
+                {
+                    // Replace if current process has higher CPU usage
+                    if cpu_usage > processes[min_idx].cpu_usage_percent {
+                        processes[min_idx] = ProcessMetric {
+                            pid: process.pid().as_u32(),
+                            name: process.name().to_string_lossy().to_string(),
+                            cpu_usage_percent: cpu_usage,
+                            memory_bytes: process.memory(),
+                        };
+                    }
+                }
+            }
+        }
+
+        // Final sort by CPU usage (descending)
         processes.sort_by(|a, b| {
             b.cpu_usage_percent
                 .partial_cmp(&a.cpu_usage_percent)
                 .unwrap_or(std::cmp::Ordering::Equal)
         });
 
-        let max_processes = filter
-            .map(|f| f.max_processes)
-            .unwrap_or(10);
-        processes.truncate(max_processes);
-
         processes
     }
 
@@ -269,4 +373,21 @@ impl MetricsCollector {
         // For now, return empty vector
         vec![]
     }
+
+    fn collect_load_avg(&self) -> LoadAvgMetric {
+        let load_avg = System::load_average();
+        LoadAvgMetric {
+            load1: load_avg.one,
+            load5: load_avg.five,
+            load15: load_avg.fifteen,
+        }
+    }
+
+    fn collect_disk_io(&mut self) -> Vec<DiskIoMetric> {
+        // Note: sysinfo 0.31 doesn't provide disk I/O stats directly
+        // This would require reading /proc/diskstats on Linux or using platform-specific APIs
+        // For now, return empty vector
+        // TODO: Implement platform-specific disk I/O collection
+        vec![]
+    }
 }

diff --git a/src/config.rs b/src/config.rs
index 99f06f1..0b13513 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -166,6 +166,14 @@ pub struct MetricsConfig {
     #[serde(default = "default_true")]
     pub temperature: bool,
 
+    /// Export load average metrics
+    #[serde(default = "default_true")]
+    pub load_avg: bool,
+
+    /// Export disk I/O metrics
+    #[serde(default = "default_true")]
+    pub disk_io: bool,
+
     /// Process filter configuration
     #[serde(default)]
     pub process_filter: Option<ProcessFilterConfig>,
@@ -180,6 +188,8 @@ impl Default for MetricsConfig {
             disk: true,
             processes: false,
             temperature: true,
+            load_avg: true,
+            disk_io: true,
             process_filter: None,
         }
     }

diff --git a/src/exporter.rs b/src/exporter.rs
index 02b7ff0..67ff950 100644
--- a/src/exporter.rs
+++ b/src/exporter.rs
@@ -20,11 +20,22 @@ struct MetricInstruments {
     swap_total: opentelemetry::metrics::Gauge<u64>,
     network_rx: opentelemetry::metrics::Gauge<u64>,
     network_tx: opentelemetry::metrics::Gauge<u64>,
+    network_rx_packets: opentelemetry::metrics::Gauge<u64>,
+    network_tx_packets: opentelemetry::metrics::Gauge<u64>,
+    network_rx_errors: opentelemetry::metrics::Gauge<u64>,
+    network_tx_errors: opentelemetry::metrics::Gauge<u64>,
     disk_usage: opentelemetry::metrics::Gauge<u64>,
     disk_total: opentelemetry::metrics::Gauge<u64>,
+    disk_io_read_bytes: opentelemetry::metrics::Gauge<u64>,
+    disk_io_write_bytes: opentelemetry::metrics::Gauge<u64>,
+    disk_io_read_ops: opentelemetry::metrics::Gauge<u64>,
+    disk_io_write_ops: opentelemetry::metrics::Gauge<u64>,
     process_cpu: opentelemetry::metrics::Gauge<f64>,
     process_memory: opentelemetry::metrics::Gauge<u64>,
     temperature: opentelemetry::metrics::Gauge<f64>,
+    load_avg_1: opentelemetry::metrics::Gauge<f64>,
+    load_avg_5: opentelemetry::metrics::Gauge<f64>,
+    load_avg_15: opentelemetry::metrics::Gauge<f64>,
 }
 
 impl MetricsExporter {
@@ -94,6 +105,22 @@ impl MetricsExporter {
                 .u64_gauge("system_network_tx_bytes_per_sec")
                 .with_description("Bytes transmitted per second")
                 .init(),
+            network_rx_packets: meter
+                .u64_gauge("system_network_rx_packets_per_sec")
+                .with_description("Packets received per second")
+                .init(),
+            network_tx_packets: meter
+                .u64_gauge("system_network_tx_packets_per_sec")
+                .with_description("Packets transmitted per second")
+                .init(),
+            network_rx_errors: meter
+                .u64_gauge("system_network_rx_errors_per_sec")
+                .with_description("Receive errors per second")
+                .init(),
+            network_tx_errors: meter
+                .u64_gauge("system_network_tx_errors_per_sec")
+                .with_description("Transmit errors per second")
+                .init(),
             disk_usage: meter
                 .u64_gauge("system_disk_usage_bytes")
                 .with_description("Disk usage in bytes")
@@ -114,6 +141,34 @@ impl MetricsExporter {
                 .f64_gauge("system_temperature_celsius")
                 .with_description("Temperature in Celsius")
                 .init(),
+            disk_io_read_bytes: meter
+                .u64_gauge("system_disk_io_read_bytes_per_sec")
+                .with_description("Disk read bytes per second")
+                .init(),
+            disk_io_write_bytes: meter
+                .u64_gauge("system_disk_io_write_bytes_per_sec")
+                .with_description("Disk write bytes per second")
+                .init(),
+            disk_io_read_ops: meter
+                .u64_gauge("system_disk_io_read_ops_per_sec")
+                .with_description("Disk read operations per second")
+                .init(),
+            disk_io_write_ops: meter
+                .u64_gauge("system_disk_io_write_ops_per_sec")
+                .with_description("Disk write operations per second")
+                .init(),
+            load_avg_1: meter
+                .f64_gauge("system_load_average_1m")
+                .with_description("System load average over 1 minute")
+                .init(),
+            load_avg_5: meter
+                .f64_gauge("system_load_average_5m")
+                .with_description("System load average over 5 minutes")
+                .init(),
+            load_avg_15: meter
+                .f64_gauge("system_load_average_15m")
+                .with_description("System load average over 15 minutes")
+                .init(),
         };
 
         Ok(Self {
@@ -147,6 +202,10 @@ impl MetricsExporter {
             let attrs = &[KeyValue::new("interface", net.interface_name.clone())];
             self.gauges.network_rx.record(net.rx_bytes_per_sec, attrs);
             self.gauges.network_tx.record(net.tx_bytes_per_sec, attrs);
+            self.gauges.network_rx_packets.record(net.rx_packets_per_sec, attrs);
+            self.gauges.network_tx_packets.record(net.tx_packets_per_sec, attrs);
+            self.gauges.network_rx_errors.record(net.rx_errors_per_sec, attrs);
+            self.gauges.network_tx_errors.record(net.tx_errors_per_sec, attrs);
         }
     }
 
@@ -185,6 +244,24 @@ impl MetricsExporter {
             );
         }
     }
+
+        // Export load average metrics
+        if let Some(load_avg) = &metrics.load_avg {
+            self.gauges.load_avg_1.record(load_avg.load1, &[]);
+            self.gauges.load_avg_5.record(load_avg.load5, &[]);
+            self.gauges.load_avg_15.record(load_avg.load15, &[]);
+        }
+
+        // Export disk I/O metrics
+        if let Some(disk_io_metrics) = &metrics.disk_io {
+            for disk_io in disk_io_metrics {
+                let attrs = &[KeyValue::new("device", disk_io.device_name.clone())];
+                self.gauges.disk_io_read_bytes.record(disk_io.read_bytes_per_sec, attrs);
+                self.gauges.disk_io_write_bytes.record(disk_io.write_bytes_per_sec, attrs);
+                self.gauges.disk_io_read_ops.record(disk_io.read_ops_per_sec, attrs);
+                self.gauges.disk_io_write_ops.record(disk_io.write_ops_per_sec, attrs);
+            }
+        }
     }
 
     pub async fn shutdown(self) -> Result<()> {

diff --git a/docker-compose/symon-config-example.toml b/symon-config-example.toml
similarity index 100%
rename from docker-compose/symon-config-example.toml
rename to symon-config-example.toml

diff --git a/symon.toml b/symon.toml
index b7da84f..588c45c 100644
--- a/symon.toml
+++ b/symon.toml
@@ -33,10 +33,12 @@ environment = "production"
 [metrics]
 cpu = true          # CPU usage per core
 memory = true       # RAM and swap usage
-network = true      # Network RX/TX
+network = true      # Network RX/TX bytes, packets, errors
 disk = true         # Disk usage
-processes = true    # Top 10 processes (disabled by default - can generate high cardinality)
+processes = true    # Top N processes by CPU (default: false - can generate high cardinality)
 temperature = true  # System temperatures (if available)
+load_avg = true     # System load average (1m, 5m, 15m)
+disk_io = true      # Disk I/O read/write bytes and operations (collection not yet implemented; see collector.rs TODO)
 
 # Process filtering configuration
 # Only used when processes = true
@@ -49,7 +51,7 @@ temperature = true  # System temperatures (if available)
 filter_mode = "blacklist"
 
 # Maximum number of processes to report (sorted by CPU usage, default: 10)
-max_processes = 10
+max_processes = 5
 
 # List of process names to filter (case-insensitive substring match)
 names = [
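
Note on the `collect_disk_io()` TODO above: the sketch below is not part of this patch, but shows one way the Linux side could be implemented by parsing `/proc/diskstats`, whose standard field layout is major, minor, device name, then cumulative read/write counters with sector counts in fixed 512-byte units. `DiskIoStats` mirrors the struct this patch adds to `src/collector.rs`; diffing two samples, as `collect_network()` already does with `calculate_rate`, would turn these totals into the per-second `DiskIoMetric` values.

```rust
use std::collections::HashMap;
use std::fs;

/// Cumulative counters for one block device, mirroring the DiskIoStats
/// struct added to src/collector.rs by this patch.
#[derive(Debug, Clone)]
struct DiskIoStats {
    read_bytes: u64,
    write_bytes: u64,
    read_count: u64,
    write_count: u64,
}

/// /proc/diskstats reports sector counts in fixed 512-byte units.
const SECTOR_SIZE: u64 = 512;

fn read_diskstats() -> std::io::Result<HashMap<String, DiskIoStats>> {
    let content = fs::read_to_string("/proc/diskstats")?;
    let mut stats = HashMap::new();
    for line in content.lines() {
        // Fields: major minor name reads_completed reads_merged sectors_read
        // time_reading writes_completed writes_merged sectors_written time_writing ...
        let fields: Vec<&str> = line.split_whitespace().collect();
        if fields.len() < 11 {
            continue; // skip malformed or truncated lines
        }
        let parse = |s: &str| s.parse::<u64>().unwrap_or(0);
        stats.insert(
            fields[2].to_string(),
            DiskIoStats {
                read_count: parse(fields[3]),
                read_bytes: parse(fields[5]) * SECTOR_SIZE,
                write_count: parse(fields[7]),
                write_bytes: parse(fields[9]) * SECTOR_SIZE,
            },
        );
    }
    Ok(stats)
}

fn main() -> std::io::Result<()> {
    // Sampling twice and diffing the counters would yield per-second rates.
    for (device, s) in read_diskstats()? {
        println!(
            "{device}: {} reads / {} bytes, {} writes / {} bytes",
            s.read_count, s.read_bytes, s.write_count, s.write_bytes
        );
    }
    Ok(())
}
```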