diff --git a/docker-compose/grafana/provisioning/dashboards/bottom-overview.json b/docker-compose/grafana/provisioning/dashboards/bottom-overview.json index 0ec600a..6010bc6 100644 --- a/docker-compose/grafana/provisioning/dashboards/bottom-overview.json +++ b/docker-compose/grafana/provisioning/dashboards/bottom-overview.json @@ -66,12 +66,12 @@ "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, "targets": [ { - "expr": "system_network_rx_bytes_rate", + "expr": "system_network_rx_bytes_per_sec", "legendFormat": "RX - {{interface}}", "refId": "RX" }, { - "expr": "system_network_tx_bytes_rate", + "expr": "system_network_tx_bytes_per_sec", "legendFormat": "TX - {{interface}}", "refId": "TX" } diff --git a/docker-compose/processes-example.toml b/docker-compose/processes-example.toml index 1f8f6f7..89851df 100644 --- a/docker-compose/processes-example.toml +++ b/docker-compose/processes-example.toml @@ -9,7 +9,7 @@ # Filter mode: "whitelist" or "blacklist" # - whitelist: Only export metrics for processes in the lists below # - blacklist: Export metrics for all processes EXCEPT those in the lists -filter_mode = "whitelist" +filter_mode = "blacklist" # Process names to monitor (case-insensitive substring match) # Examples for common server processes: diff --git a/src/collector.rs b/src/collector.rs index 6cb110a..4de3bbc 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -1,5 +1,7 @@ use crate::config::MetricsConfig; use anyhow::Result; +use std::collections::HashMap; +use std::time::Instant; use sysinfo::{CpuRefreshKind, Disks, Networks, RefreshKind, System}; /// System metrics collected at a point in time @@ -30,8 +32,8 @@ pub struct MemoryMetric { #[derive(Debug, Clone)] pub struct NetworkMetric { pub interface_name: String, - pub rx_bytes_total: u64, - pub tx_bytes_total: u64, + pub rx_bytes_per_sec: u64, + pub tx_bytes_per_sec: u64, } #[derive(Debug, Clone)] @@ -62,6 +64,9 @@ pub struct MetricsCollector { networks: Networks, disks: Disks, config: MetricsConfig, + // 
Network rate calculation state + last_network_stats: HashMap, // interface -> (rx_bytes, tx_bytes) + last_network_time: Option, } impl MetricsCollector { @@ -76,6 +81,8 @@ impl MetricsCollector { networks: Networks::new_with_refreshed_list(), disks: Disks::new_with_refreshed_list(), config, + last_network_stats: HashMap::new(), + last_network_time: None, } } @@ -140,15 +147,66 @@ impl MetricsCollector { } } - fn collect_network(&self) -> Vec { - self.networks - .iter() - .map(|(interface_name, data)| NetworkMetric { - interface_name: interface_name.to_string(), - rx_bytes_total: data.total_received(), - tx_bytes_total: data.total_transmitted(), - }) - .collect() + /// Computes per-interface RX/TX byte rates since the previous call. + /// + /// The first call only records baseline counters and returns an empty list; + /// an interface without a stored baseline is skipped until the next call. + fn collect_network(&mut self) -> Vec { + let now = Instant::now(); + let mut metrics = Vec::new(); + + // Calculate time delta + let time_delta_secs = if let Some(last_time) = self.last_network_time { + now.duration_since(last_time).as_secs_f64() + } else { + // First collection, no rate to calculate + self.last_network_time = Some(now); + for (interface_name, data) in self.networks.iter() { + self.last_network_stats.insert( + interface_name.to_string(), + (data.total_received(), data.total_transmitted()), + ); + } + return metrics; // Return empty on first run + }; + + // Update timestamp + self.last_network_time = Some(now); + + // Calculate rates for each interface + for (interface_name, data) in self.networks.iter() { + let rx_total = data.total_received(); + let tx_total = data.total_transmitted(); + + if let Some(&(last_rx, last_tx)) = self.last_network_stats.get(interface_name.as_str()) { + // Calculate bytes per second + let rx_bytes_per_sec = if rx_total >= last_rx && time_delta_secs > 0.0 { + ((rx_total - last_rx) as f64 / time_delta_secs) as u64 + } else { + 0 // Counter wrapped, interface reset, or no time elapsed (a zero delta would yield inf -> u64::MAX) + }; + + let tx_bytes_per_sec = if tx_total >= last_tx && time_delta_secs > 0.0 { + ((tx_total - last_tx) as f64 / time_delta_secs) as u64 + } else { + 0 // Counter wrapped, interface reset, or no time elapsed (a zero delta would yield inf -> u64::MAX) + }; + + metrics.push(NetworkMetric { + interface_name: 
interface_name.to_string(), + rx_bytes_per_sec, + tx_bytes_per_sec, + }); + } + + // Update last stats + self.last_network_stats.insert( + interface_name.to_string(), + (rx_total, tx_total), + ); + } + + metrics } fn collect_disk(&self) -> Vec { @@ -194,13 +252,17 @@ impl MetricsCollector { }) .collect(); - // Sort by CPU usage and limit to top 10 + // Sort by CPU usage and limit to top N (configurable) processes.sort_by(|a, b| { b.cpu_usage_percent .partial_cmp(&a.cpu_usage_percent) .unwrap_or(std::cmp::Ordering::Equal) }); - processes.truncate(10); + + let max_processes = filter + .map(|f| f.max_processes) + .unwrap_or(10); + processes.truncate(max_processes); processes } diff --git a/src/config.rs b/src/config.rs index 87e7493..99f06f1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -195,6 +195,10 @@ pub struct ProcessFilterConfig { #[serde(skip_serializing_if = "Option::is_none")] pub filter_mode: Option, + /// Maximum number of processes to report (top N by CPU usage) + #[serde(default = "default_max_processes")] + pub max_processes: usize, + /// List of process names to filter (case-insensitive substring match) #[serde(default)] pub names: Vec, @@ -236,6 +240,7 @@ impl ProcessFilterConfig { let mut merged = Self { include: None, filter_mode: included.filter_mode.or(self.filter_mode), + max_processes: included.max_processes, names: if included.names.is_empty() { self.names.clone() } else { @@ -345,3 +350,7 @@ fn default_timeout() -> u64 { fn default_true() -> bool { true } + +fn default_max_processes() -> usize { + 10 +} diff --git a/src/exporter.rs b/src/exporter.rs index c36c0b5..02b7ff0 100644 --- a/src/exporter.rs +++ b/src/exporter.rs @@ -18,8 +18,8 @@ struct MetricInstruments { memory_total: opentelemetry::metrics::Gauge, swap_usage: opentelemetry::metrics::Gauge, swap_total: opentelemetry::metrics::Gauge, - network_rx: opentelemetry::metrics::Counter, - network_tx: opentelemetry::metrics::Counter, + network_rx: opentelemetry::metrics::Gauge, + 
network_tx: opentelemetry::metrics::Gauge, disk_usage: opentelemetry::metrics::Gauge, disk_total: opentelemetry::metrics::Gauge, process_cpu: opentelemetry::metrics::Gauge, @@ -87,12 +87,12 @@ impl MetricsExporter { .with_description("Total swap in bytes") .init(), network_rx: meter - .u64_counter("system_network_rx_bytes_total") - .with_description("Total bytes received") + .u64_gauge("system_network_rx_bytes_per_sec") + .with_description("Bytes received per second") .init(), network_tx: meter - .u64_counter("system_network_tx_bytes_total") - .with_description("Total bytes transmitted") + .u64_gauge("system_network_tx_bytes_per_sec") + .with_description("Bytes transmitted per second") .init(), disk_usage: meter .u64_gauge("system_disk_usage_bytes") @@ -145,8 +145,8 @@ impl MetricsExporter { if let Some(network_metrics) = &metrics.network { for net in network_metrics { let attrs = &[KeyValue::new("interface", net.interface_name.clone())]; - self.gauges.network_rx.add(net.rx_bytes_total, attrs); - self.gauges.network_tx.add(net.tx_bytes_total, attrs); + self.gauges.network_rx.record(net.rx_bytes_per_sec, attrs); + self.gauges.network_tx.record(net.tx_bytes_per_sec, attrs); } } diff --git a/symon.toml b/symon.toml index b7e6383..b7da84f 100644 --- a/symon.toml +++ b/symon.toml @@ -35,7 +35,7 @@ cpu = true # CPU usage per core memory = true # RAM and swap usage network = true # Network RX/TX disk = true # Disk usage -processes = false # Top 10 processes (disabled by default - can generate high cardinality) +processes = true # Top N processes by CPU usage (limit set by max_processes; can generate high cardinality) temperature = true # System temperatures (if available) # Process filtering configuration @@ -46,23 +46,16 @@ temperature = true # System temperatures (if available) # Option 2: Configure inline # Filter mode: "whitelist" (only listed processes) or "blacklist" (exclude listed) -filter_mode = "whitelist" +filter_mode = "blacklist" + +# Maximum number of processes to report 
(sorted by CPU usage, default: 10) +max_processes = 10 # List of process names to filter (case-insensitive substring match) names = [ - # Web servers - "nginx", - "apache", - - # Databases - "postgres", - "mysql", - "redis", - - # Application servers - # "java", - # "node", - # "python", + # Exclude system processes that generate too much noise + # "kworker", + # "systemd", ] # List of regex patterns to match process names (case-sensitive)