2025-11-07 19:13:23 +01:00
commit ccf1ee5197
21 changed files with 3939 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
/target

1841
Cargo.lock generated Normal file

File diff suppressed because it is too large

44
Cargo.toml Normal file

@@ -0,0 +1,44 @@
[package]
name = "symon"
version = "0.1.0"
edition = "2021"
authors = ["Alex"]
description = "Lightweight system metrics exporter for OpenTelemetry"
license = "MIT OR Apache-2.0"
repository = "https://github.com/battilo/symon"
[dependencies]
# OpenTelemetry
opentelemetry = { version = "0.26", features = ["metrics"] }
opentelemetry-otlp = { version = "0.26", features = ["metrics", "grpc-tonic"] }
opentelemetry_sdk = { version = "0.26", features = ["metrics", "rt-tokio"] }
opentelemetry-semantic-conventions = "0.26"
# Async runtime
tokio = { version = "1.48", features = ["rt-multi-thread", "macros", "sync", "time", "signal"] }
tonic = "0.11"
# System metrics collection
sysinfo = "0.31"
# Configuration
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
# Logging and error handling
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
anyhow = "1.0"
thiserror = "1.0"
# Process filtering
regex = "1.11"
# CLI
clap = { version = "4.5", features = ["derive"] }
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
strip = true

227
docker-compose/METRICS.md Normal file

@@ -0,0 +1,227 @@
# Bottom OpenTelemetry Metrics Reference
This document lists all metrics exported by Bottom when running with the `opentelemetry` feature enabled.
## System Metrics
### CPU
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_cpu_usage_percent` | Gauge | `cpu_id` | CPU usage percentage per core |
**Example:**
```promql
# Average CPU across all cores
avg(system_cpu_usage_percent)
# CPU usage for core 0
system_cpu_usage_percent{cpu_id="0"}
```
### Memory
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_memory_usage_bytes` | Gauge | - | RAM memory currently in use |
| `system_memory_total_bytes` | Gauge | - | Total RAM memory available |
| `system_swap_usage_bytes` | Gauge | - | Swap memory currently in use |
| `system_swap_total_bytes` | Gauge | - | Total swap memory available |
**Example:**
```promql
# Memory usage percentage
(system_memory_usage_bytes / system_memory_total_bytes) * 100
# Available memory
system_memory_total_bytes - system_memory_usage_bytes
```
### Network
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_network_rx_bytes_rate` | Gauge | `interface` | Network receive rate in bytes/sec |
| `system_network_tx_bytes_rate` | Gauge | `interface` | Network transmit rate in bytes/sec |
**Example:**
```promql
# Total network throughput
sum(system_network_rx_bytes_rate) + sum(system_network_tx_bytes_rate)
# RX rate for specific interface
system_network_rx_bytes_rate{interface="eth0"}
```
### Disk
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_disk_usage_bytes` | Gauge | `device`, `mount` | Disk space currently in use |
| `system_disk_total_bytes` | Gauge | `device`, `mount` | Total disk space available |
**Example:**
```promql
# Disk usage percentage
(system_disk_usage_bytes / system_disk_total_bytes) * 100
# Free disk space
system_disk_total_bytes - system_disk_usage_bytes
```
### Temperature
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_temperature_celsius` | Gauge | `sensor` | Temperature readings in Celsius |
**Example:**
```promql
# Average temperature across all sensors
avg(system_temperature_celsius)
# Maximum temperature
max(system_temperature_celsius)
```
## Process Metrics
| Metric Name | Type | Labels | Description |
|------------|------|--------|-------------|
| `system_process_cpu_usage_percent` | Gauge | `name`, `pid` | CPU usage percentage per process |
| `system_process_memory_usage_bytes` | Gauge | `name`, `pid` | Memory usage in bytes per process |
| `system_process_count` | Gauge | - | Total number of processes |
**Example:**
```promql
# Top 10 processes by CPU
topk(10, system_process_cpu_usage_percent)
# Top 10 processes by memory
topk(10, system_process_memory_usage_bytes)
# Total memory used by all Chrome processes
sum(system_process_memory_usage_bytes{name=~".*chrome.*"})
```
## Recording Rules
The following recording rules are pre-configured in Prometheus (see `rules/bottom_rules.yml`):
| Rule Name | Expression | Description |
|-----------|------------|-------------|
| `system_process_cpu_usage_percent:recent` | Recent process CPU metrics | Filters out stale process data (>2 min old) |
| `system_process_memory_usage_bytes:recent` | Recent process memory metrics | Filters out stale process data (>2 min old) |
**Example:**
```promql
# Query only recent process data
topk(10, system_process_cpu_usage_percent:recent)
```
## Common Queries
### System Health
```promql
# Overall system CPU usage
avg(system_cpu_usage_percent)
# Memory pressure (>80% is high)
(system_memory_usage_bytes / system_memory_total_bytes) * 100
# Disk pressure (>90% is critical)
(system_disk_usage_bytes / system_disk_total_bytes) * 100
```
### Resource Hogs
```promql
# Top CPU consumers
topk(5, system_process_cpu_usage_percent)
# Top memory consumers
topk(5, system_process_memory_usage_bytes)
# Processes using >1GB memory
system_process_memory_usage_bytes > 1073741824
```
### Network Analysis
```promql
# Total network traffic (RX + TX)
sum(system_network_rx_bytes_rate) + sum(system_network_tx_bytes_rate)
# Network traffic by interface
sum by (interface) (system_network_rx_bytes_rate + system_network_tx_bytes_rate)
# Interfaces with high RX rate (>10MB/s)
system_network_rx_bytes_rate > 10485760
```
## Alerting Examples
### Sample Prometheus Alert Rules
```yaml
groups:
- name: bottom_alerts
interval: 30s
rules:
- alert: HighCPUUsage
expr: avg(system_cpu_usage_percent) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "High CPU usage detected"
description: "Average CPU usage is {{ $value }}%"
- alert: HighMemoryUsage
expr: (system_memory_usage_bytes / system_memory_total_bytes) * 100 > 90
for: 5m
labels:
severity: warning
annotations:
summary: "High memory usage detected"
description: "Memory usage is {{ $value }}%"
- alert: DiskAlmostFull
expr: (system_disk_usage_bytes / system_disk_total_bytes) * 100 > 90
for: 10m
labels:
severity: critical
annotations:
summary: "Disk {{ $labels.mount }} almost full"
description: "Disk usage is {{ $value }}% on {{ $labels.mount }}"
```
## Label Reference
| Label | Used In | Description |
|-------|---------|-------------|
| `cpu_id` | CPU metrics | CPU core identifier (0, 1, 2, ...) |
| `interface` | Network metrics | Network interface name (eth0, wlan0, ...) |
| `device` | Disk metrics | Device name (/dev/sda1, ...) |
| `mount` | Disk metrics | Mount point (/, /home, ...) |
| `sensor` | Temperature | Temperature sensor name |
| `name` | Process metrics | Process name |
| `pid` | Process metrics | Process ID |
| `exported_job` | All | Always "bottom-system-monitor" |
| `otel_scope_name` | All | Always "bottom-system-monitor" |
## Data Retention
By default, Prometheus stores metrics for 15 days. Retention is controlled by a command-line flag rather than by `prometheus.yml`; adjust it on the Prometheus service, for example:
```yaml
# In docker-compose.yml, under the prometheus service
command:
  - '--config.file=/etc/prometheus/prometheus.yml'
  - '--storage.tsdb.retention.time=30d'  # Keep data for 30 days
```
For long-term storage, consider using:
- **TimescaleDB** (see `docker-compose-timescale.yml.ko`)
- **Thanos** for multi-cluster metrics
- **Cortex** for horizontally scalable storage

195
docker-compose/README.md Normal file

@@ -0,0 +1,195 @@
# Bottom OpenTelemetry Docker Compose Setup
This directory contains a Docker Compose setup for running an observability stack to monitor Bottom with OpenTelemetry.
## Architecture
The stack includes:
1. **OpenTelemetry Collector** - Receives metrics from Bottom via OTLP protocol
2. **Prometheus** - Scrapes and stores metrics from the OTEL Collector
3. **Grafana** - Visualizes metrics from Prometheus
```
Bottom (with --headless flag)
↓ (OTLP/gRPC on port 4317)
OpenTelemetry Collector
↓ (Prometheus scrape on port 8889)
Prometheus
↓ (Query on port 9090)
Grafana (accessible on port 3000)
```
## Quick Start
### 1. Start the observability stack
```bash
cd docker-compose
docker-compose up -d
```
This will start:
- OpenTelemetry Collector on ports 4317 (gRPC), 4318 (HTTP), 8889 (metrics)
- Prometheus on port 9090
- Grafana on port 3000
### 2. Build Bottom with OpenTelemetry support
```bash
cd ..
cargo build --release --features opentelemetry
```
### 3. Create a configuration file
Create a `bottom-config.toml` file:
```toml
[opentelemetry]
enabled = true
endpoint = "http://localhost:4317"
service_name = "bottom-system-monitor"
export_interval_ms = 5000
[opentelemetry.metrics]
cpu = true
memory = true
network = true
disk = true
processes = true
temperature = true
gpu = true
```
### 4. Run Bottom in headless mode
```bash
./target/release/btm --config bottom-config.toml --headless
```
Or without config file:
```bash
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
./target/release/btm --headless
```
### 5. Access the dashboards
- **Prometheus**: http://localhost:9090
- **Grafana**: http://localhost:3000 (username: `admin`, password: `admin`)
## Configuration Files
### otel-collector-config.yml
Configures the OpenTelemetry Collector to:
- Receive OTLP data on ports 4317 (gRPC) and 4318 (HTTP)
- Export metrics in Prometheus format on port 8889
- Debug log all received data
### prometheus.yml
Configures Prometheus to:
- Scrape metrics from the OTEL Collector every 10 seconds
- Load alerting rules from `rules/bottom_rules.yml`
### rules/bottom_rules.yml
Contains Prometheus recording rules for Bottom metrics, including:
- Recent process CPU usage metrics
- Recent process memory usage metrics
## Viewing Metrics in Prometheus
1. Go to http://localhost:9090
2. Click on "Graph"
3. Try these example queries:
```promql
# CPU usage by core
system_cpu_usage_percent
# Memory usage
system_memory_usage_bytes
# Network RX/TX
system_network_rx_bytes_rate
system_network_tx_bytes_rate
# Disk usage
system_disk_usage_bytes
# Top processes by CPU
topk(10, system_process_cpu_usage_percent)
# Top processes by memory
topk(10, system_process_memory_usage_bytes)
```
## Grafana Configuration
Grafana is automatically configured with:
- **Prometheus data source** (http://prometheus:9090) - pre-configured
- **Bottom System Overview dashboard** - pre-loaded
To access:
1. Go to http://localhost:3000 (username: `admin`, password: `admin`)
2. Navigate to Dashboards → Browse → "Bottom System Overview"
The dashboard includes:
- CPU usage by core
- Memory usage (RAM/Swap)
- Network traffic
- Disk usage
- Top 10 processes by CPU
- Top 10 processes by Memory
## Stopping the Stack
```bash
docker-compose down
```
To also remove volumes:
```bash
docker-compose down -v
```
## Troubleshooting
### Bottom not sending metrics
Check the OTEL Collector logs:
```bash
docker-compose logs -f otel-collector
```
You should see messages about receiving metrics.
### Prometheus not scraping
1. Check Prometheus targets at http://localhost:9090/targets
2. The `otel-collector` target should be UP
### No data in Grafana
1. Verify Prometheus data source is configured correctly
2. Check that Prometheus has data by querying directly
3. Ensure your time range in Grafana includes when Bottom was running
## Advanced Configuration
### Using with TimescaleDB (optional)
A TimescaleDB configuration file is available as `docker-compose-timescale.yml.ko` for long-term storage of metrics. Rename it to include it in your stack.
### Custom Prometheus Rules
Edit `rules/bottom_rules.yml` to add custom recording or alerting rules.
### OTEL Collector Sampling
Edit `otel-collector-config.yml` to adjust the batch processor settings for different performance characteristics.

61
docker-compose/docker-compose-timescale.yml.ko Normal file

@@ -0,0 +1,61 @@
services:
timescaledb:
image: timescale/timescaledb-ha:pg15
environment:
POSTGRES_PASSWORD: password
POSTGRES_DB: promscale
POSTGRES_USER: postgres
ports:
- "5432:5432"
volumes:
- timescale_data:/var/lib/postgresql/data
promscale:
image: timescale/promscale:latest
ports:
- "9201:9201"
depends_on:
- timescaledb
environment:
PROMSCALE_DB_URI: postgres://postgres:password@timescaledb:5432/promscale?sslmode=disable
PROMSCALE_STARTUP_INSTALL_EXTENSIONS: "true"
restart: on-failure
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
container_name: otel-collector
command: ["--config=/etc/otel-collector-config.yml"]
volumes:
- ./otel-collector-config.yml:/etc/otel-collector-config.yml
ports:
- "4317:4317"
prometheus:
image: prom/prometheus:latest
container_name: prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- ./rules:/etc/prometheus/rules
ports:
- "9090:9090" # Interfaccia Web di Prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
depends_on:
- otel-collector
grafana:
image: grafana/grafana:latest
ports:
- "3000:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_SECURITY_ADMIN_USER=admin
volumes:
- grafana-storage:/var/lib/grafana
depends_on:
- prometheus
volumes:
grafana-storage:
timescale_data:

52
docker-compose/docker-compose.yml Normal file

@@ -0,0 +1,52 @@
services:
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
container_name: otel-collector
command: ["--config=/etc/otel-collector-config.yml"]
volumes:
- ./otel-collector-config.yml:/etc/otel-collector-config.yml
ports:
- "4317:4317" # gRPC
- "4318:4318" # HTTP
- "8889:8889" # Prometheus metrics endpoint
networks:
- observ-net
prometheus:
image: prom/prometheus:latest
container_name: prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- ./rules:/etc/prometheus/rules
ports:
- "9090:9090" # Interfaccia Web di Prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
depends_on:
- otel-collector
networks:
- observ-net
grafana:
image: grafana/grafana:latest
ports:
- "3000:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_SECURITY_ADMIN_USER=admin
volumes:
- grafana-storage:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
depends_on:
- prometheus
networks:
- observ-net
volumes:
grafana-storage:
networks:
observ-net:
driver: bridge


@@ -0,0 +1,278 @@
{
"title": "Bottom System Overview",
"uid": "bottom-overview",
"timezone": "browser",
"schemaVersion": 16,
"refresh": "5s",
"editable": true,
"panels": [
{
"id": 1,
"title": "CPU Usage by Core",
"type": "timeseries",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"targets": [
{
"expr": "system_cpu_usage_percent",
"legendFormat": "Core {{cpu_id}}",
"refId": "CPU"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100
}
}
},
{
"id": 2,
"title": "Memory Usage",
"type": "timeseries",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"targets": [
{
"expr": "system_memory_usage_bytes",
"legendFormat": "RAM Used",
"refId": "RAM"
},
{
"expr": "system_memory_total_bytes",
"legendFormat": "RAM Total",
"refId": "RAM_Total"
},
{
"expr": "system_swap_usage_bytes",
"legendFormat": "Swap Used",
"refId": "Swap"
},
{
"expr": "system_swap_total_bytes",
"legendFormat": "Swap Total",
"refId": "Swap_Total"
}
],
"fieldConfig": {
"defaults": {
"unit": "bytes"
}
}
},
{
"id": 3,
"title": "Network Traffic",
"type": "timeseries",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"targets": [
{
"expr": "system_network_rx_bytes_rate",
"legendFormat": "RX - {{interface}}",
"refId": "RX"
},
{
"expr": "system_network_tx_bytes_rate",
"legendFormat": "TX - {{interface}}",
"refId": "TX"
}
],
"fieldConfig": {
"defaults": {
"unit": "Bps"
}
}
},
{
"id": 4,
"title": "Disk Usage",
"type": "gauge",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
"targets": [
{
"expr": "(system_disk_usage_bytes / system_disk_total_bytes) * 100",
"legendFormat": "{{mount}} ({{device}})",
"refId": "Disk"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{"value": 0, "color": "green"},
{"value": 70, "color": "yellow"},
{"value": 90, "color": "red"}
]
}
}
}
},
{
"id": 5,
"title": "Top 10 Processes by CPU",
"type": "table",
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
"targets": [
{
"expr": "topk(10, system_process_cpu_usage_percent and (time() - timestamp(system_process_cpu_usage_percent) < 30))",
"format": "table",
"instant": true,
"refId": "Process"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"__name__": true,
"job": true,
"instance": true,
"exported_job": true,
"otel_scope_name": true
},
"indexByName": {
"name": 0,
"pid": 1,
"Value": 2
},
"renameByName": {
"name": "Process Name",
"pid": "PID",
"Value": "CPU %"
}
}
}
],
"options": {
"showHeader": true,
"sortBy": [
{
"displayName": "CPU %",
"desc": true
}
]
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"displayMode": "auto"
}
},
"overrides": [
{
"matcher": {"id": "byName", "options": "CPU %"},
"properties": [
{
"id": "unit",
"value": "percent"
},
{
"id": "custom.displayMode",
"value": "color-background"
},
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{"value": 0, "color": "green"},
{"value": 50, "color": "yellow"},
{"value": 80, "color": "red"}
]
}
}
]
}
]
}
},
{
"id": 6,
"title": "Top 10 Processes by Memory",
"type": "table",
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
"targets": [
{
"expr": "topk(10, system_process_memory_usage_bytes and (time() - timestamp(system_process_memory_usage_bytes) < 30))",
"format": "table",
"instant": true,
"refId": "Process"
}
],
"transformations": [
{
"id": "organize",
"options": {
"excludeByName": {
"Time": true,
"__name__": true,
"job": true,
"instance": true,
"exported_job": true,
"otel_scope_name": true
},
"indexByName": {
"name": 0,
"pid": 1,
"Value": 2
},
"renameByName": {
"name": "Process Name",
"pid": "PID",
"Value": "Memory"
}
}
}
],
"options": {
"showHeader": true,
"sortBy": [
{
"displayName": "Memory",
"desc": true
}
]
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"displayMode": "auto"
}
},
"overrides": [
{
"matcher": {"id": "byName", "options": "Memory"},
"properties": [
{
"id": "unit",
"value": "bytes"
},
{
"id": "custom.displayMode",
"value": "color-background"
},
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{"value": 0, "color": "green"},
{"value": 1073741824, "color": "yellow"},
{"value": 2147483648, "color": "red"}
]
}
}
]
}
]
}
}
]
}


@@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'Bottom Dashboards'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards


@@ -0,0 +1,12 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
editable: true
jsonData:
timeInterval: 10s
queryTimeout: 60s

31
docker-compose/otel-collector-config.yml Normal file

@@ -0,0 +1,31 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
processors:
batch:
send_batch_size: 10000
timeout: 10s
metricsgeneration: {}
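  # Note: metricsgeneration is declared here but not referenced by any pipeline below.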
exporters:
prometheus:
endpoint: "0.0.0.0:8889"
debug:
verbosity: detailed
service:
pipelines:
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus, debug]
logs:
receivers: [otlp]
processors: [batch]
exporters: [debug]


@@ -0,0 +1,67 @@
# Example process filter configuration file
# This file can be included from the main bottom config to keep
# server-specific process lists separate.
#
# Usage in bottom-config.toml:
# [opentelemetry.metrics.process_filter]
# include = "processes.toml"
# Filter mode: "whitelist" or "blacklist"
# - whitelist: Only export metrics for processes in the lists below
# - blacklist: Export metrics for all processes EXCEPT those in the lists
filter_mode = "whitelist"
# Process names to monitor (case-insensitive substring match)
# Examples for common server processes:
names = [
# Web servers
"nginx",
"apache",
"httpd",
# Databases
"postgres",
"mysql",
"redis",
"mongodb",
# Application servers
"java",
"node",
"python",
# Your custom applications
# "myapp",
]
# Regex patterns to match process names (case-sensitive)
# More powerful than simple substring matching
patterns = [
# Match specific versions
# "^nginx-[0-9.]+$",
# "^node-v[0-9]+",
# Match Java applications with specific main class
# "java.*MyApplication",
# Match processes with specific format
# "^gunicorn: worker",
# Match kernel threads (for blacklist)
# "^\\[.*\\]$",
]
# Specific process PIDs to monitor (optional)
# Useful for monitoring specific long-running processes
pids = []
# Example blacklist configuration:
# filter_mode = "blacklist"
# names = [
# "systemd", # Exclude system processes
# "kworker",
# "migration",
# ]
# patterns = [
# "^\\[.*\\]$", # Exclude all kernel threads
# ]

21
docker-compose/prometheus.yml Normal file

@@ -0,0 +1,21 @@
global:
  scrape_interval: 10s # How often to scrape targets
evaluation_interval: 10s
rule_files:
- /etc/prometheus/rules/*.yml
scrape_configs:
  # Job 1: monitor whether Prometheus itself is up
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
  # Job 2: scrape the OpenTelemetry Collector
- job_name: 'otel-collector'
    # The Collector exposes metrics for scraping on its port 8889
metrics_path: '/metrics'
static_configs:
      # Reach the Collector via its Docker service name
- targets: ['otel-collector:8889']

15
docker-compose/rules/bottom_rules.yml Normal file

@@ -0,0 +1,15 @@
groups:
- name: bottom_process_metrics
interval: 30s
rules:
- record: system_process_cpu_usage_percent:recent
expr: |
system_process_cpu_usage_percent
and on(pid, name)
(time() - timestamp(system_process_cpu_usage_percent) < 120)
- record: system_process_memory_usage_bytes:recent
expr: |
system_process_memory_usage_bytes
and on(pid, name)
(time() - timestamp(system_process_memory_usage_bytes) < 120)


@@ -0,0 +1,61 @@
# Example Symon configuration file for OpenTelemetry export
# Copy this file and customize it for your needs
# Collection interval in seconds
collection_interval_secs = 5
# OTLP configuration
[otlp]
# OTLP endpoint (gRPC)
# For local docker-compose setup: http://localhost:4317
# For remote collector: http://your-collector-host:4317
endpoint = "http://localhost:4317"
# Export interval in seconds
export_interval_secs = 10
# Service name that will appear in metrics
service_name = "symon"
# Service version
service_version = "0.1.0"
# Export timeout in seconds
export_timeout_secs = 30
# Additional resource attributes (key-value pairs)
[otlp.resource_attributes]
environment = "production"
host = "server-01"
# Metrics configuration - enable/disable specific metric types
[metrics]
cpu = true # CPU usage per core and average
memory = true # RAM, swap usage
network = true # Network RX/TX
disk = true # Disk usage
temperature = true # CPU/GPU temperatures
processes = true # Top 10 processes by CPU/Memory
# Process filtering configuration
[metrics.process_filter]
# Option 1: Use an external file for server-specific process lists
# This allows different servers to monitor different processes
# Path can be relative to this config file or absolute
#include = "processes.toml"
# Option 2: Configure inline
# Filter mode: "whitelist" (only listed processes) or "blacklist" (exclude listed)
filter_mode = "whitelist"
# List of process names to filter (case-insensitive substring match)
# Examples: ["nginx", "postgres", "redis", "myapp"]
names = ["nginx", "postgres", "redis"]
# List of regex patterns to match process names (case-sensitive)
# More powerful than substring matching
# Examples: ["^nginx-[0-9.]+$", "java.*MyApp", "^gunicorn: worker"]
patterns = []
# List of specific process PIDs to filter
pids = []

80
docker-compose/test-stack.sh Executable file

@@ -0,0 +1,80 @@
#!/bin/bash
# Test script to verify the observability stack is running correctly
set -e
echo "🔍 Testing Bottom OpenTelemetry Stack..."
echo ""
# Colors
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Test OTEL Collector gRPC endpoint
echo -n "Testing OTEL Collector gRPC (port 4317)... "
if nc -zv localhost 4317 2>&1 | grep -q "succeeded\|open"; then
echo -e "${GREEN}✓ OK${NC}"
else
echo -e "${RED}✗ FAILED${NC}"
exit 1
fi
# Test OTEL Collector HTTP endpoint
echo -n "Testing OTEL Collector HTTP (port 4318)... "
if nc -zv localhost 4318 2>&1 | grep -q "succeeded\|open"; then
echo -e "${GREEN}✓ OK${NC}"
else
echo -e "${RED}✗ FAILED${NC}"
exit 1
fi
# Test OTEL Collector metrics endpoint
echo -n "Testing OTEL Collector metrics (port 8889)... "
if curl -s http://localhost:8889/metrics > /dev/null; then
echo -e "${GREEN}✓ OK${NC}"
else
echo -e "${RED}✗ FAILED${NC}"
exit 1
fi
# Test Prometheus
echo -n "Testing Prometheus (port 9090)... "
if curl -s http://localhost:9090/-/healthy | grep -q "Prometheus"; then
echo -e "${GREEN}✓ OK${NC}"
else
echo -e "${RED}✗ FAILED${NC}"
exit 1
fi
# Test Prometheus targets
echo -n "Testing Prometheus targets... "
TARGETS=$(curl -s http://localhost:9090/api/v1/targets | grep -o '"health":"up"' | wc -l)
if [ "$TARGETS" -gt 0 ]; then
echo -e "${GREEN}✓ OK${NC} (${TARGETS} targets up)"
else
echo -e "${YELLOW}⚠ WARNING${NC} (no targets up yet - this is normal if just started)"
fi
# Test Grafana
echo -n "Testing Grafana (port 3000)... "
if curl -s http://localhost:3000/api/health | grep -q "ok"; then
echo -e "${GREEN}✓ OK${NC}"
else
echo -e "${RED}✗ FAILED${NC}"
exit 1
fi
echo ""
echo -e "${GREEN}✓ All tests passed!${NC}"
echo ""
echo "📊 Access points:"
echo " - Prometheus: http://localhost:9090"
echo " - Grafana: http://localhost:3000 (admin/admin)"
echo " - OTEL Collector metrics: http://localhost:8889/metrics"
echo ""
echo "💡 Next steps:"
echo " 1. Build bottom with: cargo build --release --features opentelemetry"
echo " 2. Run in headless mode: ./target/release/btm --headless"
echo " 3. Check metrics in Prometheus: http://localhost:9090/graph"

214
src/collector.rs Normal file

@@ -0,0 +1,214 @@
use crate::config::MetricsConfig;
use anyhow::Result;
use sysinfo::{CpuRefreshKind, Disks, Networks, RefreshKind, System};
/// System metrics collected at a point in time
#[derive(Debug, Clone)]
pub struct SystemMetrics {
pub cpu: Option<Vec<CpuMetric>>,
pub memory: Option<MemoryMetric>,
pub network: Option<Vec<NetworkMetric>>,
pub disk: Option<Vec<DiskMetric>>,
pub processes: Option<Vec<ProcessMetric>>,
pub temperature: Option<Vec<TemperatureMetric>>,
}
#[derive(Debug, Clone)]
pub struct CpuMetric {
pub core_index: usize,
pub usage_percent: f32,
}
#[derive(Debug, Clone)]
pub struct MemoryMetric {
pub used_bytes: u64,
pub total_bytes: u64,
pub swap_used_bytes: u64,
pub swap_total_bytes: u64,
}
#[derive(Debug, Clone)]
pub struct NetworkMetric {
pub interface_name: String,
pub rx_bytes_total: u64,
pub tx_bytes_total: u64,
}
#[derive(Debug, Clone)]
pub struct DiskMetric {
pub device_name: String,
pub mount_point: String,
pub used_bytes: u64,
pub total_bytes: u64,
}
#[derive(Debug, Clone)]
pub struct ProcessMetric {
pub pid: u32,
pub name: String,
pub cpu_usage_percent: f32,
pub memory_bytes: u64,
}
#[derive(Debug, Clone)]
pub struct TemperatureMetric {
pub sensor_name: String,
pub temperature_celsius: f32,
}
/// Collector for system metrics
pub struct MetricsCollector {
system: System,
networks: Networks,
disks: Disks,
config: MetricsConfig,
}
impl MetricsCollector {
pub fn new(config: MetricsConfig) -> Self {
let refresh_kind = RefreshKind::new()
.with_cpu(CpuRefreshKind::everything())
.with_memory(sysinfo::MemoryRefreshKind::everything())
.with_processes(sysinfo::ProcessRefreshKind::everything());
Self {
system: System::new_with_specifics(refresh_kind),
networks: Networks::new_with_refreshed_list(),
disks: Disks::new_with_refreshed_list(),
config,
}
}
/// Collect all enabled metrics
pub fn collect(&mut self) -> Result<SystemMetrics> {
        // Refresh system, network, and disk information so every collection reports current values
        self.system.refresh_all();
        self.networks.refresh();
        self.disks.refresh();
Ok(SystemMetrics {
cpu: if self.config.cpu {
Some(self.collect_cpu())
} else {
None
},
memory: if self.config.memory {
Some(self.collect_memory())
} else {
None
},
network: if self.config.network {
Some(self.collect_network())
} else {
None
},
disk: if self.config.disk {
Some(self.collect_disk())
} else {
None
},
processes: if self.config.processes {
Some(self.collect_processes())
} else {
None
},
temperature: if self.config.temperature {
Some(self.collect_temperature())
} else {
None
},
})
}
fn collect_cpu(&self) -> Vec<CpuMetric> {
self.system
.cpus()
.iter()
.enumerate()
.map(|(index, cpu)| CpuMetric {
core_index: index,
usage_percent: cpu.cpu_usage(),
})
.collect()
}
fn collect_memory(&self) -> MemoryMetric {
MemoryMetric {
used_bytes: self.system.used_memory(),
total_bytes: self.system.total_memory(),
swap_used_bytes: self.system.used_swap(),
swap_total_bytes: self.system.total_swap(),
}
}
fn collect_network(&self) -> Vec<NetworkMetric> {
self.networks
.iter()
.map(|(interface_name, data)| NetworkMetric {
interface_name: interface_name.to_string(),
rx_bytes_total: data.total_received(),
tx_bytes_total: data.total_transmitted(),
})
.collect()
}
fn collect_disk(&self) -> Vec<DiskMetric> {
self.disks
.iter()
.filter_map(|disk| {
let total_bytes = disk.total_space();
let available_bytes = disk.available_space();
let used_bytes = total_bytes.saturating_sub(available_bytes);
Some(DiskMetric {
device_name: disk.name().to_string_lossy().to_string(),
mount_point: disk.mount_point().to_string_lossy().to_string(),
used_bytes,
total_bytes,
})
})
.collect()
}
fn collect_processes(&self) -> Vec<ProcessMetric> {
let filter = self.config.process_filter.as_ref();
let mut processes: Vec<ProcessMetric> = self
.system
.processes()
.iter()
.filter(|(_, process)| {
if let Some(filter_config) = filter {
filter_config.should_include_process(
process.name().to_string_lossy().as_ref(),
process.pid().as_u32(),
)
} else {
true
}
})
.map(|(_, process)| ProcessMetric {
pid: process.pid().as_u32(),
name: process.name().to_string_lossy().to_string(),
cpu_usage_percent: process.cpu_usage(),
memory_bytes: process.memory(),
})
.collect();
// Sort by CPU usage and limit to top 10
processes.sort_by(|a, b| {
b.cpu_usage_percent
.partial_cmp(&a.cpu_usage_percent)
.unwrap_or(std::cmp::Ordering::Equal)
});
processes.truncate(10);
processes
}
fn collect_temperature(&self) -> Vec<TemperatureMetric> {
        // Temperature collection is not wired into this collector yet; sysinfo exposes
        // sensor readings through its Components API, but we do not query it here.
        // For now, return an empty vector.
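        // A possible implementation sketch (an assumption, not part of this commit),
        // using sysinfo's Components API as exposed in the 0.31 series:
        //
        //     let components = sysinfo::Components::new_with_refreshed_list();
        //     return components
        //         .iter()
        //         .map(|c| TemperatureMetric {
        //             sensor_name: c.label().to_string(),
        //             temperature_celsius: c.temperature(),
        //         })
        //         .collect();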
vec![]
}
}
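
#[cfg(test)]
mod tests {
    use super::*;

    // Illustrative smoke test (not part of the original commit). It assumes the test
    // host lets sysinfo read basic CPU and memory information, which holds on most
    // Linux/macOS/Windows machines.
    #[test]
    fn collect_returns_enabled_sections() {
        let mut collector = MetricsCollector::new(MetricsConfig::default());
        let metrics = collector.collect().expect("collection should not fail");
        // CPU and memory are enabled by default in MetricsConfig.
        assert!(metrics.cpu.is_some());
        assert!(metrics.memory.is_some());
        // Process metrics are disabled by default, so none should be collected.
        assert!(metrics.processes.is_none());
    }
}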

347
src/config.rs Normal file

@@ -0,0 +1,347 @@
use anyhow::{Context, Result};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::time::Duration;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
/// OpenTelemetry export configuration
#[serde(default)]
pub otlp: OtlpConfig,
/// Metrics collection configuration
#[serde(default)]
pub metrics: MetricsConfig,
/// Collection interval
#[serde(default = "default_collection_interval")]
pub collection_interval_secs: u64,
}
impl Default for Config {
fn default() -> Self {
Self {
otlp: OtlpConfig::default(),
metrics: MetricsConfig::default(),
collection_interval_secs: default_collection_interval(),
}
}
}
impl Config {
/// Load configuration from file
pub fn from_file(path: &Path) -> Result<Self> {
let content = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read config file: {}", path.display()))?;
let mut config: Config = toml::from_str(&content)
.with_context(|| format!("Failed to parse config file: {}", path.display()))?;
// Load process filter includes if configured
if let Some(process_filter) = &config.metrics.process_filter {
let config_dir = path.parent();
match process_filter.load_with_includes(config_dir) {
Ok(loaded_filter) => {
config.metrics.process_filter = Some(loaded_filter);
}
Err(e) => {
tracing::warn!("Failed to load process filter include: {}", e);
}
}
}
config.validate()?;
Ok(config)
}
/// Validate configuration
pub fn validate(&self) -> Result<()> {
if self.collection_interval_secs == 0 {
anyhow::bail!("Collection interval must be greater than 0");
}
self.otlp.validate()?;
Ok(())
}
pub fn collection_interval(&self) -> Duration {
Duration::from_secs(self.collection_interval_secs)
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OtlpConfig {
/// OTLP endpoint (e.g., "http://localhost:4317")
#[serde(default = "default_endpoint")]
pub endpoint: String,
/// Export interval in seconds
#[serde(default = "default_export_interval")]
pub export_interval_secs: u64,
/// Service name for the metrics
#[serde(default = "default_service_name")]
pub service_name: String,
/// Service version
#[serde(default = "default_service_version")]
pub service_version: String,
/// Additional resource attributes
#[serde(default)]
pub resource_attributes: std::collections::HashMap<String, String>,
/// Timeout for export operations in seconds
#[serde(default = "default_timeout")]
pub export_timeout_secs: u64,
}
impl Default for OtlpConfig {
fn default() -> Self {
Self {
endpoint: default_endpoint(),
export_interval_secs: default_export_interval(),
service_name: default_service_name(),
service_version: default_service_version(),
resource_attributes: std::collections::HashMap::new(),
export_timeout_secs: default_timeout(),
}
}
}
impl OtlpConfig {
pub fn export_interval(&self) -> Duration {
Duration::from_secs(self.export_interval_secs)
}
pub fn export_timeout(&self) -> Duration {
Duration::from_secs(self.export_timeout_secs)
}
pub fn validate(&self) -> Result<()> {
if self.endpoint.is_empty() {
anyhow::bail!("OTLP endpoint cannot be empty");
}
if !self.endpoint.starts_with("http://") && !self.endpoint.starts_with("https://") {
anyhow::bail!("OTLP endpoint must be a valid HTTP/HTTPS URL");
}
if self.export_interval_secs == 0 {
anyhow::bail!("Export interval must be greater than 0");
}
if self.service_name.is_empty() {
anyhow::bail!("Service name cannot be empty");
}
Ok(())
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsConfig {
/// Export CPU metrics
#[serde(default = "default_true")]
pub cpu: bool,
/// Export memory metrics
#[serde(default = "default_true")]
pub memory: bool,
/// Export network metrics
#[serde(default = "default_true")]
pub network: bool,
/// Export disk metrics
#[serde(default = "default_true")]
pub disk: bool,
/// Export process metrics
#[serde(default)]
pub processes: bool,
/// Export temperature metrics
#[serde(default = "default_true")]
pub temperature: bool,
/// Process filter configuration
#[serde(default)]
pub process_filter: Option<ProcessFilterConfig>,
}
impl Default for MetricsConfig {
fn default() -> Self {
Self {
cpu: true,
memory: true,
network: true,
disk: true,
processes: false,
temperature: true,
process_filter: None,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessFilterConfig {
/// Path to external file containing process filter (optional)
#[serde(skip_serializing_if = "Option::is_none")]
pub include: Option<PathBuf>,
/// Filter mode: "whitelist" or "blacklist"
#[serde(skip_serializing_if = "Option::is_none")]
pub filter_mode: Option<ProcessFilterMode>,
/// List of process names to filter (case-insensitive substring match)
#[serde(default)]
pub names: Vec<String>,
/// List of regex patterns to match process names
#[serde(default)]
pub patterns: Vec<String>,
/// List of process PIDs to filter
#[serde(default)]
pub pids: Vec<u32>,
/// Compiled regex patterns (not serialized, built at runtime)
#[serde(skip)]
compiled_patterns: Option<Vec<Regex>>,
}
impl ProcessFilterConfig {
/// Load and merge process filter from include file if specified
    pub fn load_with_includes(&self, config_dir: Option<&Path>) -> Result<Self> {
if let Some(include_path) = &self.include {
// Resolve path relative to config directory if provided
let full_path = if include_path.is_absolute() {
include_path.clone()
} else if let Some(dir) = config_dir {
dir.join(include_path)
} else {
include_path.clone()
};
// Read and parse the included file
let content = std::fs::read_to_string(&full_path)
.with_context(|| format!("Failed to read process filter file: {}", full_path.display()))?;
let included: ProcessFilterConfig = toml::from_str(&content)
.with_context(|| format!("Failed to parse process filter file: {}", full_path.display()))?;
// Merge: included file takes precedence
let mut merged = Self {
include: None,
filter_mode: included.filter_mode.or(self.filter_mode),
names: if included.names.is_empty() {
self.names.clone()
} else {
included.names
},
patterns: if included.patterns.is_empty() {
self.patterns.clone()
} else {
included.patterns
},
pids: if included.pids.is_empty() {
self.pids.clone()
} else {
included.pids
},
compiled_patterns: None,
};
merged.compile_patterns()?;
Ok(merged)
} else {
let mut result = self.clone();
result.compile_patterns()?;
Ok(result)
}
}
/// Compile regex patterns from strings
fn compile_patterns(&mut self) -> Result<()> {
if self.patterns.is_empty() {
self.compiled_patterns = None;
return Ok(());
}
let mut compiled = Vec::new();
for pattern in &self.patterns {
let regex = Regex::new(pattern)
.with_context(|| format!("Invalid regex pattern: {}", pattern))?;
compiled.push(regex);
}
self.compiled_patterns = Some(compiled);
Ok(())
}
/// Check if a process should be included based on filter configuration
pub fn should_include_process(&self, process_name: &str, process_pid: u32) -> bool {
let filter_mode = match &self.filter_mode {
Some(mode) => mode,
None => return true,
};
// Check if process matches the filter lists
let matches_name = self
.names
.iter()
.any(|name| process_name.to_lowercase().contains(&name.to_lowercase()));
let matches_pattern = if let Some(patterns) = &self.compiled_patterns {
patterns.iter().any(|regex| regex.is_match(process_name))
} else {
false
};
let matches_pid = self.pids.contains(&process_pid);
let matches = matches_name || matches_pattern || matches_pid;
match filter_mode {
ProcessFilterMode::Whitelist => matches,
ProcessFilterMode::Blacklist => !matches,
}
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ProcessFilterMode {
Whitelist,
Blacklist,
}
// Default functions
fn default_endpoint() -> String {
"http://localhost:4317".to_string()
}
fn default_export_interval() -> u64 {
10
}
fn default_collection_interval() -> u64 {
5
}
fn default_service_name() -> String {
"symon".to_string()
}
fn default_service_version() -> String {
env!("CARGO_PKG_VERSION").to_string()
}
fn default_timeout() -> u64 {
30
}
fn default_true() -> bool {
true
}
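
#[cfg(test)]
mod tests {
    use super::*;

    // Illustrative tests (not part of the original commit) showing the intended
    // whitelist/blacklist semantics of ProcessFilterConfig::should_include_process.
    fn filter(mode: ProcessFilterMode) -> ProcessFilterConfig {
        let mut f = ProcessFilterConfig {
            include: None,
            filter_mode: Some(mode),
            names: vec!["nginx".to_string()],
            patterns: vec![r"^gunicorn: worker".to_string()],
            pids: vec![42],
            compiled_patterns: None,
        };
        f.compile_patterns().expect("patterns should compile");
        f
    }

    #[test]
    fn whitelist_includes_only_matching_processes() {
        let f = filter(ProcessFilterMode::Whitelist);
        assert!(f.should_include_process("NGINX: worker process", 100)); // case-insensitive name match
        assert!(f.should_include_process("gunicorn: worker [app]", 200)); // regex match
        assert!(f.should_include_process("bash", 42)); // PID match
        assert!(!f.should_include_process("bash", 100)); // no match -> excluded
    }

    #[test]
    fn blacklist_excludes_matching_processes() {
        let f = filter(ProcessFilterMode::Blacklist);
        assert!(!f.should_include_process("nginx", 100)); // matches -> excluded
        assert!(f.should_include_process("bash", 100)); // no match -> included
    }
}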

196
src/exporter.rs Normal file

@@ -0,0 +1,196 @@
use crate::collector::SystemMetrics;
use crate::config::OtlpConfig;
use anyhow::{Context, Result};
use opentelemetry::metrics::MeterProvider;
use opentelemetry::KeyValue;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider};
use opentelemetry_sdk::Resource;
use std::collections::HashMap;
use std::sync::Mutex;

pub struct MetricsExporter {
    meter_provider: SdkMeterProvider,
    gauges: MetricInstruments,
    /// Last cumulative RX/TX byte totals per interface, used to emit counter deltas.
    network_totals: Mutex<HashMap<String, (u64, u64)>>,
}
struct MetricInstruments {
cpu_usage: opentelemetry::metrics::Gauge<f64>,
memory_usage: opentelemetry::metrics::Gauge<u64>,
memory_total: opentelemetry::metrics::Gauge<u64>,
swap_usage: opentelemetry::metrics::Gauge<u64>,
swap_total: opentelemetry::metrics::Gauge<u64>,
network_rx: opentelemetry::metrics::Counter<u64>,
network_tx: opentelemetry::metrics::Counter<u64>,
disk_usage: opentelemetry::metrics::Gauge<u64>,
disk_total: opentelemetry::metrics::Gauge<u64>,
process_cpu: opentelemetry::metrics::Gauge<f64>,
process_memory: opentelemetry::metrics::Gauge<u64>,
temperature: opentelemetry::metrics::Gauge<f64>,
}
impl MetricsExporter {
pub async fn new(config: &OtlpConfig) -> Result<Self> {
// Build resource with service information
let mut resource_kvs = vec![
KeyValue::new("service.name", config.service_name.clone()),
KeyValue::new("service.version", config.service_version.clone()),
];
// Add custom resource attributes
for (key, value) in &config.resource_attributes {
resource_kvs.push(KeyValue::new(key.clone(), value.clone()));
}
let resource = Resource::new(resource_kvs);
// Build OTLP exporter using new pipeline API
let exporter = opentelemetry_otlp::new_exporter()
.tonic()
.with_endpoint(&config.endpoint)
.with_timeout(config.export_timeout())
.build_metrics_exporter(
Box::new(opentelemetry_sdk::metrics::reader::DefaultTemporalitySelector::default())
)
.context("Failed to build OTLP metrics exporter")?;
// Build meter provider
let reader = PeriodicReader::builder(exporter, opentelemetry_sdk::runtime::Tokio)
.with_interval(config.export_interval())
.build();
let meter_provider = SdkMeterProvider::builder()
.with_reader(reader)
.with_resource(resource)
.build();
// Create meter and instruments
let meter = meter_provider.meter("symon");
let gauges = MetricInstruments {
cpu_usage: meter
.f64_gauge("system_cpu_usage_percent")
.with_description("CPU usage percentage per core")
.init(),
memory_usage: meter
.u64_gauge("system_memory_usage_bytes")
.with_description("Memory usage in bytes")
.init(),
memory_total: meter
.u64_gauge("system_memory_total_bytes")
.with_description("Total memory in bytes")
.init(),
swap_usage: meter
.u64_gauge("system_swap_usage_bytes")
.with_description("Swap usage in bytes")
.init(),
swap_total: meter
.u64_gauge("system_swap_total_bytes")
.with_description("Total swap in bytes")
.init(),
network_rx: meter
.u64_counter("system_network_rx_bytes_total")
.with_description("Total bytes received")
.init(),
network_tx: meter
.u64_counter("system_network_tx_bytes_total")
.with_description("Total bytes transmitted")
.init(),
disk_usage: meter
.u64_gauge("system_disk_usage_bytes")
.with_description("Disk usage in bytes")
.init(),
disk_total: meter
.u64_gauge("system_disk_total_bytes")
.with_description("Total disk space in bytes")
.init(),
process_cpu: meter
.f64_gauge("system_process_cpu_usage_percent")
.with_description("Process CPU usage percentage")
.init(),
process_memory: meter
.u64_gauge("system_process_memory_usage_bytes")
.with_description("Process memory usage in bytes")
.init(),
temperature: meter
.f64_gauge("system_temperature_celsius")
.with_description("Temperature in Celsius")
.init(),
};
        Ok(Self {
            meter_provider,
            gauges,
            network_totals: Mutex::new(HashMap::new()),
        })
}
pub fn export(&self, metrics: &SystemMetrics) {
// Export CPU metrics
if let Some(cpu_metrics) = &metrics.cpu {
for cpu in cpu_metrics {
self.gauges.cpu_usage.record(
cpu.usage_percent as f64,
&[KeyValue::new("cpu_id", cpu.core_index as i64)],
);
}
}
// Export memory metrics
if let Some(memory) = &metrics.memory {
self.gauges.memory_usage.record(memory.used_bytes, &[]);
self.gauges.memory_total.record(memory.total_bytes, &[]);
self.gauges.swap_usage.record(memory.swap_used_bytes, &[]);
self.gauges.swap_total.record(memory.swap_total_bytes, &[]);
}
        // Export network metrics as counter deltas: sysinfo reports cumulative totals,
        // so add only the growth since the previous export to avoid overcounting.
        if let Some(network_metrics) = &metrics.network {
            let mut prev_totals = self.network_totals.lock().unwrap();
            for net in network_metrics {
                let attrs = &[KeyValue::new("interface", net.interface_name.clone())];
                let (prev_rx, prev_tx) = prev_totals
                    .insert(net.interface_name.clone(), (net.rx_bytes_total, net.tx_bytes_total))
                    .unwrap_or((net.rx_bytes_total, net.tx_bytes_total));
                self.gauges.network_rx.add(net.rx_bytes_total.saturating_sub(prev_rx), attrs);
                self.gauges.network_tx.add(net.tx_bytes_total.saturating_sub(prev_tx), attrs);
            }
        }
// Export disk metrics
if let Some(disk_metrics) = &metrics.disk {
for disk in disk_metrics {
let attrs = &[
KeyValue::new("device", disk.device_name.clone()),
KeyValue::new("mount", disk.mount_point.clone()),
];
self.gauges.disk_usage.record(disk.used_bytes, attrs);
self.gauges.disk_total.record(disk.total_bytes, attrs);
}
}
// Export process metrics
if let Some(process_metrics) = &metrics.processes {
for process in process_metrics {
let attrs = &[
KeyValue::new("pid", process.pid as i64),
KeyValue::new("name", process.name.clone()),
];
self.gauges
.process_cpu
.record(process.cpu_usage_percent as f64, attrs);
self.gauges.process_memory.record(process.memory_bytes, attrs);
}
}
// Export temperature metrics
if let Some(temp_metrics) = &metrics.temperature {
for temp in temp_metrics {
self.gauges.temperature.record(
temp.temperature_celsius as f64,
&[KeyValue::new("sensor", temp.sensor_name.clone())],
);
}
}
}
pub async fn shutdown(self) -> Result<()> {
self.meter_provider
.shutdown()
.context("Failed to shutdown meter provider")?;
Ok(())
}
}

108
src/main.rs Normal file

@@ -0,0 +1,108 @@
mod collector;
mod config;
mod exporter;
use anyhow::{Context, Result};
use clap::Parser;
use collector::MetricsCollector;
use config::Config;
use exporter::MetricsExporter;
use std::path::PathBuf;
use tokio::signal;
use tokio::time::interval;
use tracing::{error, info};
#[derive(Parser, Debug)]
#[command(name = "symon")]
#[command(about = "Lightweight system metrics exporter for OpenTelemetry", long_about = None)]
#[command(version)]
struct Args {
/// Path to configuration file
#[arg(short, long, value_name = "FILE")]
config: Option<PathBuf>,
/// Log level (trace, debug, info, warn, error)
#[arg(short, long, default_value = "info")]
log_level: String,
}
#[tokio::main]
async fn main() -> Result<()> {
let args = Args::parse();
// Initialize tracing
let log_level = args.log_level.parse().unwrap_or(tracing::Level::INFO);
tracing_subscriber::fmt()
.with_max_level(log_level)
.with_target(false)
.init();
info!("Starting symon v{}", env!("CARGO_PKG_VERSION"));
// Load configuration
let config_path = args
.config
.or_else(find_default_config)
.context("No configuration file specified and no default config found")?;
info!("Loading configuration from: {}", config_path.display());
let config = Config::from_file(&config_path)?;
info!(
"OTLP endpoint: {}, export interval: {}s, collection interval: {}s",
config.otlp.endpoint, config.otlp.export_interval_secs, config.collection_interval_secs
);
// Initialize metrics collector
let mut collector = MetricsCollector::new(config.metrics.clone());
// Initialize OTLP exporter
info!("Initializing OTLP exporter...");
let exporter = MetricsExporter::new(&config.otlp)
.await
.context("Failed to initialize OTLP exporter")?;
info!("Symon initialized successfully");
info!("Press Ctrl+C to stop");
// Main collection loop
let mut tick_interval = interval(config.collection_interval());
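    // ctrl_c() is created once and pinned so that `&mut shutdown` can be polled
    // repeatedly across iterations of the select! loop below.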
let mut shutdown = Box::pin(signal::ctrl_c());
loop {
tokio::select! {
_ = tick_interval.tick() => {
match collector.collect() {
Ok(metrics) => {
exporter.export(&metrics);
tracing::debug!("Metrics collected and exported");
}
Err(e) => {
error!("Failed to collect metrics: {}", e);
}
}
}
_ = &mut shutdown => {
info!("Shutdown signal received");
break;
}
}
}
// Shutdown gracefully
info!("Shutting down...");
exporter.shutdown().await?;
info!("Symon stopped");
Ok(())
}
fn find_default_config() -> Option<PathBuf> {
let candidates = vec![
PathBuf::from("symon.toml"),
PathBuf::from("/etc/symon/symon.toml"),
PathBuf::from("config.toml"),
];
candidates.into_iter().find(|p| p.exists())
}

76
symon.toml Normal file

@@ -0,0 +1,76 @@
# Symon Configuration File
# Lightweight system metrics exporter for OpenTelemetry
# Collection interval in seconds
# How often to collect system metrics
collection_interval_secs = 5
# OTLP configuration
[otlp]
# OTLP endpoint (gRPC)
endpoint = "http://localhost:4317"
# Export interval in seconds
# How often to export metrics to OTLP collector
export_interval_secs = 10
# Service name that will appear in metrics
service_name = "symon"
# Service version
service_version = "0.1.0"
# Export timeout in seconds
export_timeout_secs = 30
# Additional resource attributes (key-value pairs)
[otlp.resource_attributes]
environment = "production"
# host = "server-01"
# datacenter = "us-east-1"
# Metrics configuration - enable/disable specific metric types
[metrics]
cpu = true # CPU usage per core
memory = true # RAM and swap usage
network = true # Network RX/TX
disk = true # Disk usage
processes = false # Top 10 processes (disabled by default - can generate high cardinality)
temperature = true # System temperatures (if available)
# Process filtering configuration
# Only used when processes = true
[metrics.process_filter]
# Option 1: Use an external file for server-specific process lists
# include = "processes.toml"
# Option 2: Configure inline
# Filter mode: "whitelist" (only listed processes) or "blacklist" (exclude listed)
filter_mode = "whitelist"
# List of process names to filter (case-insensitive substring match)
names = [
# Web servers
"nginx",
"apache",
# Databases
"postgres",
"mysql",
"redis",
# Application servers
# "java",
# "node",
# "python",
]
# List of regex patterns to match process names (case-sensitive)
patterns = [
# Example: Match specific versions
# "^nginx-[0-9.]+$",
# "^node-v[0-9]+",
]
# List of specific process PIDs to filter
pids = []