init
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/target
|
||||
1841
Cargo.lock
generated
Normal file
1841
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
44
Cargo.toml
Normal file
44
Cargo.toml
Normal file
@@ -0,0 +1,44 @@
|
||||
[package]
|
||||
name = "symon"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Alex"]
|
||||
description = "Lightweight system metrics exporter for OpenTelemetry"
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/battilo/symon"
|
||||
|
||||
[dependencies]
|
||||
# OpenTelemetry
|
||||
opentelemetry = { version = "0.26", features = ["metrics"] }
|
||||
opentelemetry-otlp = { version = "0.26", features = ["metrics", "grpc-tonic"] }
|
||||
opentelemetry_sdk = { version = "0.26", features = ["metrics", "rt-tokio"] }
|
||||
opentelemetry-semantic-conventions = "0.26"
|
||||
|
||||
# Async runtime
|
||||
tokio = { version = "1.48", features = ["rt-multi-thread", "macros", "sync", "time", "signal"] }
|
||||
tonic = "0.11"
|
||||
|
||||
# System metrics collection
|
||||
sysinfo = "0.31"
|
||||
|
||||
# Configuration
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
toml = "0.8"
|
||||
|
||||
# Logging and error handling
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
anyhow = "1.0"
|
||||
thiserror = "1.0"
|
||||
|
||||
# Process filtering
|
||||
regex = "1.11"
|
||||
|
||||
# CLI
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = true
|
||||
codegen-units = 1
|
||||
strip = true
|
||||
227
docker-compose/METRICS.md
Normal file
227
docker-compose/METRICS.md
Normal file
@@ -0,0 +1,227 @@
|
||||
# Bottom OpenTelemetry Metrics Reference
|
||||
|
||||
This document lists all metrics exported by Bottom when running with the `opentelemetry` feature enabled.
|
||||
|
||||
## System Metrics
|
||||
|
||||
### CPU
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_cpu_usage_percent` | Gauge | `cpu_id` | CPU usage percentage per core |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Average CPU across all cores
|
||||
avg(system_cpu_usage_percent)
|
||||
|
||||
# CPU usage for core 0
|
||||
system_cpu_usage_percent{cpu_id="0"}
|
||||
```
|
||||
|
||||
### Memory
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_memory_usage_bytes` | Gauge | - | RAM memory currently in use |
|
||||
| `system_memory_total_bytes` | Gauge | - | Total RAM memory available |
|
||||
| `system_swap_usage_bytes` | Gauge | - | Swap memory currently in use |
|
||||
| `system_swap_total_bytes` | Gauge | - | Total swap memory available |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Memory usage percentage
|
||||
(system_memory_usage_bytes / system_memory_total_bytes) * 100
|
||||
|
||||
# Available memory
|
||||
system_memory_total_bytes - system_memory_usage_bytes
|
||||
```
|
||||
|
||||
### Network
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_network_rx_bytes_rate` | Gauge | `interface` | Network receive rate in bytes/sec |
|
||||
| `system_network_tx_bytes_rate` | Gauge | `interface` | Network transmit rate in bytes/sec |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Total network throughput
|
||||
sum(system_network_rx_bytes_rate) + sum(system_network_tx_bytes_rate)
|
||||
|
||||
# RX rate for specific interface
|
||||
system_network_rx_bytes_rate{interface="eth0"}
|
||||
```
|
||||
|
||||
### Disk
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_disk_usage_bytes` | Gauge | `device`, `mount` | Disk space currently in use |
|
||||
| `system_disk_total_bytes` | Gauge | `device`, `mount` | Total disk space available |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Disk usage percentage
|
||||
(system_disk_usage_bytes / system_disk_total_bytes) * 100
|
||||
|
||||
# Free disk space
|
||||
system_disk_total_bytes - system_disk_usage_bytes
|
||||
```
|
||||
|
||||
### Temperature
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_temperature_celsius` | Gauge | `sensor` | Temperature readings in Celsius |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Average temperature across all sensors
|
||||
avg(system_temperature_celsius)
|
||||
|
||||
# Maximum temperature
|
||||
max(system_temperature_celsius)
|
||||
```
|
||||
|
||||
## Process Metrics
|
||||
|
||||
| Metric Name | Type | Labels | Description |
|
||||
|------------|------|--------|-------------|
|
||||
| `system_process_cpu_usage_percent` | Gauge | `name`, `pid` | CPU usage percentage per process |
|
||||
| `system_process_memory_usage_bytes` | Gauge | `name`, `pid` | Memory usage in bytes per process |
|
||||
| `system_process_count` | Gauge | - | Total number of processes |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Top 10 processes by CPU
|
||||
topk(10, system_process_cpu_usage_percent)
|
||||
|
||||
# Top 10 processes by memory
|
||||
topk(10, system_process_memory_usage_bytes)
|
||||
|
||||
# Total memory used by all Chrome processes
|
||||
sum(system_process_memory_usage_bytes{name=~".*chrome.*"})
|
||||
```
|
||||
|
||||
## Recording Rules
|
||||
|
||||
The following recording rules are pre-configured in Prometheus (see `rules/bottom_rules.yml`):
|
||||
|
||||
| Rule Name | Expression | Description |
|
||||
|-----------|------------|-------------|
|
||||
| `system_process_cpu_usage_percent:recent` | Recent process CPU metrics | Filters out stale process data (>2 min old) |
|
||||
| `system_process_memory_usage_bytes:recent` | Recent process memory metrics | Filters out stale process data (>2 min old) |
|
||||
|
||||
**Example:**
|
||||
```promql
|
||||
# Query only recent process data
|
||||
topk(10, system_process_cpu_usage_percent:recent)
|
||||
```
|
||||
|
||||
## Common Queries
|
||||
|
||||
### System Health
|
||||
|
||||
```promql
|
||||
# Overall system CPU usage
|
||||
avg(system_cpu_usage_percent)
|
||||
|
||||
# Memory pressure (>80% is high)
|
||||
(system_memory_usage_bytes / system_memory_total_bytes) * 100
|
||||
|
||||
# Disk pressure (>90% is critical)
|
||||
(system_disk_usage_bytes / system_disk_total_bytes) * 100
|
||||
```
|
||||
|
||||
### Resource Hogs
|
||||
|
||||
```promql
|
||||
# Top CPU consumers
|
||||
topk(5, system_process_cpu_usage_percent)
|
||||
|
||||
# Top memory consumers
|
||||
topk(5, system_process_memory_usage_bytes)
|
||||
|
||||
# Processes using >1GB memory
|
||||
system_process_memory_usage_bytes > 1073741824
|
||||
```
|
||||
|
||||
### Network Analysis
|
||||
|
||||
```promql
|
||||
# Total network traffic (RX + TX)
|
||||
sum(system_network_rx_bytes_rate) + sum(system_network_tx_bytes_rate)
|
||||
|
||||
# Network traffic by interface
|
||||
sum by (interface) (system_network_rx_bytes_rate + system_network_tx_bytes_rate)
|
||||
|
||||
# Interfaces with high RX rate (>10MB/s)
|
||||
system_network_rx_bytes_rate > 10485760
|
||||
```
|
||||
|
||||
## Alerting Examples
|
||||
|
||||
### Sample Prometheus Alert Rules
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: bottom_alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: HighCPUUsage
|
||||
expr: avg(system_cpu_usage_percent) > 80
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High CPU usage detected"
|
||||
description: "Average CPU usage is {{ $value }}%"
|
||||
|
||||
- alert: HighMemoryUsage
|
||||
expr: (system_memory_usage_bytes / system_memory_total_bytes) * 100 > 90
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High memory usage detected"
|
||||
description: "Memory usage is {{ $value }}%"
|
||||
|
||||
- alert: DiskAlmostFull
|
||||
expr: (system_disk_usage_bytes / system_disk_total_bytes) * 100 > 90
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Disk {{ $labels.mount }} almost full"
|
||||
description: "Disk usage is {{ $value }}% on {{ $labels.mount }}"
|
||||
```
|
||||
|
||||
## Label Reference
|
||||
|
||||
| Label | Used In | Description |
|
||||
|-------|---------|-------------|
|
||||
| `cpu_id` | CPU metrics | CPU core identifier (0, 1, 2, ...) |
|
||||
| `interface` | Network metrics | Network interface name (eth0, wlan0, ...) |
|
||||
| `device` | Disk metrics | Device name (/dev/sda1, ...) |
|
||||
| `mount` | Disk metrics | Mount point (/, /home, ...) |
|
||||
| `sensor` | Temperature | Temperature sensor name |
|
||||
| `name` | Process metrics | Process name |
|
||||
| `pid` | Process metrics | Process ID |
|
||||
| `exported_job` | All | Always "bottom-system-monitor" |
|
||||
| `otel_scope_name` | All | Always "bottom-system-monitor" |
|
||||
|
||||
## Data Retention
|
||||
|
||||
By default, Prometheus stores metrics for 15 days. You can adjust this in the Prometheus configuration:
|
||||
|
||||
```yaml
|
||||
# In prometheus.yml
|
||||
global:
|
||||
retention_time: 30d # Keep data for 30 days
|
||||
```
|
||||
|
||||
For long-term storage, consider using:
|
||||
- **TimescaleDB** (see `docker-compose-timescale.yml.ko`)
|
||||
- **Thanos** for multi-cluster metrics
|
||||
- **Cortex** for horizontally scalable storage
|
||||
195
docker-compose/README.md
Normal file
195
docker-compose/README.md
Normal file
@@ -0,0 +1,195 @@
|
||||
# Bottom OpenTelemetry Docker Compose Setup
|
||||
|
||||
This directory contains a Docker Compose setup for running an observability stack to monitor Bottom with OpenTelemetry.
|
||||
|
||||
## Architecture
|
||||
|
||||
The stack includes:
|
||||
|
||||
1. **OpenTelemetry Collector** - Receives metrics from Bottom via OTLP protocol
|
||||
2. **Prometheus** - Scrapes and stores metrics from the OTEL Collector
|
||||
3. **Grafana** - Visualizes metrics from Prometheus
|
||||
|
||||
```
|
||||
Bottom (with --headless flag)
|
||||
↓ (OTLP/gRPC on port 4317)
|
||||
OpenTelemetry Collector
|
||||
↓ (Prometheus scrape on port 8889)
|
||||
Prometheus
|
||||
↓ (Query on port 9090)
|
||||
Grafana (accessible on port 3000)
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Start the observability stack
|
||||
|
||||
```bash
|
||||
cd docker-compose
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
This will start:
|
||||
- OpenTelemetry Collector on ports 4317 (gRPC), 4318 (HTTP), 8889 (metrics)
|
||||
- Prometheus on port 9090
|
||||
- Grafana on port 3000
|
||||
|
||||
### 2. Build Bottom with OpenTelemetry support
|
||||
|
||||
```bash
|
||||
cd ..
|
||||
cargo build --release --features opentelemetry
|
||||
```
|
||||
|
||||
### 3. Create a configuration file
|
||||
|
||||
Create a `bottom-config.toml` file:
|
||||
|
||||
```toml
|
||||
[opentelemetry]
|
||||
enabled = true
|
||||
endpoint = "http://localhost:4317"
|
||||
service_name = "bottom-system-monitor"
|
||||
export_interval_ms = 5000
|
||||
|
||||
[opentelemetry.metrics]
|
||||
cpu = true
|
||||
memory = true
|
||||
network = true
|
||||
disk = true
|
||||
processes = true
|
||||
temperature = true
|
||||
gpu = true
|
||||
```
|
||||
|
||||
### 4. Run Bottom in headless mode
|
||||
|
||||
```bash
|
||||
./target/release/btm --config bottom-config.toml --headless
|
||||
```
|
||||
|
||||
Or without config file:
|
||||
|
||||
```bash
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 \
|
||||
./target/release/btm --headless
|
||||
```
|
||||
|
||||
### 5. Access the dashboards
|
||||
|
||||
- **Prometheus**: http://localhost:9090
|
||||
- **Grafana**: http://localhost:3000 (username: `admin`, password: `admin`)
|
||||
|
||||
## Configuration Files
|
||||
|
||||
### otel-collector-config.yml
|
||||
|
||||
Configures the OpenTelemetry Collector to:
|
||||
- Receive OTLP data on ports 4317 (gRPC) and 4318 (HTTP)
|
||||
- Export metrics in Prometheus format on port 9090
|
||||
- Debug log all received data
|
||||
|
||||
### prometheus.yml
|
||||
|
||||
Configures Prometheus to:
|
||||
- Scrape metrics from the OTEL Collector every 10 seconds
|
||||
- Load alerting rules from `rules/bottom_rules.yml`
|
||||
|
||||
### rules/bottom_rules.yml
|
||||
|
||||
Contains Prometheus recording rules for Bottom metrics, including:
|
||||
- Recent process CPU usage metrics
|
||||
- Recent process memory usage metrics
|
||||
|
||||
## Viewing Metrics in Prometheus
|
||||
|
||||
1. Go to http://localhost:9090
|
||||
2. Click on "Graph"
|
||||
3. Try these example queries:
|
||||
|
||||
```promql
|
||||
# CPU usage by core
|
||||
system_cpu_usage_percent
|
||||
|
||||
# Memory usage
|
||||
system_memory_usage_bytes
|
||||
|
||||
# Network RX/TX
|
||||
system_network_rx_bytes
|
||||
system_network_tx_bytes
|
||||
|
||||
# Disk usage
|
||||
system_disk_usage_bytes
|
||||
|
||||
# Top processes by CPU
|
||||
topk(10, system_process_cpu_usage_percent)
|
||||
|
||||
# Top processes by memory
|
||||
topk(10, system_process_memory_usage_bytes)
|
||||
```
|
||||
|
||||
## Grafana Configuration
|
||||
|
||||
Grafana is automatically configured with:
|
||||
- **Prometheus data source** (http://prometheus:9090) - pre-configured
|
||||
- **Bottom System Overview dashboard** - pre-loaded
|
||||
|
||||
To access:
|
||||
1. Go to http://localhost:3000 (username: `admin`, password: `admin`)
|
||||
2. Navigate to Dashboards → Browse → "Bottom System Overview"
|
||||
|
||||
The dashboard includes:
|
||||
- CPU usage by core
|
||||
- Memory usage (RAM/Swap)
|
||||
- Network traffic
|
||||
- Disk usage
|
||||
- Top 10 processes by CPU
|
||||
- Top 10 processes by Memory
|
||||
|
||||
## Stopping the Stack
|
||||
|
||||
```bash
|
||||
docker-compose down
|
||||
```
|
||||
|
||||
To also remove volumes:
|
||||
|
||||
```bash
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Bottom not sending metrics
|
||||
|
||||
Check the OTEL Collector logs:
|
||||
```bash
|
||||
docker-compose logs -f otel-collector
|
||||
```
|
||||
|
||||
You should see messages about receiving metrics.
|
||||
|
||||
### Prometheus not scraping
|
||||
|
||||
1. Check Prometheus targets at http://localhost:9090/targets
|
||||
2. The `otel-collector` target should be UP
|
||||
|
||||
### No data in Grafana
|
||||
|
||||
1. Verify Prometheus data source is configured correctly
|
||||
2. Check that Prometheus has data by querying directly
|
||||
3. Ensure your time range in Grafana includes when Bottom was running
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Using with TimescaleDB (optional)
|
||||
|
||||
A TimescaleDB configuration file is available as `docker-compose-timescale.yml.ko` for long-term storage of metrics. Rename it to include it in your stack.
|
||||
|
||||
### Custom Prometheus Rules
|
||||
|
||||
Edit `rules/bottom_rules.yml` to add custom recording or alerting rules.
|
||||
|
||||
### OTEL Collector Sampling
|
||||
|
||||
Edit `otel-collector-config.yml` to adjust the batch processor settings for different performance characteristics.
|
||||
61
docker-compose/docker-compose-timescale.yml.ko
Normal file
61
docker-compose/docker-compose-timescale.yml.ko
Normal file
@@ -0,0 +1,61 @@
|
||||
services:
|
||||
timescaledb:
|
||||
image: timescale/timescaledb-ha:pg15
|
||||
environment:
|
||||
POSTGRES_PASSWORD: password
|
||||
POSTGRES_DB: promscale
|
||||
POSTGRES_USER: postgres
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- timescale_data:/var/lib/postgresql/data
|
||||
|
||||
promscale:
|
||||
image: timescale/promscale:latest
|
||||
ports:
|
||||
- "9201:9201"
|
||||
depends_on:
|
||||
- timescaledb
|
||||
environment:
|
||||
PROMSCALE_DB_URI: postgres://postgres:password@timescaledb:5432/promscale?sslmode=disable
|
||||
PROMSCALE_STARTUP_INSTALL_EXTENSIONS: "true"
|
||||
restart: on-failure
|
||||
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
container_name: otel-collector
|
||||
command: ["--config=/etc/otel-collector-config.yml"]
|
||||
volumes:
|
||||
- ./otel-collector-config.yml:/etc/otel-collector-config.yml
|
||||
|
||||
ports:
|
||||
- "4317:4317"
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- ./rules:/etc/prometheus/rules
|
||||
ports:
|
||||
- "9090:9090" # Interfaccia Web di Prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
depends_on:
|
||||
- otel-collector
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
volumes:
|
||||
- grafana-storage:/var/lib/grafana
|
||||
depends_on:
|
||||
- prometheus
|
||||
|
||||
volumes:
|
||||
grafana-storage:
|
||||
timescale_data:
|
||||
52
docker-compose/docker-compose.yml
Normal file
52
docker-compose/docker-compose.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
services:
|
||||
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
container_name: otel-collector
|
||||
command: ["--config=/etc/otel-collector-config.yml"]
|
||||
volumes:
|
||||
- ./otel-collector-config.yml:/etc/otel-collector-config.yml
|
||||
ports:
|
||||
- "4317:4317" # gRPC
|
||||
- "4318:4318" # HTTP
|
||||
- "8889:8889" # Prometheus metrics endpoint
|
||||
networks:
|
||||
- observ-net
|
||||
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- ./rules:/etc/prometheus/rules
|
||||
ports:
|
||||
- "9090:9090" # Interfaccia Web di Prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
depends_on:
|
||||
- otel-collector
|
||||
networks:
|
||||
- observ-net
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
volumes:
|
||||
- grafana-storage:/var/lib/grafana
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning
|
||||
depends_on:
|
||||
- prometheus
|
||||
networks:
|
||||
- observ-net
|
||||
|
||||
volumes:
|
||||
grafana-storage:
|
||||
|
||||
networks:
|
||||
observ-net:
|
||||
driver: bridge
|
||||
@@ -0,0 +1,278 @@
|
||||
{
|
||||
"title": "Bottom System Overview",
|
||||
"uid": "bottom-overview",
|
||||
"timezone": "browser",
|
||||
"schemaVersion": 16,
|
||||
"refresh": "5s",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "CPU Usage by Core",
|
||||
"type": "timeseries",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "system_cpu_usage_percent",
|
||||
"legendFormat": "Core {{cpu_id}}",
|
||||
"refId": "CPU"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "system_memory_usage_bytes",
|
||||
"legendFormat": "RAM Used",
|
||||
"refId": "RAM"
|
||||
},
|
||||
{
|
||||
"expr": "system_memory_total_bytes",
|
||||
"legendFormat": "RAM Total",
|
||||
"refId": "RAM_Total"
|
||||
},
|
||||
{
|
||||
"expr": "system_swap_usage_bytes",
|
||||
"legendFormat": "Swap Used",
|
||||
"refId": "Swap"
|
||||
},
|
||||
{
|
||||
"expr": "system_swap_total_bytes",
|
||||
"legendFormat": "Swap Total",
|
||||
"refId": "Swap_Total"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "bytes"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Network Traffic",
|
||||
"type": "timeseries",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "system_network_rx_bytes_rate",
|
||||
"legendFormat": "RX - {{interface}}",
|
||||
"refId": "RX"
|
||||
},
|
||||
{
|
||||
"expr": "system_network_tx_bytes_rate",
|
||||
"legendFormat": "TX - {{interface}}",
|
||||
"refId": "TX"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "Bps"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Disk Usage",
|
||||
"type": "gauge",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(system_disk_usage_bytes / system_disk_total_bytes) * 100",
|
||||
"legendFormat": "{{mount}} ({{device}})",
|
||||
"refId": "Disk"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"value": 0, "color": "green"},
|
||||
{"value": 70, "color": "yellow"},
|
||||
{"value": 90, "color": "red"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Top 10 Processes by CPU",
|
||||
"type": "table",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, system_process_cpu_usage_percent and (time() - timestamp(system_process_cpu_usage_percent) < 30))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"refId": "Process"
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"__name__": true,
|
||||
"job": true,
|
||||
"instance": true,
|
||||
"exported_job": true,
|
||||
"otel_scope_name": true
|
||||
},
|
||||
"indexByName": {
|
||||
"name": 0,
|
||||
"pid": 1,
|
||||
"Value": 2
|
||||
},
|
||||
"renameByName": {
|
||||
"name": "Process Name",
|
||||
"pid": "PID",
|
||||
"Value": "CPU %"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"displayName": "CPU %",
|
||||
"desc": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "CPU %"},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "percent"
|
||||
},
|
||||
{
|
||||
"id": "custom.displayMode",
|
||||
"value": "color-background"
|
||||
},
|
||||
{
|
||||
"id": "thresholds",
|
||||
"value": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"value": 0, "color": "green"},
|
||||
{"value": 50, "color": "yellow"},
|
||||
{"value": 80, "color": "red"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Top 10 Processes by Memory",
|
||||
"type": "table",
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "topk(10, system_process_memory_usage_bytes and (time() - timestamp(system_process_memory_usage_bytes) < 30))",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"refId": "Process"
|
||||
}
|
||||
],
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"__name__": true,
|
||||
"job": true,
|
||||
"instance": true,
|
||||
"exported_job": true,
|
||||
"otel_scope_name": true
|
||||
},
|
||||
"indexByName": {
|
||||
"name": 0,
|
||||
"pid": 1,
|
||||
"Value": 2
|
||||
},
|
||||
"renameByName": {
|
||||
"name": "Process Name",
|
||||
"pid": "PID",
|
||||
"Value": "Memory"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"displayName": "Memory",
|
||||
"desc": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Memory"},
|
||||
"properties": [
|
||||
{
|
||||
"id": "unit",
|
||||
"value": "bytes"
|
||||
},
|
||||
{
|
||||
"id": "custom.displayMode",
|
||||
"value": "color-background"
|
||||
},
|
||||
{
|
||||
"id": "thresholds",
|
||||
"value": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"value": 0, "color": "green"},
|
||||
{"value": 1073741824, "color": "yellow"},
|
||||
{"value": 2147483648, "color": "red"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'Bottom Dashboards'
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards
|
||||
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
jsonData:
|
||||
timeInterval: 10s
|
||||
queryTimeout: 60s
|
||||
31
docker-compose/otel-collector-config.yml
Normal file
31
docker-compose/otel-collector-config.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
send_batch_size: 10000
|
||||
timeout: 10s
|
||||
metricsgeneration: {}
|
||||
|
||||
exporters:
|
||||
prometheus:
|
||||
endpoint: "0.0.0.0:8889"
|
||||
debug:
|
||||
verbosity: detailed
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [prometheus, debug]
|
||||
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [debug]
|
||||
67
docker-compose/processes-example.toml
Normal file
67
docker-compose/processes-example.toml
Normal file
@@ -0,0 +1,67 @@
|
||||
# Example process filter configuration file
|
||||
# This file can be included from the main bottom config to keep
|
||||
# server-specific process lists separate.
|
||||
#
|
||||
# Usage in bottom-config.toml:
|
||||
# [opentelemetry.metrics.process_filter]
|
||||
# include = "processes.toml"
|
||||
|
||||
# Filter mode: "whitelist" or "blacklist"
|
||||
# - whitelist: Only export metrics for processes in the lists below
|
||||
# - blacklist: Export metrics for all processes EXCEPT those in the lists
|
||||
filter_mode = "whitelist"
|
||||
|
||||
# Process names to monitor (case-insensitive substring match)
|
||||
# Examples for common server processes:
|
||||
names = [
|
||||
# Web servers
|
||||
"nginx",
|
||||
"apache",
|
||||
"httpd",
|
||||
|
||||
# Databases
|
||||
"postgres",
|
||||
"mysql",
|
||||
"redis",
|
||||
"mongodb",
|
||||
|
||||
# Application servers
|
||||
"java",
|
||||
"node",
|
||||
"python",
|
||||
|
||||
# Your custom applications
|
||||
# "myapp",
|
||||
]
|
||||
|
||||
# Regex patterns to match process names (case-sensitive)
|
||||
# More powerful than simple substring matching
|
||||
patterns = [
|
||||
# Match specific versions
|
||||
# "^nginx-[0-9.]+$",
|
||||
# "^node-v[0-9]+",
|
||||
|
||||
# Match Java applications with specific main class
|
||||
# "java.*MyApplication",
|
||||
|
||||
# Match processes with specific format
|
||||
# "^gunicorn: worker",
|
||||
|
||||
# Match kernel threads (for blacklist)
|
||||
# "^\\[.*\\]$",
|
||||
]
|
||||
|
||||
# Specific process PIDs to monitor (optional)
|
||||
# Useful for monitoring specific long-running processes
|
||||
pids = []
|
||||
|
||||
# Example blacklist configuration:
|
||||
# filter_mode = "blacklist"
|
||||
# names = [
|
||||
# "systemd", # Exclude system processes
|
||||
# "kworker",
|
||||
# "migration",
|
||||
# ]
|
||||
# patterns = [
|
||||
# "^\\[.*\\]$", # Exclude all kernel threads
|
||||
# ]
|
||||
21
docker-compose/prometheus.yml
Normal file
21
docker-compose/prometheus.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
global:
|
||||
scrape_interval: 10s # Quanto spesso fare lo scraping
|
||||
evaluation_interval: 10s
|
||||
|
||||
rule_files:
|
||||
- /etc/prometheus/rules/*.yml
|
||||
|
||||
scrape_configs:
|
||||
# Job 1: Monitora se Prometheus stesso è attivo
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# Job 2: Scrape dell'OpenTelemetry Collector
|
||||
- job_name: 'otel-collector'
|
||||
# Il Collector espone le metriche per lo scraping sulla sua porta 8889
|
||||
metrics_path: '/metrics'
|
||||
static_configs:
|
||||
# Raggiunge il Collector usando il suo nome di servizio Docker
|
||||
- targets: ['otel-collector:8889']
|
||||
|
||||
15
docker-compose/rules/bottom_rules.yml
Normal file
15
docker-compose/rules/bottom_rules.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
groups:
|
||||
- name: bottom_process_metrics
|
||||
interval: 30s
|
||||
rules:
|
||||
- record: system_process_cpu_usage_percent:recent
|
||||
expr: |
|
||||
system_process_cpu_usage_percent
|
||||
and on(pid, name)
|
||||
(time() - timestamp(system_process_cpu_usage_percent) < 120)
|
||||
|
||||
- record: system_process_memory_usage_bytes:recent
|
||||
expr: |
|
||||
system_process_memory_usage_bytes
|
||||
and on(pid, name)
|
||||
(time() - timestamp(system_process_memory_usage_bytes) < 120)
|
||||
61
docker-compose/symon-config-example.toml
Normal file
61
docker-compose/symon-config-example.toml
Normal file
@@ -0,0 +1,61 @@
|
||||
# Example Symon configuration file for OpenTelemetry export
|
||||
# Copy this file and customize it for your needs
|
||||
|
||||
# Collection interval in seconds
|
||||
collection_interval_secs = 5
|
||||
|
||||
# OTLP configuration
|
||||
[otlp]
|
||||
# OTLP endpoint (gRPC)
|
||||
# For local docker-compose setup: http://localhost:4317
|
||||
# For remote collector: http://your-collector-host:4317
|
||||
endpoint = "http://localhost:4317"
|
||||
|
||||
# Export interval in seconds
|
||||
export_interval_secs = 10
|
||||
|
||||
# Service name that will appear in metrics
|
||||
service_name = "symon"
|
||||
|
||||
# Service version
|
||||
service_version = "0.1.0"
|
||||
|
||||
# Export timeout in seconds
|
||||
export_timeout_secs = 30
|
||||
|
||||
# Additional resource attributes (key-value pairs)
|
||||
[otlp.resource_attributes]
|
||||
environment = "production"
|
||||
host = "server-01"
|
||||
|
||||
# Metrics configuration - enable/disable specific metric types
|
||||
[metrics]
|
||||
cpu = true # CPU usage per core and average
|
||||
memory = true # RAM, swap usage
|
||||
network = true # Network RX/TX
|
||||
disk = true # Disk usage
|
||||
temperature = true # CPU/GPU temperatures
|
||||
processes = true # Top 10 processes by CPU/Memory
|
||||
|
||||
# Process filtering configuration
|
||||
[metrics.process_filter]
|
||||
# Option 1: Use an external file for server-specific process lists
|
||||
# This allows different servers to monitor different processes
|
||||
# Path can be relative to this config file or absolute
|
||||
#include = "processes.toml"
|
||||
|
||||
# Option 2: Configure inline
|
||||
# Filter mode: "whitelist" (only listed processes) or "blacklist" (exclude listed)
|
||||
filter_mode = "whitelist"
|
||||
|
||||
# List of process names to filter (case-insensitive substring match)
|
||||
# Examples: ["nginx", "postgres", "redis", "myapp"]
|
||||
names = ["nginx", "postgres", "redis"]
|
||||
|
||||
# List of regex patterns to match process names (case-sensitive)
|
||||
# More powerful than substring matching
|
||||
# Examples: ["^nginx-[0-9.]+$", "java.*MyApp", "^gunicorn: worker"]
|
||||
patterns = []
|
||||
|
||||
# List of specific process PIDs to filter
|
||||
pids = []
|
||||
80
docker-compose/test-stack.sh
Executable file
80
docker-compose/test-stack.sh
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/bin/bash
|
||||
# Test script to verify the observability stack is running correctly
|
||||
|
||||
set -e
|
||||
|
||||
echo "🔍 Testing Bottom OpenTelemetry Stack..."
|
||||
echo ""
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
RED='\033[0;31m'
|
||||
YELLOW='\033[1;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Test OTEL Collector gRPC endpoint
|
||||
echo -n "Testing OTEL Collector gRPC (port 4317)... "
|
||||
if nc -zv localhost 4317 2>&1 | grep -q "succeeded\|open"; then
|
||||
echo -e "${GREEN}✓ OK${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ FAILED${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test OTEL Collector HTTP endpoint
|
||||
echo -n "Testing OTEL Collector HTTP (port 4318)... "
|
||||
if nc -zv localhost 4318 2>&1 | grep -q "succeeded\|open"; then
|
||||
echo -e "${GREEN}✓ OK${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ FAILED${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test OTEL Collector metrics endpoint
|
||||
echo -n "Testing OTEL Collector metrics (port 8889)... "
|
||||
if curl -s http://localhost:8889/metrics > /dev/null; then
|
||||
echo -e "${GREEN}✓ OK${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ FAILED${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test Prometheus
|
||||
echo -n "Testing Prometheus (port 9090)... "
|
||||
if curl -s http://localhost:9090/-/healthy | grep -q "Prometheus"; then
|
||||
echo -e "${GREEN}✓ OK${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ FAILED${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test Prometheus targets
|
||||
echo -n "Testing Prometheus targets... "
|
||||
TARGETS=$(curl -s http://localhost:9090/api/v1/targets | grep -o '"health":"up"' | wc -l)
|
||||
if [ "$TARGETS" -gt 0 ]; then
|
||||
echo -e "${GREEN}✓ OK${NC} (${TARGETS} targets up)"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ WARNING${NC} (no targets up yet - this is normal if just started)"
|
||||
fi
|
||||
|
||||
# Test Grafana
|
||||
echo -n "Testing Grafana (port 3000)... "
|
||||
if curl -s http://localhost:3000/api/health | grep -q "ok"; then
|
||||
echo -e "${GREEN}✓ OK${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ FAILED${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo -e "${GREEN}✓ All tests passed!${NC}"
|
||||
echo ""
|
||||
echo "📊 Access points:"
|
||||
echo " - Prometheus: http://localhost:9090"
|
||||
echo " - Grafana: http://localhost:3000 (admin/admin)"
|
||||
echo " - OTEL Collector metrics: http://localhost:8889/metrics"
|
||||
echo ""
|
||||
echo "💡 Next steps:"
|
||||
echo " 1. Build bottom with: cargo build --release --features opentelemetry"
|
||||
echo " 2. Run in headless mode: ./target/release/btm --headless"
|
||||
echo " 3. Check metrics in Prometheus: http://localhost:9090/graph"
|
||||
214
src/collector.rs
Normal file
214
src/collector.rs
Normal file
@@ -0,0 +1,214 @@
|
||||
use crate::config::MetricsConfig;
|
||||
use anyhow::Result;
|
||||
use sysinfo::{CpuRefreshKind, Disks, Networks, RefreshKind, System};
|
||||
|
||||
/// System metrics collected at a point in time
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SystemMetrics {
|
||||
pub cpu: Option<Vec<CpuMetric>>,
|
||||
pub memory: Option<MemoryMetric>,
|
||||
pub network: Option<Vec<NetworkMetric>>,
|
||||
pub disk: Option<Vec<DiskMetric>>,
|
||||
pub processes: Option<Vec<ProcessMetric>>,
|
||||
pub temperature: Option<Vec<TemperatureMetric>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CpuMetric {
|
||||
pub core_index: usize,
|
||||
pub usage_percent: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MemoryMetric {
|
||||
pub used_bytes: u64,
|
||||
pub total_bytes: u64,
|
||||
pub swap_used_bytes: u64,
|
||||
pub swap_total_bytes: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NetworkMetric {
|
||||
pub interface_name: String,
|
||||
pub rx_bytes_total: u64,
|
||||
pub tx_bytes_total: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DiskMetric {
|
||||
pub device_name: String,
|
||||
pub mount_point: String,
|
||||
pub used_bytes: u64,
|
||||
pub total_bytes: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ProcessMetric {
|
||||
pub pid: u32,
|
||||
pub name: String,
|
||||
pub cpu_usage_percent: f32,
|
||||
pub memory_bytes: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TemperatureMetric {
|
||||
pub sensor_name: String,
|
||||
pub temperature_celsius: f32,
|
||||
}
|
||||
|
||||
/// Collector for system metrics
|
||||
pub struct MetricsCollector {
|
||||
system: System,
|
||||
networks: Networks,
|
||||
disks: Disks,
|
||||
config: MetricsConfig,
|
||||
}
|
||||
|
||||
impl MetricsCollector {
|
||||
pub fn new(config: MetricsConfig) -> Self {
|
||||
let refresh_kind = RefreshKind::new()
|
||||
.with_cpu(CpuRefreshKind::everything())
|
||||
.with_memory(sysinfo::MemoryRefreshKind::everything())
|
||||
.with_processes(sysinfo::ProcessRefreshKind::everything());
|
||||
|
||||
Self {
|
||||
system: System::new_with_specifics(refresh_kind),
|
||||
networks: Networks::new_with_refreshed_list(),
|
||||
disks: Disks::new_with_refreshed_list(),
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect all enabled metrics
|
||||
pub fn collect(&mut self) -> Result<SystemMetrics> {
|
||||
// Refresh system info
|
||||
self.system.refresh_all();
|
||||
self.networks.refresh();
|
||||
|
||||
Ok(SystemMetrics {
|
||||
cpu: if self.config.cpu {
|
||||
Some(self.collect_cpu())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
memory: if self.config.memory {
|
||||
Some(self.collect_memory())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
network: if self.config.network {
|
||||
Some(self.collect_network())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
disk: if self.config.disk {
|
||||
Some(self.collect_disk())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
processes: if self.config.processes {
|
||||
Some(self.collect_processes())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
temperature: if self.config.temperature {
|
||||
Some(self.collect_temperature())
|
||||
} else {
|
||||
None
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_cpu(&self) -> Vec<CpuMetric> {
|
||||
self.system
|
||||
.cpus()
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, cpu)| CpuMetric {
|
||||
core_index: index,
|
||||
usage_percent: cpu.cpu_usage(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn collect_memory(&self) -> MemoryMetric {
|
||||
MemoryMetric {
|
||||
used_bytes: self.system.used_memory(),
|
||||
total_bytes: self.system.total_memory(),
|
||||
swap_used_bytes: self.system.used_swap(),
|
||||
swap_total_bytes: self.system.total_swap(),
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_network(&self) -> Vec<NetworkMetric> {
|
||||
self.networks
|
||||
.iter()
|
||||
.map(|(interface_name, data)| NetworkMetric {
|
||||
interface_name: interface_name.to_string(),
|
||||
rx_bytes_total: data.total_received(),
|
||||
tx_bytes_total: data.total_transmitted(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn collect_disk(&self) -> Vec<DiskMetric> {
|
||||
self.disks
|
||||
.iter()
|
||||
.filter_map(|disk| {
|
||||
let total_bytes = disk.total_space();
|
||||
let available_bytes = disk.available_space();
|
||||
let used_bytes = total_bytes.saturating_sub(available_bytes);
|
||||
|
||||
Some(DiskMetric {
|
||||
device_name: disk.name().to_string_lossy().to_string(),
|
||||
mount_point: disk.mount_point().to_string_lossy().to_string(),
|
||||
used_bytes,
|
||||
total_bytes,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn collect_processes(&self) -> Vec<ProcessMetric> {
|
||||
let filter = self.config.process_filter.as_ref();
|
||||
|
||||
let mut processes: Vec<ProcessMetric> = self
|
||||
.system
|
||||
.processes()
|
||||
.iter()
|
||||
.filter(|(_, process)| {
|
||||
if let Some(filter_config) = filter {
|
||||
filter_config.should_include_process(
|
||||
process.name().to_string_lossy().as_ref(),
|
||||
process.pid().as_u32(),
|
||||
)
|
||||
} else {
|
||||
true
|
||||
}
|
||||
})
|
||||
.map(|(_, process)| ProcessMetric {
|
||||
pid: process.pid().as_u32(),
|
||||
name: process.name().to_string_lossy().to_string(),
|
||||
cpu_usage_percent: process.cpu_usage(),
|
||||
memory_bytes: process.memory(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Sort by CPU usage and limit to top 10
|
||||
processes.sort_by(|a, b| {
|
||||
b.cpu_usage_percent
|
||||
.partial_cmp(&a.cpu_usage_percent)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
processes.truncate(10);
|
||||
|
||||
processes
|
||||
}
|
||||
|
||||
fn collect_temperature(&self) -> Vec<TemperatureMetric> {
|
||||
// sysinfo doesn't have direct temperature support in 0.31
|
||||
// This would require platform-specific implementation or additional crates
|
||||
// For now, return empty vector
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
347
src/config.rs
Normal file
347
src/config.rs
Normal file
@@ -0,0 +1,347 @@
|
||||
use anyhow::{Context, Result};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
/// OpenTelemetry export configuration
|
||||
#[serde(default)]
|
||||
pub otlp: OtlpConfig,
|
||||
|
||||
/// Metrics collection configuration
|
||||
#[serde(default)]
|
||||
pub metrics: MetricsConfig,
|
||||
|
||||
/// Collection interval
|
||||
#[serde(default = "default_collection_interval")]
|
||||
pub collection_interval_secs: u64,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
otlp: OtlpConfig::default(),
|
||||
metrics: MetricsConfig::default(),
|
||||
collection_interval_secs: default_collection_interval(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Load configuration from file
|
||||
pub fn from_file(path: &Path) -> Result<Self> {
|
||||
let content = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read config file: {}", path.display()))?;
|
||||
|
||||
let mut config: Config = toml::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse config file: {}", path.display()))?;
|
||||
|
||||
// Load process filter includes if configured
|
||||
if let Some(process_filter) = &config.metrics.process_filter {
|
||||
let config_dir = path.parent();
|
||||
match process_filter.load_with_includes(config_dir) {
|
||||
Ok(loaded_filter) => {
|
||||
config.metrics.process_filter = Some(loaded_filter);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to load process filter include: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
config.validate()?;
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
/// Validate configuration
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
if self.collection_interval_secs == 0 {
|
||||
anyhow::bail!("Collection interval must be greater than 0");
|
||||
}
|
||||
|
||||
self.otlp.validate()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn collection_interval(&self) -> Duration {
|
||||
Duration::from_secs(self.collection_interval_secs)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct OtlpConfig {
|
||||
/// OTLP endpoint (e.g., "http://localhost:4317")
|
||||
#[serde(default = "default_endpoint")]
|
||||
pub endpoint: String,
|
||||
|
||||
/// Export interval in seconds
|
||||
#[serde(default = "default_export_interval")]
|
||||
pub export_interval_secs: u64,
|
||||
|
||||
/// Service name for the metrics
|
||||
#[serde(default = "default_service_name")]
|
||||
pub service_name: String,
|
||||
|
||||
/// Service version
|
||||
#[serde(default = "default_service_version")]
|
||||
pub service_version: String,
|
||||
|
||||
/// Additional resource attributes
|
||||
#[serde(default)]
|
||||
pub resource_attributes: std::collections::HashMap<String, String>,
|
||||
|
||||
/// Timeout for export operations in seconds
|
||||
#[serde(default = "default_timeout")]
|
||||
pub export_timeout_secs: u64,
|
||||
}
|
||||
|
||||
impl Default for OtlpConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
endpoint: default_endpoint(),
|
||||
export_interval_secs: default_export_interval(),
|
||||
service_name: default_service_name(),
|
||||
service_version: default_service_version(),
|
||||
resource_attributes: std::collections::HashMap::new(),
|
||||
export_timeout_secs: default_timeout(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl OtlpConfig {
|
||||
pub fn export_interval(&self) -> Duration {
|
||||
Duration::from_secs(self.export_interval_secs)
|
||||
}
|
||||
|
||||
pub fn export_timeout(&self) -> Duration {
|
||||
Duration::from_secs(self.export_timeout_secs)
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> Result<()> {
|
||||
if self.endpoint.is_empty() {
|
||||
anyhow::bail!("OTLP endpoint cannot be empty");
|
||||
}
|
||||
|
||||
if !self.endpoint.starts_with("http://") && !self.endpoint.starts_with("https://") {
|
||||
anyhow::bail!("OTLP endpoint must be a valid HTTP/HTTPS URL");
|
||||
}
|
||||
|
||||
if self.export_interval_secs == 0 {
|
||||
anyhow::bail!("Export interval must be greater than 0");
|
||||
}
|
||||
|
||||
if self.service_name.is_empty() {
|
||||
anyhow::bail!("Service name cannot be empty");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MetricsConfig {
|
||||
/// Export CPU metrics
|
||||
#[serde(default = "default_true")]
|
||||
pub cpu: bool,
|
||||
|
||||
/// Export memory metrics
|
||||
#[serde(default = "default_true")]
|
||||
pub memory: bool,
|
||||
|
||||
/// Export network metrics
|
||||
#[serde(default = "default_true")]
|
||||
pub network: bool,
|
||||
|
||||
/// Export disk metrics
|
||||
#[serde(default = "default_true")]
|
||||
pub disk: bool,
|
||||
|
||||
/// Export process metrics
|
||||
#[serde(default)]
|
||||
pub processes: bool,
|
||||
|
||||
/// Export temperature metrics
|
||||
#[serde(default = "default_true")]
|
||||
pub temperature: bool,
|
||||
|
||||
/// Process filter configuration
|
||||
#[serde(default)]
|
||||
pub process_filter: Option<ProcessFilterConfig>,
|
||||
}
|
||||
|
||||
impl Default for MetricsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cpu: true,
|
||||
memory: true,
|
||||
network: true,
|
||||
disk: true,
|
||||
processes: false,
|
||||
temperature: true,
|
||||
process_filter: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ProcessFilterConfig {
|
||||
/// Path to external file containing process filter (optional)
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub include: Option<PathBuf>,
|
||||
|
||||
/// Filter mode: "whitelist" or "blacklist"
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub filter_mode: Option<ProcessFilterMode>,
|
||||
|
||||
/// List of process names to filter (case-insensitive substring match)
|
||||
#[serde(default)]
|
||||
pub names: Vec<String>,
|
||||
|
||||
/// List of regex patterns to match process names
|
||||
#[serde(default)]
|
||||
pub patterns: Vec<String>,
|
||||
|
||||
/// List of process PIDs to filter
|
||||
#[serde(default)]
|
||||
pub pids: Vec<u32>,
|
||||
|
||||
/// Compiled regex patterns (not serialized, built at runtime)
|
||||
#[serde(skip)]
|
||||
compiled_patterns: Option<Vec<Regex>>,
|
||||
}
|
||||
|
||||
impl ProcessFilterConfig {
|
||||
/// Load and merge process filter from include file if specified
|
||||
pub fn load_with_includes(& self, config_dir: Option<&Path>) -> Result<Self> {
|
||||
if let Some(include_path) = &self.include {
|
||||
// Resolve path relative to config directory if provided
|
||||
let full_path = if include_path.is_absolute() {
|
||||
include_path.clone()
|
||||
} else if let Some(dir) = config_dir {
|
||||
dir.join(include_path)
|
||||
} else {
|
||||
include_path.clone()
|
||||
};
|
||||
|
||||
// Read and parse the included file
|
||||
let content = std::fs::read_to_string(&full_path)
|
||||
.with_context(|| format!("Failed to read process filter file: {}", full_path.display()))?;
|
||||
|
||||
let included: ProcessFilterConfig = toml::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse process filter file: {}", full_path.display()))?;
|
||||
|
||||
// Merge: included file takes precedence
|
||||
let mut merged = Self {
|
||||
include: None,
|
||||
filter_mode: included.filter_mode.or(self.filter_mode),
|
||||
names: if included.names.is_empty() {
|
||||
self.names.clone()
|
||||
} else {
|
||||
included.names
|
||||
},
|
||||
patterns: if included.patterns.is_empty() {
|
||||
self.patterns.clone()
|
||||
} else {
|
||||
included.patterns
|
||||
},
|
||||
pids: if included.pids.is_empty() {
|
||||
self.pids.clone()
|
||||
} else {
|
||||
included.pids
|
||||
},
|
||||
compiled_patterns: None,
|
||||
};
|
||||
|
||||
merged.compile_patterns()?;
|
||||
Ok(merged)
|
||||
} else {
|
||||
let mut result = self.clone();
|
||||
result.compile_patterns()?;
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compile regex patterns from strings
|
||||
fn compile_patterns(&mut self) -> Result<()> {
|
||||
if self.patterns.is_empty() {
|
||||
self.compiled_patterns = None;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut compiled = Vec::new();
|
||||
for pattern in &self.patterns {
|
||||
let regex = Regex::new(pattern)
|
||||
.with_context(|| format!("Invalid regex pattern: {}", pattern))?;
|
||||
compiled.push(regex);
|
||||
}
|
||||
|
||||
self.compiled_patterns = Some(compiled);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if a process should be included based on filter configuration
|
||||
pub fn should_include_process(&self, process_name: &str, process_pid: u32) -> bool {
|
||||
let filter_mode = match &self.filter_mode {
|
||||
Some(mode) => mode,
|
||||
None => return true,
|
||||
};
|
||||
|
||||
// Check if process matches the filter lists
|
||||
let matches_name = self
|
||||
.names
|
||||
.iter()
|
||||
.any(|name| process_name.to_lowercase().contains(&name.to_lowercase()));
|
||||
|
||||
let matches_pattern = if let Some(patterns) = &self.compiled_patterns {
|
||||
patterns.iter().any(|regex| regex.is_match(process_name))
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let matches_pid = self.pids.contains(&process_pid);
|
||||
let matches = matches_name || matches_pattern || matches_pid;
|
||||
|
||||
match filter_mode {
|
||||
ProcessFilterMode::Whitelist => matches,
|
||||
ProcessFilterMode::Blacklist => !matches,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ProcessFilterMode {
|
||||
Whitelist,
|
||||
Blacklist,
|
||||
}
|
||||
|
||||
// Default functions
|
||||
fn default_endpoint() -> String {
|
||||
"http://localhost:4317".to_string()
|
||||
}
|
||||
|
||||
fn default_export_interval() -> u64 {
|
||||
10
|
||||
}
|
||||
|
||||
fn default_collection_interval() -> u64 {
|
||||
5
|
||||
}
|
||||
|
||||
fn default_service_name() -> String {
|
||||
"symon".to_string()
|
||||
}
|
||||
|
||||
fn default_service_version() -> String {
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
}
|
||||
|
||||
fn default_timeout() -> u64 {
|
||||
30
|
||||
}
|
||||
|
||||
fn default_true() -> bool {
|
||||
true
|
||||
}
|
||||
196
src/exporter.rs
Normal file
196
src/exporter.rs
Normal file
@@ -0,0 +1,196 @@
|
||||
use crate::collector::SystemMetrics;
|
||||
use crate::config::OtlpConfig;
|
||||
use anyhow::{Context, Result};
|
||||
use opentelemetry::metrics::MeterProvider;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider};
|
||||
use opentelemetry_sdk::Resource;
|
||||
|
||||
pub struct MetricsExporter {
|
||||
meter_provider: SdkMeterProvider,
|
||||
gauges: MetricInstruments,
|
||||
}
|
||||
|
||||
struct MetricInstruments {
|
||||
cpu_usage: opentelemetry::metrics::Gauge<f64>,
|
||||
memory_usage: opentelemetry::metrics::Gauge<u64>,
|
||||
memory_total: opentelemetry::metrics::Gauge<u64>,
|
||||
swap_usage: opentelemetry::metrics::Gauge<u64>,
|
||||
swap_total: opentelemetry::metrics::Gauge<u64>,
|
||||
network_rx: opentelemetry::metrics::Counter<u64>,
|
||||
network_tx: opentelemetry::metrics::Counter<u64>,
|
||||
disk_usage: opentelemetry::metrics::Gauge<u64>,
|
||||
disk_total: opentelemetry::metrics::Gauge<u64>,
|
||||
process_cpu: opentelemetry::metrics::Gauge<f64>,
|
||||
process_memory: opentelemetry::metrics::Gauge<u64>,
|
||||
temperature: opentelemetry::metrics::Gauge<f64>,
|
||||
}
|
||||
|
||||
impl MetricsExporter {
|
||||
pub async fn new(config: &OtlpConfig) -> Result<Self> {
|
||||
// Build resource with service information
|
||||
let mut resource_kvs = vec![
|
||||
KeyValue::new("service.name", config.service_name.clone()),
|
||||
KeyValue::new("service.version", config.service_version.clone()),
|
||||
];
|
||||
|
||||
// Add custom resource attributes
|
||||
for (key, value) in &config.resource_attributes {
|
||||
resource_kvs.push(KeyValue::new(key.clone(), value.clone()));
|
||||
}
|
||||
|
||||
let resource = Resource::new(resource_kvs);
|
||||
|
||||
// Build OTLP exporter using new pipeline API
|
||||
let exporter = opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint(&config.endpoint)
|
||||
.with_timeout(config.export_timeout())
|
||||
.build_metrics_exporter(
|
||||
Box::new(opentelemetry_sdk::metrics::reader::DefaultTemporalitySelector::default())
|
||||
)
|
||||
.context("Failed to build OTLP metrics exporter")?;
|
||||
|
||||
// Build meter provider
|
||||
let reader = PeriodicReader::builder(exporter, opentelemetry_sdk::runtime::Tokio)
|
||||
.with_interval(config.export_interval())
|
||||
.build();
|
||||
|
||||
let meter_provider = SdkMeterProvider::builder()
|
||||
.with_reader(reader)
|
||||
.with_resource(resource)
|
||||
.build();
|
||||
|
||||
// Create meter and instruments
|
||||
let meter = meter_provider.meter("symon");
|
||||
|
||||
let gauges = MetricInstruments {
|
||||
cpu_usage: meter
|
||||
.f64_gauge("system_cpu_usage_percent")
|
||||
.with_description("CPU usage percentage per core")
|
||||
.init(),
|
||||
memory_usage: meter
|
||||
.u64_gauge("system_memory_usage_bytes")
|
||||
.with_description("Memory usage in bytes")
|
||||
.init(),
|
||||
memory_total: meter
|
||||
.u64_gauge("system_memory_total_bytes")
|
||||
.with_description("Total memory in bytes")
|
||||
.init(),
|
||||
swap_usage: meter
|
||||
.u64_gauge("system_swap_usage_bytes")
|
||||
.with_description("Swap usage in bytes")
|
||||
.init(),
|
||||
swap_total: meter
|
||||
.u64_gauge("system_swap_total_bytes")
|
||||
.with_description("Total swap in bytes")
|
||||
.init(),
|
||||
network_rx: meter
|
||||
.u64_counter("system_network_rx_bytes_total")
|
||||
.with_description("Total bytes received")
|
||||
.init(),
|
||||
network_tx: meter
|
||||
.u64_counter("system_network_tx_bytes_total")
|
||||
.with_description("Total bytes transmitted")
|
||||
.init(),
|
||||
disk_usage: meter
|
||||
.u64_gauge("system_disk_usage_bytes")
|
||||
.with_description("Disk usage in bytes")
|
||||
.init(),
|
||||
disk_total: meter
|
||||
.u64_gauge("system_disk_total_bytes")
|
||||
.with_description("Total disk space in bytes")
|
||||
.init(),
|
||||
process_cpu: meter
|
||||
.f64_gauge("system_process_cpu_usage_percent")
|
||||
.with_description("Process CPU usage percentage")
|
||||
.init(),
|
||||
process_memory: meter
|
||||
.u64_gauge("system_process_memory_usage_bytes")
|
||||
.with_description("Process memory usage in bytes")
|
||||
.init(),
|
||||
temperature: meter
|
||||
.f64_gauge("system_temperature_celsius")
|
||||
.with_description("Temperature in Celsius")
|
||||
.init(),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
meter_provider,
|
||||
gauges,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn export(&self, metrics: &SystemMetrics) {
|
||||
// Export CPU metrics
|
||||
if let Some(cpu_metrics) = &metrics.cpu {
|
||||
for cpu in cpu_metrics {
|
||||
self.gauges.cpu_usage.record(
|
||||
cpu.usage_percent as f64,
|
||||
&[KeyValue::new("cpu_id", cpu.core_index as i64)],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Export memory metrics
|
||||
if let Some(memory) = &metrics.memory {
|
||||
self.gauges.memory_usage.record(memory.used_bytes, &[]);
|
||||
self.gauges.memory_total.record(memory.total_bytes, &[]);
|
||||
self.gauges.swap_usage.record(memory.swap_used_bytes, &[]);
|
||||
self.gauges.swap_total.record(memory.swap_total_bytes, &[]);
|
||||
}
|
||||
|
||||
// Export network metrics
|
||||
if let Some(network_metrics) = &metrics.network {
|
||||
for net in network_metrics {
|
||||
let attrs = &[KeyValue::new("interface", net.interface_name.clone())];
|
||||
self.gauges.network_rx.add(net.rx_bytes_total, attrs);
|
||||
self.gauges.network_tx.add(net.tx_bytes_total, attrs);
|
||||
}
|
||||
}
|
||||
|
||||
// Export disk metrics
|
||||
if let Some(disk_metrics) = &metrics.disk {
|
||||
for disk in disk_metrics {
|
||||
let attrs = &[
|
||||
KeyValue::new("device", disk.device_name.clone()),
|
||||
KeyValue::new("mount", disk.mount_point.clone()),
|
||||
];
|
||||
self.gauges.disk_usage.record(disk.used_bytes, attrs);
|
||||
self.gauges.disk_total.record(disk.total_bytes, attrs);
|
||||
}
|
||||
}
|
||||
|
||||
// Export process metrics
|
||||
if let Some(process_metrics) = &metrics.processes {
|
||||
for process in process_metrics {
|
||||
let attrs = &[
|
||||
KeyValue::new("pid", process.pid as i64),
|
||||
KeyValue::new("name", process.name.clone()),
|
||||
];
|
||||
self.gauges
|
||||
.process_cpu
|
||||
.record(process.cpu_usage_percent as f64, attrs);
|
||||
self.gauges.process_memory.record(process.memory_bytes, attrs);
|
||||
}
|
||||
}
|
||||
|
||||
// Export temperature metrics
|
||||
if let Some(temp_metrics) = &metrics.temperature {
|
||||
for temp in temp_metrics {
|
||||
self.gauges.temperature.record(
|
||||
temp.temperature_celsius as f64,
|
||||
&[KeyValue::new("sensor", temp.sensor_name.clone())],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn shutdown(self) -> Result<()> {
|
||||
self.meter_provider
|
||||
.shutdown()
|
||||
.context("Failed to shutdown meter provider")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
108
src/main.rs
Normal file
108
src/main.rs
Normal file
@@ -0,0 +1,108 @@
|
||||
mod collector;
|
||||
mod config;
|
||||
mod exporter;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
use collector::MetricsCollector;
|
||||
use config::Config;
|
||||
use exporter::MetricsExporter;
|
||||
use std::path::PathBuf;
|
||||
use tokio::signal;
|
||||
use tokio::time::interval;
|
||||
use tracing::{error, info};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "symon")]
|
||||
#[command(about = "Lightweight system metrics exporter for OpenTelemetry", long_about = None)]
|
||||
#[command(version)]
|
||||
struct Args {
|
||||
/// Path to configuration file
|
||||
#[arg(short, long, value_name = "FILE")]
|
||||
config: Option<PathBuf>,
|
||||
|
||||
/// Log level (trace, debug, info, warn, error)
|
||||
#[arg(short, long, default_value = "info")]
|
||||
log_level: String,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
// Initialize tracing
|
||||
let log_level = args.log_level.parse().unwrap_or(tracing::Level::INFO);
|
||||
tracing_subscriber::fmt()
|
||||
.with_max_level(log_level)
|
||||
.with_target(false)
|
||||
.init();
|
||||
|
||||
info!("Starting symon v{}", env!("CARGO_PKG_VERSION"));
|
||||
|
||||
// Load configuration
|
||||
let config_path = args
|
||||
.config
|
||||
.or_else(find_default_config)
|
||||
.context("No configuration file specified and no default config found")?;
|
||||
|
||||
info!("Loading configuration from: {}", config_path.display());
|
||||
let config = Config::from_file(&config_path)?;
|
||||
|
||||
info!(
|
||||
"OTLP endpoint: {}, export interval: {}s, collection interval: {}s",
|
||||
config.otlp.endpoint, config.otlp.export_interval_secs, config.collection_interval_secs
|
||||
);
|
||||
|
||||
// Initialize metrics collector
|
||||
let mut collector = MetricsCollector::new(config.metrics.clone());
|
||||
|
||||
// Initialize OTLP exporter
|
||||
info!("Initializing OTLP exporter...");
|
||||
let exporter = MetricsExporter::new(&config.otlp)
|
||||
.await
|
||||
.context("Failed to initialize OTLP exporter")?;
|
||||
|
||||
info!("Symon initialized successfully");
|
||||
info!("Press Ctrl+C to stop");
|
||||
|
||||
// Main collection loop
|
||||
let mut tick_interval = interval(config.collection_interval());
|
||||
let mut shutdown = Box::pin(signal::ctrl_c());
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
_ = tick_interval.tick() => {
|
||||
match collector.collect() {
|
||||
Ok(metrics) => {
|
||||
exporter.export(&metrics);
|
||||
tracing::debug!("Metrics collected and exported");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to collect metrics: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = &mut shutdown => {
|
||||
info!("Shutdown signal received");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown gracefully
|
||||
info!("Shutting down...");
|
||||
exporter.shutdown().await?;
|
||||
info!("Symon stopped");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_default_config() -> Option<PathBuf> {
|
||||
let candidates = vec![
|
||||
PathBuf::from("symon.toml"),
|
||||
PathBuf::from("/etc/symon/symon.toml"),
|
||||
PathBuf::from("config.toml"),
|
||||
];
|
||||
|
||||
candidates.into_iter().find(|p| p.exists())
|
||||
}
|
||||
76
symon.toml
Normal file
76
symon.toml
Normal file
@@ -0,0 +1,76 @@
|
||||
# Symon Configuration File
|
||||
# Lightweight system metrics exporter for OpenTelemetry
|
||||
|
||||
# Collection interval in seconds
|
||||
# How often to collect system metrics
|
||||
collection_interval_secs = 5
|
||||
|
||||
# OTLP configuration
|
||||
[otlp]
|
||||
# OTLP endpoint (gRPC)
|
||||
endpoint = "http://localhost:4317"
|
||||
|
||||
# Export interval in seconds
|
||||
# How often to export metrics to OTLP collector
|
||||
export_interval_secs = 10
|
||||
|
||||
# Service name that will appear in metrics
|
||||
service_name = "symon"
|
||||
|
||||
# Service version
|
||||
service_version = "0.1.0"
|
||||
|
||||
# Export timeout in seconds
|
||||
export_timeout_secs = 30
|
||||
|
||||
# Additional resource attributes (key-value pairs)
|
||||
[otlp.resource_attributes]
|
||||
environment = "production"
|
||||
# host = "server-01"
|
||||
# datacenter = "us-east-1"
|
||||
|
||||
# Metrics configuration - enable/disable specific metric types
|
||||
[metrics]
|
||||
cpu = true # CPU usage per core
|
||||
memory = true # RAM and swap usage
|
||||
network = true # Network RX/TX
|
||||
disk = true # Disk usage
|
||||
processes = false # Top 10 processes (disabled by default: per-process labels can produce high-cardinality time series)
|
||||
temperature = true # System temperatures (if available)
|
||||
|
||||
# Process filtering configuration
|
||||
# Only used when processes = true
|
||||
[metrics.process_filter]
|
||||
# Option 1: Use an external file for server-specific process lists
|
||||
# include = "processes.toml"
|
||||
|
||||
# Option 2: Configure inline
|
||||
# Filter mode: "whitelist" (only listed processes) or "blacklist" (exclude listed)
|
||||
filter_mode = "whitelist"
|
||||
|
||||
# List of process names to filter (case-insensitive substring match)
|
||||
names = [
|
||||
# Web servers
|
||||
"nginx",
|
||||
"apache",
|
||||
|
||||
# Databases
|
||||
"postgres",
|
||||
"mysql",
|
||||
"redis",
|
||||
|
||||
# Application servers
|
||||
# "java",
|
||||
# "node",
|
||||
# "python",
|
||||
]
|
||||
|
||||
# List of regex patterns to match process names (unlike `names`, matching here is case-sensitive)
|
||||
patterns = [
|
||||
# Example: Match specific versions
|
||||
# "^nginx-[0-9.]+$",
|
||||
# "^node-v[0-9]+",
|
||||
]
|
||||
|
||||
# List of specific process PIDs to filter
|
||||
pids = []
|
||||
Reference in New Issue
Block a user