Add comprehensive validation system and migrate to .env configuration
This commit includes:

1. Database Configuration Migration:
   - Migrated from DB.txt (Java JDBC) to .env (python-dotenv)
   - Added .env.example template with clear variable names
   - Updated database.py to use environment variables
   - Added python-dotenv>=1.0.0 to dependencies
   - Updated .gitignore to exclude sensitive files

2. Validation System (1,294 lines):
   - comparator.py: Statistical comparison with RMSE, correlation, tolerances
   - db_extractor.py: Database queries for all sensor types
   - validator.py: High-level validation orchestration
   - cli.py: Command-line interface for validation
   - README.md: Comprehensive validation documentation

3. Validation Features:
   - Compare Python vs MATLAB outputs from database
   - Support for all sensor types (RSN, Tilt, ATD)
   - Statistical metrics: max abs/rel diff, RMSE, correlation
   - Configurable tolerances (abs, rel, max)
   - Detailed validation reports
   - CLI and programmatic APIs

4. Examples and Documentation:
   - validate_example.sh: Bash script example
   - validate_example.py: Python programmatic example
   - Updated main README with validation section
   - Added validation workflow and troubleshooting guide

Benefits:
- ✅ No Java driver needed (native Python connectors)
- ✅ Secure .env configuration (excluded from git)
- ✅ Comprehensive validation against MATLAB
- ✅ Statistical confidence in migration accuracy
- ✅ Automated validation reports

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
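For illustration, a minimal sketch of the .env-based configuration flow described above, assuming python-dotenv and placeholder variable names (DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME) rather than the exact keys defined in .env.example:

```python
# Minimal sketch of .env-based configuration; variable names are assumptions,
# not necessarily the keys used in .env.example or database.py.
import os
from dotenv import load_dotenv

load_dotenv()  # read key=value pairs from a local .env file into os.environ

DB_CONFIG = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "3306")),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
}
```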
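Similarly, a rough sketch of the statistical metrics the validation system reports (max abs/rel diff, RMSE, correlation) with configurable tolerances; compare_series and its parameters are illustrative placeholders, not the actual comparator.py API:

```python
# Illustrative comparison of a Python output series against its MATLAB
# counterpart; function name and tolerance defaults are assumptions.
import numpy as np

def compare_series(python_vals, matlab_vals, abs_tol=1e-6, rel_tol=1e-4):
    p = np.asarray(python_vals, dtype=float)
    m = np.asarray(matlab_vals, dtype=float)
    diff = np.abs(p - m)
    rel = diff / np.maximum(np.abs(m), 1e-12)  # guard against division by zero
    metrics = {
        "max_abs_diff": float(diff.max()),
        "max_rel_diff": float(rel.max()),
        "rmse": float(np.sqrt(np.mean((p - m) ** 2))),
        "correlation": float(np.corrcoef(p, m)[0, 1]),
    }
    # Pass if the series agrees within either the absolute or relative tolerance.
    metrics["passed"] = (metrics["max_abs_diff"] <= abs_tol
                         or metrics["max_rel_diff"] <= rel_tol)
    return metrics
```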
src/main.py (new executable file, 218 additions)

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
"""
Main orchestration script for sensor data processing.

This script coordinates the processing of all sensor types:
- RSN (Rockfall Safety Network)
- Tilt (Inclinometers/Tiltmeters)
- ATD (Extensometers and other displacement sensors)

Can process single chains or multiple chains in parallel.
"""

import sys
import argparse
import logging
from typing import List, Tuple
from multiprocessing import Pool, cpu_count

from rsn.main import process_rsn_chain
from tilt.main import process_tilt_chain
from atd.main import process_atd_chain
from common.logging_utils import setup_logger


def process_chain(control_unit_id: str, chain: str, sensor_type: str = 'auto') -> int:
    """
    Process a single chain with automatic or specified sensor type detection.

    Args:
        control_unit_id: Control unit identifier
        chain: Chain identifier
        sensor_type: Sensor type ('rsn', 'tilt', 'atd', or 'auto' for autodetect)

    Returns:
        0 if successful, 1 if error
    """
    if sensor_type == 'auto':
        # Try to detect sensor type from chain configuration
        # For now, try all modules in order
        logger = setup_logger(control_unit_id, chain, "Main")
        logger.info(f"Auto-detecting sensor type for {control_unit_id}/{chain}")

        # Try RSN first
        result = process_rsn_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try Tilt
        result = process_tilt_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try ATD
        result = process_atd_chain(control_unit_id, chain)
        return result

    elif sensor_type.lower() == 'rsn':
        return process_rsn_chain(control_unit_id, chain)

    elif sensor_type.lower() == 'tilt':
        return process_tilt_chain(control_unit_id, chain)

    elif sensor_type.lower() == 'atd':
        return process_atd_chain(control_unit_id, chain)

    else:
        print(f"Unknown sensor type: {sensor_type}")
        return 1


def process_chain_wrapper(args: Tuple[str, str, str]) -> Tuple[str, str, int]:
    """
    Wrapper for parallel processing.

    Args:
        args: Tuple of (control_unit_id, chain, sensor_type)

    Returns:
        Tuple of (control_unit_id, chain, exit_code)
    """
    control_unit_id, chain, sensor_type = args
    exit_code = process_chain(control_unit_id, chain, sensor_type)
    return (control_unit_id, chain, exit_code)


def process_multiple_chains(chains: List[Tuple[str, str, str]],
                            parallel: bool = False,
                            max_workers: int = None) -> int:
    """
    Process multiple chains sequentially or in parallel.

    Args:
        chains: List of tuples (control_unit_id, chain, sensor_type)
        parallel: If True, process chains in parallel
        max_workers: Maximum number of parallel workers (default: CPU count)

    Returns:
        Number of failed chains
    """
    if not parallel:
        # Sequential processing
        failures = 0
        for control_unit_id, chain, sensor_type in chains:
            print(f"\n{'='*80}")
            print(f"Processing: {control_unit_id} / {chain} ({sensor_type})")
            print(f"{'='*80}\n")

            result = process_chain(control_unit_id, chain, sensor_type)
            if result != 0:
                failures += 1
                print(f"FAILED: {control_unit_id}/{chain}")
            else:
                print(f"SUCCESS: {control_unit_id}/{chain}")

        return failures

    else:
        # Parallel processing
        if max_workers is None:
            max_workers = min(cpu_count(), len(chains))

        print(f"Processing {len(chains)} chains in parallel with {max_workers} workers\n")

        with Pool(processes=max_workers) as pool:
            results = pool.map(process_chain_wrapper, chains)

        # Report results
        failures = 0
        print(f"\n{'='*80}")
        print("Processing Summary:")
        print(f"{'='*80}\n")

        for control_unit_id, chain, exit_code in results:
            status = "SUCCESS" if exit_code == 0 else "FAILED"
            print(f"{status}: {control_unit_id}/{chain}")
            if exit_code != 0:
                failures += 1

        print(f"\nTotal: {len(chains)} chains, {failures} failures")

        return failures


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='Process sensor data from database',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Process single chain with auto-detection
  python -m src.main CU001 A

  # Process single chain with specific sensor type
  python -m src.main CU001 A --type rsn

  # Process multiple chains sequentially
  python -m src.main CU001 A CU001 B CU002 A

  # Process multiple chains in parallel
  python -m src.main CU001 A CU001 B CU002 A --parallel

  # Process with specific sensor types
  python -m src.main CU001 A rsn CU001 B tilt CU002 A atd --parallel
        """
    )

    parser.add_argument('args', nargs='+',
                        help='Control unit ID and chain pairs, optionally with sensor type')
    parser.add_argument('--type', '-t', default='auto',
                        choices=['auto', 'rsn', 'tilt', 'atd'],
                        help='Default sensor type (default: auto)')
    parser.add_argument('--parallel', '-p', action='store_true',
                        help='Process multiple chains in parallel')
    parser.add_argument('--workers', '-w', type=int, default=None,
                        help='Maximum number of parallel workers (default: CPU count)')

    args = parser.parse_args()

    # Parse chain arguments
    chains = []
    i = 0
    while i < len(args.args):
        if i + 1 < len(args.args):
            control_unit_id = args.args[i]
            chain = args.args[i + 1]

            # Check if next arg is a sensor type
            if i + 2 < len(args.args) and args.args[i + 2].lower() in ['rsn', 'tilt', 'atd']:
                sensor_type = args.args[i + 2]
                i += 3
            else:
                sensor_type = args.type
                i += 2

            chains.append((control_unit_id, chain, sensor_type))
        else:
            print(f"Error: Missing chain for control unit '{args.args[i]}'")
            sys.exit(1)

    if not chains:
        print("Error: No chains specified")
        sys.exit(1)

    # Process chains
    if len(chains) == 1:
        # Single chain - no need for parallel processing
        control_unit_id, chain, sensor_type = chains[0]
        exit_code = process_chain(control_unit_id, chain, sensor_type)
        sys.exit(exit_code)
    else:
        # Multiple chains
        failures = process_multiple_chains(chains, args.parallel, args.workers)
        sys.exit(1 if failures > 0 else 0)


if __name__ == "__main__":
    main()