matlab-python/src/main.py
alex 23c53cf747 Add comprehensive validation system and migrate to .env configuration
This commit includes:

1. Database Configuration Migration:
   - Migrated from DB.txt (Java JDBC) to .env (python-dotenv); a usage sketch follows this list
   - Added .env.example template with clear variable names
   - Updated database.py to use environment variables
   - Added python-dotenv>=1.0.0 to dependencies
   - Updated .gitignore to exclude sensitive files

2. Validation System (1,294 lines):
   - comparator.py: Statistical comparison with RMSE, correlation, tolerances
   - db_extractor.py: Database queries for all sensor types
   - validator.py: High-level validation orchestration
   - cli.py: Command-line interface for validation
   - README.md: Comprehensive validation documentation

3. Validation Features:
   - Compare Python vs MATLAB outputs from database
   - Support for all sensor types (RSN, Tilt, ATD)
   - Statistical metrics: max abs/rel diff, RMSE, correlation (sketched after the Benefits list below)
   - Configurable tolerances (abs, rel, max)
   - Detailed validation reports
   - CLI and programmatic APIs

4. Examples and Documentation:
   - validate_example.sh: Bash script example
   - validate_example.py: Python programmatic example
   - Updated main README with validation section
   - Added validation workflow and troubleshooting guide
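
For illustration, a minimal sketch of how database.py might consume the .env configuration from item 1 via python-dotenv; the variable names here are assumptions, and the authoritative list lives in .env.example:

```python
# Illustrative only: variable names are assumptions, not necessarily those in .env.example.
import os

from dotenv import load_dotenv  # provided by python-dotenv>=1.0.0

load_dotenv()  # read key=value pairs from .env into the process environment

db_config = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "3306")),  # default port is a placeholder, not confirmed by this commit
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
}
```

A dict like this can then feed whichever native Python connector database.py uses; the exact driver is not stated in this commit.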

Benefits:
- No Java driver needed (native Python connectors)
- Secure .env configuration (excluded from git)
- Comprehensive validation against MATLAB
- Statistical confidence in migration accuracy
- Automated validation reports
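
For item 3's metrics, here is a rough sketch of the kind of comparison involved; this is an assumption about the approach, not the actual comparator.py code:

```python
# Sketch of the metrics listed above; not the actual comparator.py implementation.
import numpy as np


def compare_series(python_vals: np.ndarray, matlab_vals: np.ndarray,
                   abs_tol: float = 1e-6, rel_tol: float = 1e-4) -> dict:
    """Compare two aligned result series and report basic agreement metrics."""
    diff = python_vals - matlab_vals
    max_abs_diff = float(np.max(np.abs(diff)))

    # Guard against division by zero where the MATLAB value is exactly 0
    denom = np.where(np.abs(matlab_vals) > 0, np.abs(matlab_vals), 1.0)
    max_rel_diff = float(np.max(np.abs(diff) / denom))

    rmse = float(np.sqrt(np.mean(diff ** 2)))
    correlation = float(np.corrcoef(python_vals, matlab_vals)[0, 1])

    # Pass criterion is an assumption; the real tolerance logic may differ
    passed = max_abs_diff <= abs_tol or max_rel_diff <= rel_tol
    return {
        "max_abs_diff": max_abs_diff,
        "max_rel_diff": max_rel_diff,
        "rmse": rmse,
        "correlation": correlation,
        "passed": passed,
    }
```

Presumably db_extractor.py supplies the two aligned series from the database before a comparison like this runs per sensor; the real tolerance handling is configurable as described in item 3.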

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-13 15:34:13 +02:00


#!/usr/bin/env python3
"""
Main orchestration script for sensor data processing.

This script coordinates the processing of all sensor types:
- RSN (Rockfall Safety Network)
- Tilt (Inclinometers/Tiltmeters)
- ATD (Extensometers and other displacement sensors)

Can process single chains or multiple chains in parallel.
"""
import sys
import argparse
import logging
from typing import List, Tuple
from multiprocessing import Pool, cpu_count

from rsn.main import process_rsn_chain
from tilt.main import process_tilt_chain
from atd.main import process_atd_chain
from common.logging_utils import setup_logger


def process_chain(control_unit_id: str, chain: str, sensor_type: str = 'auto') -> int:
    """
    Process a single chain with automatic or specified sensor type detection.

    Args:
        control_unit_id: Control unit identifier
        chain: Chain identifier
        sensor_type: Sensor type ('rsn', 'tilt', 'atd', or 'auto' for autodetect)

    Returns:
        0 if successful, 1 if error
    """
    if sensor_type == 'auto':
        # Try to detect sensor type from chain configuration
        # For now, try all modules in order
        logger = setup_logger(control_unit_id, chain, "Main")
        logger.info(f"Auto-detecting sensor type for {control_unit_id}/{chain}")

        # Try RSN first
        result = process_rsn_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try Tilt
        result = process_tilt_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try ATD
        result = process_atd_chain(control_unit_id, chain)
        return result
    elif sensor_type.lower() == 'rsn':
        return process_rsn_chain(control_unit_id, chain)
    elif sensor_type.lower() == 'tilt':
        return process_tilt_chain(control_unit_id, chain)
    elif sensor_type.lower() == 'atd':
        return process_atd_chain(control_unit_id, chain)
    else:
        print(f"Unknown sensor type: {sensor_type}")
        return 1


def process_chain_wrapper(args: Tuple[str, str, str]) -> Tuple[str, str, int]:
    """
    Wrapper for parallel processing.

    Args:
        args: Tuple of (control_unit_id, chain, sensor_type)

    Returns:
        Tuple of (control_unit_id, chain, exit_code)
    """
    control_unit_id, chain, sensor_type = args
    exit_code = process_chain(control_unit_id, chain, sensor_type)
    return (control_unit_id, chain, exit_code)


def process_multiple_chains(chains: List[Tuple[str, str, str]],
                            parallel: bool = False,
                            max_workers: int = None) -> int:
    """
    Process multiple chains sequentially or in parallel.

    Args:
        chains: List of tuples (control_unit_id, chain, sensor_type)
        parallel: If True, process chains in parallel
        max_workers: Maximum number of parallel workers (default: CPU count)

    Returns:
        Number of failed chains
    """
    if not parallel:
        # Sequential processing
        failures = 0
        for control_unit_id, chain, sensor_type in chains:
            print(f"\n{'='*80}")
            print(f"Processing: {control_unit_id} / {chain} ({sensor_type})")
            print(f"{'='*80}\n")

            result = process_chain(control_unit_id, chain, sensor_type)
            if result != 0:
                failures += 1
                print(f"FAILED: {control_unit_id}/{chain}")
            else:
                print(f"SUCCESS: {control_unit_id}/{chain}")
        return failures
    else:
        # Parallel processing
        if max_workers is None:
            max_workers = min(cpu_count(), len(chains))

        print(f"Processing {len(chains)} chains in parallel with {max_workers} workers\n")

        with Pool(processes=max_workers) as pool:
            results = pool.map(process_chain_wrapper, chains)

        # Report results
        failures = 0
        print(f"\n{'='*80}")
        print("Processing Summary:")
        print(f"{'='*80}\n")
        for control_unit_id, chain, exit_code in results:
            status = "SUCCESS" if exit_code == 0 else "FAILED"
            print(f"{status}: {control_unit_id}/{chain}")
            if exit_code != 0:
                failures += 1

        print(f"\nTotal: {len(chains)} chains, {failures} failures")
        return failures


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='Process sensor data from database',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Process single chain with auto-detection
  python -m src.main CU001 A

  # Process single chain with specific sensor type
  python -m src.main CU001 A --type rsn

  # Process multiple chains sequentially
  python -m src.main CU001 A CU001 B CU002 A

  # Process multiple chains in parallel
  python -m src.main CU001 A CU001 B CU002 A --parallel

  # Process with specific sensor types
  python -m src.main CU001 A rsn CU001 B tilt CU002 A atd --parallel
"""
    )
    parser.add_argument('args', nargs='+',
                        help='Control unit ID and chain pairs, optionally with sensor type')
    parser.add_argument('--type', '-t', default='auto',
                        choices=['auto', 'rsn', 'tilt', 'atd'],
                        help='Default sensor type (default: auto)')
    parser.add_argument('--parallel', '-p', action='store_true',
                        help='Process multiple chains in parallel')
    parser.add_argument('--workers', '-w', type=int, default=None,
                        help='Maximum number of parallel workers (default: CPU count)')

    args = parser.parse_args()

    # Parse chain arguments
    chains = []
    i = 0
    while i < len(args.args):
        if i + 1 < len(args.args):
            control_unit_id = args.args[i]
            chain = args.args[i + 1]

            # Check if next arg is a sensor type
            if i + 2 < len(args.args) and args.args[i + 2].lower() in ['rsn', 'tilt', 'atd']:
                sensor_type = args.args[i + 2]
                i += 3
            else:
                sensor_type = args.type
                i += 2

            chains.append((control_unit_id, chain, sensor_type))
        else:
            print(f"Error: Missing chain for control unit '{args.args[i]}'")
            sys.exit(1)

    if not chains:
        print("Error: No chains specified")
        sys.exit(1)

    # Process chains
    if len(chains) == 1:
        # Single chain - no need for parallel processing
        control_unit_id, chain, sensor_type = chains[0]
        exit_code = process_chain(control_unit_id, chain, sensor_type)
        sys.exit(exit_code)
    else:
        # Multiple chains
        failures = process_multiple_chains(chains, args.parallel, args.workers)
        sys.exit(1 if failures > 0 else 0)


if __name__ == "__main__":
    main()