Add comprehensive validation system and migrate to .env configuration
This commit includes:

1. Database Configuration Migration:
   - Migrated from DB.txt (Java JDBC) to .env (python-dotenv)
   - Added .env.example template with clear variable names
   - Updated database.py to use environment variables
   - Added python-dotenv>=1.0.0 to dependencies
   - Updated .gitignore to exclude sensitive files

2. Validation System (1,294 lines):
   - comparator.py: Statistical comparison with RMSE, correlation, tolerances
   - db_extractor.py: Database queries for all sensor types
   - validator.py: High-level validation orchestration
   - cli.py: Command-line interface for validation
   - README.md: Comprehensive validation documentation

3. Validation Features:
   - Compare Python vs MATLAB outputs from database
   - Support for all sensor types (RSN, Tilt, ATD)
   - Statistical metrics: max abs/rel diff, RMSE, correlation
   - Configurable tolerances (abs, rel, max)
   - Detailed validation reports
   - CLI and programmatic APIs

4. Examples and Documentation:
   - validate_example.sh: Bash script example
   - validate_example.py: Python programmatic example
   - Updated main README with validation section
   - Added validation workflow and troubleshooting guide

Benefits:
- ✅ No Java driver needed (native Python connectors)
- ✅ Secure .env configuration (excluded from git)
- ✅ Comprehensive validation against MATLAB
- ✅ Statistical confidence in migration accuracy
- ✅ Automated validation reports

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
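For illustration, a minimal sketch of the .env-based configuration flow described above, assuming python-dotenv and placeholder variable names (DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME) rather than the exact keys defined in .env.example:

```python
# Minimal sketch of .env-based configuration; variable names are assumptions,
# not necessarily the keys used in .env.example or database.py.
import os
from dotenv import load_dotenv

load_dotenv()  # read key=value pairs from a local .env file into os.environ

DB_CONFIG = {
    "host": os.getenv("DB_HOST", "localhost"),
    "port": int(os.getenv("DB_PORT", "3306")),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD"),
    "database": os.getenv("DB_NAME"),
}
```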
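Similarly, a rough sketch of the statistical metrics the validation system reports (max abs/rel diff, RMSE, correlation) with configurable tolerances; compare_series and its parameters are illustrative placeholders, not the actual comparator.py API:

```python
# Illustrative comparison of a Python output series against its MATLAB
# counterpart; function name and tolerance defaults are assumptions.
import numpy as np

def compare_series(python_vals, matlab_vals, abs_tol=1e-6, rel_tol=1e-4):
    p = np.asarray(python_vals, dtype=float)
    m = np.asarray(matlab_vals, dtype=float)
    diff = np.abs(p - m)
    rel = diff / np.maximum(np.abs(m), 1e-12)  # guard against division by zero
    metrics = {
        "max_abs_diff": float(diff.max()),
        "max_rel_diff": float(rel.max()),
        "rmse": float(np.sqrt(np.mean((p - m) ** 2))),
        "correlation": float(np.corrcoef(p, m)[0, 1]),
    }
    # Pass if the series agrees within either the absolute or relative tolerance.
    metrics["passed"] = (metrics["max_abs_diff"] <= abs_tol
                         or metrics["max_rel_diff"] <= rel_tol)
    return metrics
```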
src/main.py (new executable file, 218 additions)

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
"""
Main orchestration script for sensor data processing.

This script coordinates the processing of all sensor types:
- RSN (Rockfall Safety Network)
- Tilt (Inclinometers/Tiltmeters)
- ATD (Extensometers and other displacement sensors)

Can process single chains or multiple chains in parallel.
"""

import sys
import argparse
import logging
from typing import List, Tuple
from multiprocessing import Pool, cpu_count

from rsn.main import process_rsn_chain
from tilt.main import process_tilt_chain
from atd.main import process_atd_chain
from common.logging_utils import setup_logger


def process_chain(control_unit_id: str, chain: str, sensor_type: str = 'auto') -> int:
    """
    Process a single chain with automatic or specified sensor type detection.

    Args:
        control_unit_id: Control unit identifier
        chain: Chain identifier
        sensor_type: Sensor type ('rsn', 'tilt', 'atd', or 'auto' for autodetect)

    Returns:
        0 if successful, 1 if error
    """
    if sensor_type == 'auto':
        # Try to detect sensor type from chain configuration
        # For now, try all modules in order
        logger = setup_logger(control_unit_id, chain, "Main")
        logger.info(f"Auto-detecting sensor type for {control_unit_id}/{chain}")

        # Try RSN first
        result = process_rsn_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try Tilt
        result = process_tilt_chain(control_unit_id, chain)
        if result == 0:
            return 0

        # Try ATD
        result = process_atd_chain(control_unit_id, chain)
        return result

    elif sensor_type.lower() == 'rsn':
        return process_rsn_chain(control_unit_id, chain)

    elif sensor_type.lower() == 'tilt':
        return process_tilt_chain(control_unit_id, chain)

    elif sensor_type.lower() == 'atd':
        return process_atd_chain(control_unit_id, chain)

    else:
        print(f"Unknown sensor type: {sensor_type}")
        return 1


def process_chain_wrapper(args: Tuple[str, str, str]) -> Tuple[str, str, int]:
    """
    Wrapper for parallel processing.

    Args:
        args: Tuple of (control_unit_id, chain, sensor_type)

    Returns:
        Tuple of (control_unit_id, chain, exit_code)
    """
    control_unit_id, chain, sensor_type = args
    exit_code = process_chain(control_unit_id, chain, sensor_type)
    return (control_unit_id, chain, exit_code)


def process_multiple_chains(chains: List[Tuple[str, str, str]],
                            parallel: bool = False,
                            max_workers: int = None) -> int:
    """
    Process multiple chains sequentially or in parallel.

    Args:
        chains: List of tuples (control_unit_id, chain, sensor_type)
        parallel: If True, process chains in parallel
        max_workers: Maximum number of parallel workers (default: CPU count)

    Returns:
        Number of failed chains
    """
    if not parallel:
        # Sequential processing
        failures = 0
        for control_unit_id, chain, sensor_type in chains:
            print(f"\n{'='*80}")
            print(f"Processing: {control_unit_id} / {chain} ({sensor_type})")
            print(f"{'='*80}\n")

            result = process_chain(control_unit_id, chain, sensor_type)
            if result != 0:
                failures += 1
                print(f"FAILED: {control_unit_id}/{chain}")
            else:
                print(f"SUCCESS: {control_unit_id}/{chain}")

        return failures

    else:
        # Parallel processing
        if max_workers is None:
            max_workers = min(cpu_count(), len(chains))

        print(f"Processing {len(chains)} chains in parallel with {max_workers} workers\n")

        with Pool(processes=max_workers) as pool:
            results = pool.map(process_chain_wrapper, chains)

        # Report results
        failures = 0
        print(f"\n{'='*80}")
        print("Processing Summary:")
        print(f"{'='*80}\n")

        for control_unit_id, chain, exit_code in results:
            status = "SUCCESS" if exit_code == 0 else "FAILED"
            print(f"{status}: {control_unit_id}/{chain}")
            if exit_code != 0:
                failures += 1

        print(f"\nTotal: {len(chains)} chains, {failures} failures")

        return failures


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description='Process sensor data from database',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Process single chain with auto-detection
  python -m src.main CU001 A

  # Process single chain with specific sensor type
  python -m src.main CU001 A --type rsn

  # Process multiple chains sequentially
  python -m src.main CU001 A CU001 B CU002 A

  # Process multiple chains in parallel
  python -m src.main CU001 A CU001 B CU002 A --parallel

  # Process with specific sensor types
  python -m src.main CU001 A rsn CU001 B tilt CU002 A atd --parallel
        """
    )

    parser.add_argument('args', nargs='+',
                        help='Control unit ID and chain pairs, optionally with sensor type')
    parser.add_argument('--type', '-t', default='auto',
                        choices=['auto', 'rsn', 'tilt', 'atd'],
                        help='Default sensor type (default: auto)')
    parser.add_argument('--parallel', '-p', action='store_true',
                        help='Process multiple chains in parallel')
    parser.add_argument('--workers', '-w', type=int, default=None,
                        help='Maximum number of parallel workers (default: CPU count)')

    args = parser.parse_args()

    # Parse chain arguments
    chains = []
    i = 0
    while i < len(args.args):
        if i + 1 < len(args.args):
            control_unit_id = args.args[i]
            chain = args.args[i + 1]

            # Check if next arg is a sensor type
            if i + 2 < len(args.args) and args.args[i + 2].lower() in ['rsn', 'tilt', 'atd']:
                sensor_type = args.args[i + 2]
                i += 3
            else:
                sensor_type = args.type
                i += 2

            chains.append((control_unit_id, chain, sensor_type))
        else:
            print(f"Error: Missing chain for control unit '{args.args[i]}'")
            sys.exit(1)

    if not chains:
        print("Error: No chains specified")
        sys.exit(1)

    # Process chains
    if len(chains) == 1:
        # Single chain - no need for parallel processing
        control_unit_id, chain, sensor_type = chains[0]
        exit_code = process_chain(control_unit_id, chain, sensor_type)
        sys.exit(exit_code)
    else:
        # Multiple chains
        failures = process_multiple_chains(chains, args.parallel, args.workers)
        sys.exit(1 if failures > 0 else 0)


if __name__ == "__main__":
    main()