Add comprehensive validation system and migrate to .env configuration

This commit includes:

1. Database Configuration Migration:
   - Migrated from DB.txt (Java JDBC) to .env (python-dotenv)
   - Added .env.example template with clear variable names
   - Updated database.py to use environment variables
   - Added python-dotenv>=1.0.0 to dependencies
   - Updated .gitignore to exclude sensitive files

2. Validation System (1,294 lines):
   - comparator.py: Statistical comparison with RMSE, correlation, tolerances
   - db_extractor.py: Database queries for all sensor types
   - validator.py: High-level validation orchestration
   - cli.py: Command-line interface for validation
   - README.md: Comprehensive validation documentation

3. Validation Features:
   - Compare Python vs MATLAB outputs from database
   - Support for all sensor types (RSN, Tilt, ATD)
   - Statistical metrics: max abs/rel diff, RMSE, correlation
   - Configurable tolerances (abs, rel, max)
   - Detailed validation reports
   - CLI and programmatic APIs

4. Examples and Documentation:
   - validate_example.sh: Bash script example
   - validate_example.py: Python programmatic example
   - Updated main README with validation section
   - Added validation workflow and troubleshooting guide

Benefits:
- ✅ No Java driver needed (native Python connectors)
- ✅ Secure .env configuration (excluded from git)
- ✅ Comprehensive validation against MATLAB
- ✅ Statistical confidence in migration accuracy
- ✅ Automated validation reports

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-13 15:34:13 +02:00
parent 876ef073fc
commit 23c53cf747
25 changed files with 7476 additions and 83 deletions
--- a/validate_example.py
+++ b/validate_example.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+"""
+Example script demonstrating programmatic validation usage.
+
+This shows how to use the validation API directly in Python code.
+"""
+
+import sys
+from src.common.database import DatabaseConfig, DatabaseConnection
+from src.validation.validator import OutputValidator
+from src.common.logging_utils import setup_logger
+
+
+def main():
+    """Main validation example."""
+    # Setup
+    logger = setup_logger('validation_example')
+    control_unit_id = 'CU001'
+    chain = 'A'
+
+    logger.info(f"Starting validation for {control_unit_id}/{chain}")
+
+    try:
+        # Connect to database
+        db_config = DatabaseConfig()
+        with DatabaseConnection(db_config) as conn:
+            logger.info("Database connected")
+
+            # Create validator with custom tolerances
+            validator = OutputValidator(
+                conn,
+                abs_tol=1e-6,      # Absolute tolerance
+                rel_tol=1e-4,      # Relative tolerance (0.01%)
+                max_rel_tol=0.01   # Max acceptable (1%)
+            )
+
+            # Example 1: Validate specific sensor type
+            logger.info("Example 1: Validating RSN sensors...")
+            report = validator.validate_rsn(control_unit_id, chain)
+
+            print("\n" + "=" * 80)
+            print("RSN VALIDATION RESULTS")
+            print("=" * 80)
+            print(report.generate_report())
+
+            if report.is_valid():
+                logger.info("✓ RSN validation passed")
+            else:
+                logger.warning("✗ RSN validation failed")
+
+            # Example 2: Validate all sensors
+            logger.info("\nExample 2: Validating all sensors...")
+            validator_all = OutputValidator(conn)
+            report_all = validator_all.validate_all(control_unit_id, chain)
+
+            print("\n" + "=" * 80)
+            print("COMPREHENSIVE VALIDATION RESULTS")
+            print("=" * 80)
+            print(report_all.generate_report())
+
+            # Save report to file
+            output_file = f"validation_{control_unit_id}_{chain}.txt"
+            report_all.save_report(output_file, include_equivalent=True)
+            logger.info(f"Report saved to {output_file}")
+
+            # Example 3: Access individual results programmatically
+            logger.info("\nExample 3: Programmatic access to results...")
+            summary = report_all.get_summary()
+
+            print("\nSummary Statistics:")
+            print(f"  Identical:  {summary['identical']}")
+            print(f"  Equivalent: {summary['equivalent']}")
+            print(f"  Different:  {summary['different']}")
+            print(f"  Missing:    {summary['missing_matlab'] + summary['missing_python']}")
+            print(f"  Errors:     {summary['error']}")
+
+            # Check specific fields
+            print("\nDetailed Results:")
+            for result in report_all.results[:5]:  # Show first 5
+                print(f"\n{result.field_name}:")
+                print(f"  Status: {result.status.value}")
+                if result.max_abs_diff is not None:
+                    print(f"  Max abs diff: {result.max_abs_diff:.2e}")
+                    print(f"  Max rel diff: {result.max_rel_diff:.2%}")
+                    print(f"  Correlation: {result.correlation:.6f}")
+
+            # Return success/failure
+            return 0 if report_all.is_valid() else 1
+
+    except Exception as e:
+        logger.error(f"Validation error: {e}", exc_info=True)
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())  # main() returns 0 or 1, used as the process exit status