feat: Add error logging and fix incremental migration state tracking
Implement comprehensive error handling and fix state management bug in incremental migration:

Error Logging System:
- Add validation for consolidation keys (NULL dates, empty IDs, corrupted Java strings)
- Log invalid keys to dedicated error files with detailed reasons
- Full migration: migration_errors_<table>_<partition>.log
- Incremental migration: migration_errors_<table>_incremental_<timestamp>.log (timestamped to preserve history)
- Report total count of skipped invalid keys at migration completion
- Auto-delete empty error log files

State Tracking Fix:
- Fix critical bug where last_key wasn't updated after final buffer flush
- Track last_processed_key throughout migration loop
- Update state both during periodic flushes and after final flush
- Ensures incremental migration correctly resumes from last migrated key

Validation Checks:
- EventDate IS NULL or EventDate = '0000-00-00'
- EventTime IS NULL
- ToolNameID IS NULL or empty string
- UnitName IS NULL or empty string
- UnitName starting with '[L' (corrupted Java strings)

Documentation:
- Update README.md with error logging behavior
- Update MIGRATION_WORKFLOW.md with validation details
- Update CHANGELOG.md with new features and fixes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -10,21 +10,30 @@ logger = get_logger(__name__)
|
||||
class ErrorLogger:
|
||||
"""Log invalid migration keys to a file."""
|
||||
|
||||
def __init__(self, table: str, partition: str):
|
||||
def __init__(self, table: str, partition: str, use_timestamp: bool = False):
|
||||
"""Initialize error logger.
|
||||
|
||||
Args:
|
||||
table: Table name
|
||||
partition: Partition name
|
||||
partition: Partition name (e.g., 'p2024' or 'incremental')
|
||||
use_timestamp: If True, add timestamp to filename (for incremental migrations)
|
||||
"""
|
||||
self.table = table
|
||||
self.partition = partition
|
||||
self.error_file = f"migration_errors_{table}_{partition}.log"
|
||||
|
||||
# Add timestamp to filename for incremental migrations to avoid overwriting
|
||||
if use_timestamp or partition == "incremental":
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self.error_file = f"migration_errors_{table}_{partition}_{timestamp}.log"
|
||||
else:
|
||||
self.error_file = f"migration_errors_{table}_{partition}.log"
|
||||
|
||||
self.error_count = 0
|
||||
|
||||
# Create error file with header
|
||||
with open(self.error_file, "w") as f:
|
||||
f.write(f"# Migration errors for {table} partition {partition}\n")
|
||||
f.write(f"# Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
|
||||
f.write("# Format: UnitName|ToolNameID|EventDate|EventTime|Reason\n\n")
|
||||
|
||||
logger.info(f"Error log file created: {self.error_file}")
|
||||
@@ -99,6 +108,10 @@ def validate_consolidation_key(
|
||||
if unit_name is None or unit_name == "":
|
||||
return False, "UnitName is NULL or empty"
|
||||
|
||||
# Check for corrupted Java strings (like '[Ljava.lang.String;@...')
|
||||
if isinstance(unit_name, str) and unit_name.startswith("[L"):
|
||||
return False, f"UnitName is corrupted Java string: {unit_name}"
|
||||
|
||||
if tool_name_id is None or tool_name_id == "":
|
||||
return False, "ToolNameID is NULL or empty"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user