fix incremental
This commit is contained in:
157
src/utils/validation.py
Normal file
157
src/utils/validation.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Data validation utilities for migration."""
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
from datetime import datetime, date
|
||||
import os
|
||||
from src.utils.logger import get_logger
|
||||
|
||||
# Module-level logger; ErrorLogger below reports file creation and summaries through it.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class ErrorLogger:
    """Record invalid migration keys in a per-table, per-partition log file.

    The file is created (truncating any previous one) when the instance is
    constructed and is reopened in append mode for every logged key.
    ``close()`` emits a summary and deletes the file again when no key was
    logged.
    """

    def __init__(self, table: str, partition: str):
        """Create the error file and write its header.

        Args:
            table: Table name
            partition: Partition name
        """
        self.table = table
        self.partition = partition
        self.error_file = f"migration_errors_{table}_{partition}.log"
        self.error_count = 0

        # Explicit UTF-8 so key values with non-ASCII characters are written
        # the same way regardless of the platform's locale encoding.
        with open(self.error_file, "w", encoding="utf-8") as f:
            f.write(f"# Migration errors for {table} partition {partition}\n")
            f.write("# Format: UnitName|ToolNameID|EventDate|EventTime|Reason\n\n")

        logger.info(f"Error log file created: {self.error_file}")

    def log_invalid_key(
        self,
        unit_name: Any,
        tool_name_id: Any,
        event_date: Any,
        event_time: Any,
        reason: str
    ) -> None:
        """Append one rejected consolidation key to the error file.

        Args:
            unit_name: UnitName value
            tool_name_id: ToolNameID value
            event_date: EventDate value
            event_time: EventTime value
            reason: Reason for rejection
        """
        # Same encoding as the header written in __init__.
        with open(self.error_file, "a", encoding="utf-8") as f:
            f.write(f"{unit_name}|{tool_name_id}|{event_date}|{event_time}|{reason}\n")

        self.error_count += 1

        # Progress warning on every 100th logged key.
        if self.error_count % 100 == 0:
            logger.warning(f"Logged {self.error_count} invalid keys to {self.error_file}")

    def get_error_count(self) -> int:
        """Get total number of errors logged.

        Returns:
            Number of errors logged
        """
        return self.error_count

    def close(self) -> None:
        """Log a summary and remove the error file if nothing was logged."""
        if self.error_count > 0:
            logger.warning(
                f"Total invalid keys for {self.table} partition {self.partition}: "
                f"{self.error_count} (see {self.error_file})"
            )
            return

        logger.info(f"No invalid keys found for {self.table} partition {self.partition}")
        # The file only holds the header; remove it. Attempting the removal
        # directly avoids a race between an existence check and the delete.
        try:
            os.remove(self.error_file)
        except FileNotFoundError:
            pass
|
||||
|
||||
|
||||
def validate_consolidation_key(
    unit_name: Any,
    tool_name_id: Any,
    event_date: Any,
    event_time: Any
) -> Tuple[bool, Optional[str]]:
    """Validate a consolidation key.

    Checks are applied in order and the first failure is reported:
    UnitName, ToolNameID, EventDate, then EventTime.

    Args:
        unit_name: UnitName value; rejected when None or the empty string
        tool_name_id: ToolNameID value; rejected when None or the empty string
        event_date: EventDate value; either a ``YYYY-MM-DD`` string or a
            ``date``/``datetime`` instance. Strings starting with
            ``0000-00-00`` and strings that do not parse are rejected, as is
            any other type.
        event_time: EventTime value; rejected only when None

    Returns:
        Tuple of (is_valid, error_reason)
        If valid: (True, None)
        If invalid: (False, "reason description")
    """
    if unit_name is None or unit_name == "":
        return False, "UnitName is NULL or empty"

    if tool_name_id is None or tool_name_id == "":
        return False, "ToolNameID is NULL or empty"

    if event_date is None:
        return False, "EventDate is NULL"

    if isinstance(event_date, str):
        # Zero-date prefix, with or without a trailing time part.
        if event_date.startswith("0000-00-00"):
            return False, f"EventDate is invalid: {event_date}"
        # Parse only to validate; the parsed value itself is not needed.
        try:
            datetime.strptime(event_date, "%Y-%m-%d")
        except ValueError as e:
            return False, f"EventDate parsing failed: {event_date} ({e})"
    elif not isinstance(event_date, date):
        # datetime is a subclass of date, so this single check covers both.
        return False, f"EventDate has invalid type: {type(event_date)}"
    # No year-0 check for date/datetime instances: Python dates start at
    # year 1 (datetime.MINYEAR), so such an instance cannot exist.

    if event_time is None:
        return False, "EventTime is NULL"

    return True, None
|
||||
|
||||
|
||||
def validate_mysql_row(row: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
    """Check that a MySQL row is fit for migration.

    The consolidation key (UnitName, ToolNameID, EventDate, EventTime) is
    validated first; a row with a valid key must also carry a NodeNum.

    Args:
        row: MySQL row dictionary

    Returns:
        Tuple of (is_valid, error_reason)
    """
    key_ok, key_error = validate_consolidation_key(
        *(row.get(column) for column in ("UnitName", "ToolNameID", "EventDate", "EventTime"))
    )
    if not key_ok:
        return False, key_error

    # A missing NodeNum key and an explicit NULL are treated alike.
    node_num = row.get("NodeNum")
    return (False, "NodeNum is NULL") if node_num is None else (True, None)
|
||||
Reference in New Issue
Block a user