diff --git a/src/transformers/data_transformer.py b/src/transformers/data_transformer.py index 4fbc0e0..7173577 100644 --- a/src/transformers/data_transformer.py +++ b/src/transformers/data_transformer.py @@ -19,7 +19,7 @@ class DataTransformer: """Convert event_date to date object. Handles multiple input types: - - str: Parse from "YYYY-MM-DD" format + - str: Parse from "YYYY-MM-DD" format (returns None for invalid dates like '0000-00-00') - date: Return as-is - datetime: Extract date component @@ -27,16 +27,23 @@ class DataTransformer: event_date: Date value from MySQL (str or date) Returns: - date object + date object or None for invalid dates """ if isinstance(event_date, str): - return datetime.strptime(event_date, "%Y-%m-%d").date() + # Handle MySQL invalid dates (0000-00-00, etc) + if event_date == '0000-00-00' or not event_date or event_date.startswith('0000'): + return None + try: + return datetime.strptime(event_date, "%Y-%m-%d").date() + except ValueError: + # If parsing fails, return None instead of crashing + return None elif isinstance(event_date, datetime): return event_date.date() elif isinstance(event_date, date): return event_date else: - raise ValueError(f"Unsupported event_date type: {type(event_date)}") + return None @staticmethod def _convert_time(event_time: Any) -> time: @@ -121,7 +128,11 @@ class DataTransformer: if event_date is not None and event_time is not None: event_date_obj = DataTransformer._convert_date(event_date) event_time_obj = DataTransformer._convert_time(event_time) - event_timestamp = datetime.combine(event_date_obj, event_time_obj) + # If date conversion failed (invalid MySQL date), use default timestamp + if event_date_obj is None or event_time_obj is None: + event_timestamp = datetime(1970, 1, 1, 0, 0, 0) + else: + event_timestamp = datetime.combine(event_date_obj, event_time_obj) elif event_date is None or event_time is None: # Log a warning for records with missing date/time missing = [] @@ -192,7 +203,11 @@ class DataTransformer: if event_date is not None and event_time is not None: event_date_obj = DataTransformer._convert_date(event_date) event_time_obj = DataTransformer._convert_time(event_time) - event_timestamp = datetime.combine(event_date_obj, event_time_obj) + # If date conversion failed (invalid MySQL date), use default timestamp + if event_date_obj is None or event_time_obj is None: + event_timestamp = datetime(1970, 1, 1, 0, 0, 0) + else: + event_timestamp = datetime.combine(event_date_obj, event_time_obj) elif event_date is None or event_time is None: # Log a warning for records with missing date/time missing = []