"""Data transformation from MySQL to PostgreSQL format.""" from typing import Dict, Any, List from datetime import datetime, time, timedelta from config import ( RAWDATACOR_COLUMNS, ELABDATADISP_FIELD_MAPPING, TABLE_CONFIGS, ) from src.utils.logger import get_logger logger = get_logger(__name__) class DataTransformer: """Transform MySQL data to PostgreSQL format.""" @staticmethod def _convert_time(event_time: Any) -> time: """Convert event_time to datetime.time object. Handles multiple input types: - str: Parse from "HH:MM:SS" format - timedelta: Convert from MySQL TIME type (stored as timedelta) - time: Return as-is Args: event_time: Time value from MySQL (str, timedelta, or time) Returns: datetime.time object """ if isinstance(event_time, str): return datetime.strptime(event_time, "%H:%M:%S").time() elif isinstance(event_time, timedelta): # MySQL returns TIME as timedelta # Extract seconds from timedelta and convert to time total_seconds = int(event_time.total_seconds()) hours = total_seconds // 3600 minutes = (total_seconds % 3600) // 60 seconds = total_seconds % 60 return time(hour=hours, minute=minutes, second=seconds) elif isinstance(event_time, time): return event_time else: raise ValueError(f"Unsupported event_time type: {type(event_time)}") @staticmethod def transform_rawdatacor_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]: """Transform a RAWDATACOR row from MySQL to PostgreSQL format. Args: mysql_row: Row dictionary from MySQL Returns: Transformed row dictionary for PostgreSQL """ # Create measurements JSONB measurements = {} # Map Val0-ValF with their units for i, val_col in enumerate(RAWDATACOR_COLUMNS["val_columns"]): unit_col = RAWDATACOR_COLUMNS["unit_columns"][i] value = mysql_row.get(val_col) unit = mysql_row.get(unit_col) # Only add to JSONB if value is not None if value is not None: measurements[str(i)] = { "value": str(value), "unit": unit if unit else None, } # Combine event_date and event_time into event_timestamp event_date = mysql_row.get("EventDate") event_time = mysql_row.get("EventTime") if event_date is not None and event_time is not None: event_time_obj = DataTransformer._convert_time(event_time) event_timestamp = datetime.combine(event_date, event_time_obj) elif event_date is None or event_time is None: # Log a warning for records with missing date/time missing = [] if event_date is None: missing.append("EventDate") if event_time is None: missing.append("EventTime") logger.warning( f"Row {mysql_row.get('id')} has NULL {', '.join(missing)}. " f"Using default timestamp: 1970-01-01 00:00:00" ) # Use default timestamp for records with missing date/time event_timestamp = datetime(1970, 1, 1, 0, 0, 0) else: event_timestamp = None # Create PostgreSQL row pg_row = { "id": mysql_row["id"], "unit_name": mysql_row.get("UnitName"), "tool_name_id": mysql_row["ToolNameID"], "node_num": mysql_row["NodeNum"], "event_timestamp": event_timestamp, "bat_level": mysql_row["BatLevel"], "temperature": mysql_row["Temperature"], "measurements": measurements, "created_at": mysql_row.get("created_at"), "bat_level_module": mysql_row.get("BatLevelModule"), "temperature_module": mysql_row.get("TemperatureModule"), "rssi_module": mysql_row.get("RssiModule"), } return pg_row @staticmethod def transform_elabdatadisp_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]: """Transform an ELABDATADISP row from MySQL to PostgreSQL format. Args: mysql_row: Row dictionary from MySQL Returns: Transformed row dictionary for PostgreSQL """ # Create measurements JSONB with structured categories measurements = { "shifts": {}, "coordinates": {}, "kinematics": {}, "sensors": {}, "calculated": {}, } # Map all measurement fields using the configuration for mysql_col, (category, pg_key) in ELABDATADISP_FIELD_MAPPING.items(): value = mysql_row.get(mysql_col) if value is not None: measurements[category][pg_key] = float(value) if isinstance(value, str) else value # Remove empty categories measurements = { k: v for k, v in measurements.items() if v } # Combine event_date and event_time into event_timestamp event_date = mysql_row.get("EventDate") event_time = mysql_row.get("EventTime") if event_date is not None and event_time is not None: event_time_obj = DataTransformer._convert_time(event_time) event_timestamp = datetime.combine(event_date, event_time_obj) elif event_date is None or event_time is None: # Log a warning for records with missing date/time missing = [] if event_date is None: missing.append("EventDate") if event_time is None: missing.append("EventTime") logger.warning( f"Row {mysql_row.get('idElabData')} has NULL {', '.join(missing)}. " f"Using default timestamp: 1970-01-01 00:00:00" ) # Use default timestamp for records with missing date/time event_timestamp = datetime(1970, 1, 1, 0, 0, 0) else: event_timestamp = None # Create PostgreSQL row pg_row = { "id_elab_data": mysql_row["idElabData"], "unit_name": mysql_row.get("UnitName"), "tool_name_id": mysql_row["ToolNameID"], "node_num": mysql_row["NodeNum"], "event_timestamp": event_timestamp, "state": mysql_row.get("State"), "calc_err": mysql_row.get("calcerr", 0), "measurements": measurements, "created_at": mysql_row.get("created_at"), "updated_at": mysql_row.get("updated_at"), } return pg_row @staticmethod def transform_batch( table: str, rows: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: """Transform a batch of rows from MySQL to PostgreSQL format. Args: table: Table name ('RAWDATACOR' or 'ELABDATADISP') rows: List of row dictionaries from MySQL Returns: List of transformed row dictionaries for PostgreSQL """ if table == "RAWDATACOR": return [ DataTransformer.transform_rawdatacor_row(row) for row in rows ] elif table == "ELABDATADISP": return [ DataTransformer.transform_elabdatadisp_row(row) for row in rows ] else: raise ValueError(f"Unknown table: {table}") @staticmethod def get_column_order(table: str) -> List[str]: """Get the column order for inserting into PostgreSQL. Args: table: PostgreSQL table name Returns: List of column names in order """ if table == "rawdatacor": return [ "id", "unit_name", "tool_name_id", "node_num", "event_timestamp", "bat_level", "temperature", "measurements", "created_at", "bat_level_module", "temperature_module", "rssi_module", ] elif table == "elabdatadisp": return [ "id_elab_data", "unit_name", "tool_name_id", "node_num", "event_timestamp", "state", "calc_err", "measurements", "created_at", "updated_at", ] else: raise ValueError(f"Unknown table: {table}")