feat: Add MySQL to PostgreSQL migration tool with JSONB transformation
Implement comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
178
src/transformers/data_transformer.py
Normal file
178
src/transformers/data_transformer.py
Normal file
@@ -0,0 +1,178 @@
|
||||
"""Data transformation from MySQL to PostgreSQL format."""
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
from config import (
|
||||
RAWDATACOR_COLUMNS,
|
||||
ELABDATADISP_FIELD_MAPPING,
|
||||
TABLE_CONFIGS,
|
||||
)
|
||||
from src.utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class DataTransformer:
    """Transform MySQL row dictionaries into PostgreSQL row dictionaries.

    Every method is static; the class only groups the transformation
    helpers. Table names are matched case-insensitively by both
    ``transform_batch`` and ``get_column_order`` so the same identifier can
    be passed to either one.
    """

    # Insert column order per table, keyed by lower-case table name. Stored
    # as tuples so that get_column_order() always hands out a fresh list and
    # callers cannot mutate this shared table.
    _COLUMN_ORDER = {
        "rawdatacor": (
            "id",
            "unit_name",
            "tool_name_id",
            "node_num",
            "event_date",
            "event_time",
            "bat_level",
            "temperature",
            "measurements",
            "created_at",
            "bat_level_module",
            "temperature_module",
            "rssi_module",
        ),
        "elabdatadisp": (
            "id_elab_data",
            "unit_name",
            "tool_name_id",
            "node_num",
            "event_date",
            "event_time",
            "state",
            "calc_err",
            "measurements",
            "created_at",
            "updated_at",
        ),
    }

    @staticmethod
    def transform_rawdatacor_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]:
        """Transform a RAWDATACOR row from MySQL to PostgreSQL format.

        The value/unit column pairs listed in ``RAWDATACOR_COLUMNS`` are
        folded into a single ``measurements`` dictionary. Each entry is keyed
        by the decimal position of the value column (``"0"``, ``"1"``, ...)
        and holds ``{"value": str(value), "unit": unit_or_None}``. Columns
        whose value is ``None`` are omitted.

        Args:
            mysql_row: Row dictionary from MySQL.

        Returns:
            Transformed row dictionary for PostgreSQL.

        Raises:
            KeyError: If one of the mandatory columns (``id``,
                ``ToolNameID``, ``NodeNum``, ``EventDate``, ``EventTime``,
                ``BatLevel``, ``Temperature``) is missing from ``mysql_row``.
        """
        val_columns = RAWDATACOR_COLUMNS["val_columns"]
        unit_columns = RAWDATACOR_COLUMNS["unit_columns"]

        measurements = {}
        for position, val_col in enumerate(val_columns):
            # Index (rather than zip) so a unit list shorter than the value
            # list still fails loudly instead of silently dropping columns.
            unit_col = unit_columns[position]

            value = mysql_row.get(val_col)
            if value is None:
                continue

            measurements[str(position)] = {
                "value": str(value),
                # Empty or missing units are normalised to None.
                "unit": mysql_row.get(unit_col) or None,
            }

        return {
            "id": mysql_row["id"],
            "unit_name": mysql_row.get("UnitName"),
            "tool_name_id": mysql_row["ToolNameID"],
            "node_num": mysql_row["NodeNum"],
            "event_date": mysql_row["EventDate"],
            "event_time": mysql_row["EventTime"],
            "bat_level": mysql_row["BatLevel"],
            "temperature": mysql_row["Temperature"],
            "measurements": measurements,
            "created_at": mysql_row.get("created_at"),
            "bat_level_module": mysql_row.get("BatLevelModule"),
            "temperature_module": mysql_row.get("TemperatureModule"),
            "rssi_module": mysql_row.get("RssiModule"),
        }

    @staticmethod
    def transform_elabdatadisp_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]:
        """Transform an ELABDATADISP row from MySQL to PostgreSQL format.

        Each MySQL column listed in ``ELABDATADISP_FIELD_MAPPING`` is stored
        under ``measurements[category][pg_key]``. String values are converted
        with ``float()``; every other non-``None`` value is stored unchanged.
        Categories that end up empty are dropped from the result.

        Args:
            mysql_row: Row dictionary from MySQL.

        Returns:
            Transformed row dictionary for PostgreSQL.

        Raises:
            KeyError: If one of the mandatory columns (``idElabData``,
                ``ToolNameID``, ``NodeNum``, ``EventDate``, ``EventTime``)
                is missing from ``mysql_row``.
            ValueError: If a mapped column holds a string that ``float()``
                cannot parse.
        """
        # Pre-seeded so that the known categories keep a stable key order.
        measurements = {
            "shifts": {},
            "coordinates": {},
            "kinematics": {},
            "sensors": {},
            "calculated": {},
        }

        for mysql_col, (category, pg_key) in ELABDATADISP_FIELD_MAPPING.items():
            value = mysql_row.get(mysql_col)
            if value is None:
                continue
            if isinstance(value, str):
                value = float(value)
            # NOTE(review): non-string values pass through untouched; if the
            # driver returns types that are not JSON-native they would need
            # converting before serialisation - confirm against the loader.
            # setdefault tolerates a mapping category that is not pre-seeded
            # above instead of failing with a bare KeyError.
            measurements.setdefault(category, {})[pg_key] = value

        # Remove empty categories.
        measurements = {
            category: fields
            for category, fields in measurements.items()
            if fields
        }

        return {
            "id_elab_data": mysql_row["idElabData"],
            "unit_name": mysql_row.get("UnitName"),
            "tool_name_id": mysql_row["ToolNameID"],
            "node_num": mysql_row["NodeNum"],
            "event_date": mysql_row["EventDate"],
            "event_time": mysql_row["EventTime"],
            "state": mysql_row.get("State"),
            # Defaults to 0 only when the column is absent from the row.
            "calc_err": mysql_row.get("calcerr", 0),
            "measurements": measurements,
            "created_at": mysql_row.get("created_at"),
            "updated_at": mysql_row.get("updated_at"),
        }

    @staticmethod
    def transform_batch(
        table: str,
        rows: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Transform a batch of rows from MySQL to PostgreSQL format.

        Args:
            table: Table name, ``RAWDATACOR`` or ``ELABDATADISP``
                (matched case-insensitively).
            rows: List of row dictionaries from MySQL.

        Returns:
            List of transformed row dictionaries for PostgreSQL, in the same
            order as ``rows``.

        Raises:
            ValueError: If ``table`` is not one of the supported tables.
        """
        row_transformers = {
            "RAWDATACOR": DataTransformer.transform_rawdatacor_row,
            "ELABDATADISP": DataTransformer.transform_elabdatadisp_row,
        }

        # Non-string input falls through to the ValueError below.
        key = table.upper() if isinstance(table, str) else None
        transform_row = row_transformers.get(key)
        if transform_row is None:
            raise ValueError(f"Unknown table: {table}")

        return [transform_row(row) for row in rows]

    @staticmethod
    def get_column_order(table: str) -> List[str]:
        """Get the column order for inserting into PostgreSQL.

        Args:
            table: PostgreSQL table name, ``rawdatacor`` or ``elabdatadisp``
                (matched case-insensitively).

        Returns:
            A new list of column names in insert order.

        Raises:
            ValueError: If ``table`` is not one of the supported tables.
        """
        # Non-string input falls through to the ValueError below.
        key = table.lower() if isinstance(table, str) else None
        columns = DataTransformer._COLUMN_ORDER.get(key)
        if columns is None:
            raise ValueError(f"Unknown table: {table}")

        return list(columns)
|
||||
Reference in New Issue
Block a user