feat: Add MySQL to PostgreSQL migration tool with JSONB transformation

Implement comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 19:57:11 +01:00
commit 62577d3200
24 changed files with 2075 additions and 0 deletions
--- a/src/transformers/schema_transformer.py
+++ b/src/transformers/schema_transformer.py
@@ -0,0 +1,149 @@
+"""PostgreSQL schema creation from MySQL structure."""
+from config import PARTITION_YEARS
+from src.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def create_rawdatacor_schema() -> str:
+    """Create PostgreSQL schema for RAWDATACOR table.
+
+    Returns:
+        SQL script to create the table with partitions
+    """
+    sql = """
+-- Create RAWDATACOR table with partitioning
+CREATE TABLE IF NOT EXISTS rawdatacor (
+    id BIGSERIAL NOT NULL,
+    unit_name VARCHAR(32),
+    tool_name_id VARCHAR(32) NOT NULL,
+    node_num INTEGER NOT NULL,
+    event_date DATE NOT NULL,
+    event_time TIME NOT NULL,
+    bat_level NUMERIC(4,2) NOT NULL,
+    temperature NUMERIC(5,2) NOT NULL,
+    measurements JSONB,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    bat_level_module NUMERIC(4,2),
+    temperature_module NUMERIC(5,2),
+    rssi_module INTEGER,
+    PRIMARY KEY (id, event_date)
+) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
+
+-- Create partitions for each year
+"""
+    # Add partition creation statements
+    for year in PARTITION_YEARS:
+        next_year = year + 1
+        sql += f"""
+CREATE TABLE IF NOT EXISTS rawdatacor_{year}
+    PARTITION OF rawdatacor
+    FOR VALUES FROM ({year}) TO ({next_year});
+"""
+
+    # Add indexes
+    sql += """
+-- Create indexes
+CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_raw
+    ON rawdatacor(unit_name, tool_name_id, node_num, event_date, event_time);
+
+CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
+    ON rawdatacor(unit_name, tool_name_id);
+
+CREATE INDEX IF NOT EXISTS idx_measurements_gin_raw
+    ON rawdatacor USING GIN (measurements);
+
+CREATE INDEX IF NOT EXISTS idx_event_date_raw
+    ON rawdatacor(event_date);
+"""
+
+    return sql
+
+
+def create_elabdatadisp_schema() -> str:
+    """Create PostgreSQL schema for ELABDATADISP table.
+
+    Returns:
+        SQL script to create the table with partitions
+    """
+    sql = """
+-- Create ELABDATADISP table with partitioning
+CREATE TABLE IF NOT EXISTS elabdatadisp (
+    id_elab_data BIGSERIAL NOT NULL,
+    unit_name VARCHAR(32),
+    tool_name_id VARCHAR(32) NOT NULL,
+    node_num INTEGER NOT NULL,
+    event_date DATE NOT NULL,
+    event_time TIME NOT NULL,
+    state VARCHAR(32),
+    calc_err INTEGER DEFAULT 0,
+    measurements JSONB,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    PRIMARY KEY (id_elab_data, event_date)
+) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
+
+-- Create partitions for each year
+"""
+    # Add partition creation statements
+    for year in PARTITION_YEARS:
+        next_year = year + 1
+        sql += f"""
+CREATE TABLE IF NOT EXISTS elabdatadisp_{year}
+    PARTITION OF elabdatadisp
+    FOR VALUES FROM ({year}) TO ({next_year});
+"""
+
+    # Add indexes
+    sql += """
+-- Create indexes
+CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_elab
+    ON elabdatadisp(unit_name, tool_name_id, node_num, event_date, event_time);
+
+CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
+    ON elabdatadisp(unit_name, tool_name_id);
+
+CREATE INDEX IF NOT EXISTS idx_measurements_gin_elab
+    ON elabdatadisp USING GIN (measurements);
+
+CREATE INDEX IF NOT EXISTS idx_event_date_elab
+    ON elabdatadisp(event_date);
+"""
+
+    return sql
+
+
+def create_migration_state_table() -> str:
+    """Create table to track migration state.
+
+    Returns:
+        SQL to create migration_state table
+    """
+    sql = """
+-- Create table to track migration state
+CREATE TABLE IF NOT EXISTS migration_state (
+    table_name VARCHAR(255) PRIMARY KEY,
+    last_migrated_timestamp TIMESTAMP,
+    last_migrated_id BIGINT,
+    migration_started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    migration_completed_at TIMESTAMP,
+    total_rows_migrated BIGINT DEFAULT 0,
+    status VARCHAR(32) DEFAULT 'pending'
+);
+"""
+    return sql
+
+
+def get_full_schema_script() -> str:
+    """Get complete schema creation script for PostgreSQL.
+
+    Returns:
+        Full SQL script to create all tables and indexes
+    """
+    return (
+        create_rawdatacor_schema() +
+        "\n\n" +
+        create_elabdatadisp_schema() +
+        "\n\n" +
+        create_migration_state_table()
+    )