feat: Add MySQL to PostgreSQL migration tool with JSONB transformation

Implement comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-10 19:57:11 +01:00
commit 62577d3200
24 changed files with 2075 additions and 0 deletions

View File

@@ -0,0 +1,149 @@
"""PostgreSQL schema creation from MySQL structure."""
from typing import Iterable, Optional

from config import PARTITION_YEARS
from src.utils.logger import get_logger
# Module-level logger obtained from the project's get_logger helper,
# requested under this module's name.
logger = get_logger(__name__)
def create_rawdatacor_schema(
    partition_years: Optional[Iterable[int]] = None,
) -> str:
    """Create PostgreSQL schema for RAWDATACOR table.

    The script contains, in order: the partitioned parent table, one
    yearly partition per requested year, and the indexes.

    Args:
        partition_years: Years to create partitions for. Each year ``Y``
            yields a partition ``rawdatacor_Y`` covering
            ``[Y-01-01, (Y+1)-01-01)``. Defaults to ``PARTITION_YEARS``
            from the configuration when omitted.

    Returns:
        SQL script to create the table with partitions
    """
    years = PARTITION_YEARS if partition_years is None else partition_years

    # Partition directly on the event_date column rather than on
    # EXTRACT(YEAR FROM event_date): PostgreSQL rejects PRIMARY KEY / UNIQUE
    # constraints on a partitioned table whose partition key contains an
    # expression, so the expression form makes this CREATE TABLE fail.
    table_sql = """
-- Create RAWDATACOR table with partitioning
CREATE TABLE IF NOT EXISTS rawdatacor (
id BIGSERIAL NOT NULL,
unit_name VARCHAR(32),
tool_name_id VARCHAR(32) NOT NULL,
node_num INTEGER NOT NULL,
event_date DATE NOT NULL,
event_time TIME NOT NULL,
bat_level NUMERIC(4,2) NOT NULL,
temperature NUMERIC(5,2) NOT NULL,
measurements JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
bat_level_module NUMERIC(4,2),
temperature_module NUMERIC(5,2),
rssi_module INTEGER,
PRIMARY KEY (id, event_date)
) PARTITION BY RANGE (event_date);
-- Create partitions for each year
"""

    # Range bounds are inclusive at FROM and exclusive at TO, so consecutive
    # yearly partitions tile the calendar without gaps or overlap.
    partitions_sql = "".join(
        f"""
CREATE TABLE IF NOT EXISTS rawdatacor_{year}
PARTITION OF rawdatacor
FOR VALUES FROM ('{year}-01-01') TO ('{year + 1}-01-01');
"""
        for year in years
    )

    indexes_sql = """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_raw
ON rawdatacor(unit_name, tool_name_id, node_num, event_date, event_time);
CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
ON rawdatacor(unit_name, tool_name_id);
CREATE INDEX IF NOT EXISTS idx_measurements_gin_raw
ON rawdatacor USING GIN (measurements);
CREATE INDEX IF NOT EXISTS idx_event_date_raw
ON rawdatacor(event_date);
"""
    return table_sql + partitions_sql + indexes_sql
def create_elabdatadisp_schema(
    partition_years: Optional[Iterable[int]] = None,
) -> str:
    """Create PostgreSQL schema for ELABDATADISP table.

    The script contains, in order: the partitioned parent table, one
    yearly partition per requested year, and the indexes.

    Args:
        partition_years: Years to create partitions for. Each year ``Y``
            yields a partition ``elabdatadisp_Y`` covering
            ``[Y-01-01, (Y+1)-01-01)``. Defaults to ``PARTITION_YEARS``
            from the configuration when omitted.

    Returns:
        SQL script to create the table with partitions
    """
    years = PARTITION_YEARS if partition_years is None else partition_years

    # Partition directly on the event_date column rather than on
    # EXTRACT(YEAR FROM event_date): PostgreSQL rejects PRIMARY KEY / UNIQUE
    # constraints on a partitioned table whose partition key contains an
    # expression, so the expression form makes this CREATE TABLE fail.
    table_sql = """
-- Create ELABDATADISP table with partitioning
CREATE TABLE IF NOT EXISTS elabdatadisp (
id_elab_data BIGSERIAL NOT NULL,
unit_name VARCHAR(32),
tool_name_id VARCHAR(32) NOT NULL,
node_num INTEGER NOT NULL,
event_date DATE NOT NULL,
event_time TIME NOT NULL,
state VARCHAR(32),
calc_err INTEGER DEFAULT 0,
measurements JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id_elab_data, event_date)
) PARTITION BY RANGE (event_date);
-- Create partitions for each year
"""

    # Range bounds are inclusive at FROM and exclusive at TO, so consecutive
    # yearly partitions tile the calendar without gaps or overlap.
    partitions_sql = "".join(
        f"""
CREATE TABLE IF NOT EXISTS elabdatadisp_{year}
PARTITION OF elabdatadisp
FOR VALUES FROM ('{year}-01-01') TO ('{year + 1}-01-01');
"""
        for year in years
    )

    indexes_sql = """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_elab
ON elabdatadisp(unit_name, tool_name_id, node_num, event_date, event_time);
CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
ON elabdatadisp(unit_name, tool_name_id);
CREATE INDEX IF NOT EXISTS idx_measurements_gin_elab
ON elabdatadisp USING GIN (measurements);
CREATE INDEX IF NOT EXISTS idx_event_date_elab
ON elabdatadisp(event_date);
"""
    return table_sql + partitions_sql + indexes_sql
def create_migration_state_table() -> str:
    """Build the DDL for the migration bookkeeping table.

    The table is keyed by ``table_name`` and stores the last migrated
    timestamp and id, start/completion timestamps, a migrated-row counter
    and a status string that defaults to ``'pending'``.

    Returns:
        SQL to create the migration_state table if it does not exist
    """
    # The leading and trailing empty entries reproduce the newline that
    # opens and closes the script.
    ddl_lines = (
        "",
        "-- Create table to track migration state",
        "CREATE TABLE IF NOT EXISTS migration_state (",
        "table_name VARCHAR(255) PRIMARY KEY,",
        "last_migrated_timestamp TIMESTAMP,",
        "last_migrated_id BIGINT,",
        "migration_started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,",
        "migration_completed_at TIMESTAMP,",
        "total_rows_migrated BIGINT DEFAULT 0,",
        "status VARCHAR(32) DEFAULT 'pending'",
        ");",
        "",
    )
    return "\n".join(ddl_lines)
def get_full_schema_script() -> str:
    """Assemble the DDL for every table this module defines.

    The RAWDATACOR, ELABDATADISP and migration-state scripts are generated
    in that order and joined with two newline characters.

    Returns:
        Full SQL script to create all tables and indexes
    """
    sections = [
        create_rawdatacor_schema(),
        create_elabdatadisp_schema(),
        create_migration_state_table(),
    ]
    return "\n\n".join(sections)