feat: Add MySQL to PostgreSQL migration tool with JSONB transformation
Implement a comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
149
src/transformers/schema_transformer.py
Normal file
149
src/transformers/schema_transformer.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""PostgreSQL schema creation from MySQL structure."""
|
||||
from config import PARTITION_YEARS
|
||||
from src.utils.logger import get_logger
|
||||
|
||||
# Logger for this module, obtained from the project's get_logger helper and
# identified by the module's import name (__name__).
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def create_rawdatacor_schema(years: "list[int] | None" = None) -> str:
    """Build the PostgreSQL DDL script for the RAWDATACOR table.

    The table is range-partitioned directly on ``event_date`` with one
    partition per calendar year.  Partitioning on the expression
    ``EXTRACT(YEAR FROM event_date)`` is deliberately avoided: PostgreSQL
    does not allow a PRIMARY KEY (or any UNIQUE constraint) on a partitioned
    table whose partition key contains an expression, so the table could not
    be created together with ``PRIMARY KEY (id, event_date)``.

    Args:
        years: Calendar years to create partitions for.  When omitted, the
            configured ``PARTITION_YEARS`` are used.

    Returns:
        SQL script that creates the table, its yearly partitions and indexes.
    """
    if years is None:
        years = PARTITION_YEARS

    table_ddl = """
-- Create RAWDATACOR table with partitioning
CREATE TABLE IF NOT EXISTS rawdatacor (
    id BIGSERIAL NOT NULL,
    unit_name VARCHAR(32),
    tool_name_id VARCHAR(32) NOT NULL,
    node_num INTEGER NOT NULL,
    event_date DATE NOT NULL,
    event_time TIME NOT NULL,
    bat_level NUMERIC(4,2) NOT NULL,
    temperature NUMERIC(5,2) NOT NULL,
    measurements JSONB,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    bat_level_module NUMERIC(4,2),
    temperature_module NUMERIC(5,2),
    rssi_module INTEGER,
    PRIMARY KEY (id, event_date)
) PARTITION BY RANGE (event_date);

-- Create partitions for each year
"""

    # One partition per year; the upper bound of a range partition is
    # exclusive, so Jan 1st of the following year closes the range.
    partition_ddl = "".join(
        f"""
CREATE TABLE IF NOT EXISTS rawdatacor_{year}
    PARTITION OF rawdatacor
    FOR VALUES FROM ('{year}-01-01') TO ('{year + 1}-01-01');
"""
        for year in years
    )

    index_ddl = """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_raw
    ON rawdatacor(unit_name, tool_name_id, node_num, event_date, event_time);

CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
    ON rawdatacor(unit_name, tool_name_id);

CREATE INDEX IF NOT EXISTS idx_measurements_gin_raw
    ON rawdatacor USING GIN (measurements);

CREATE INDEX IF NOT EXISTS idx_event_date_raw
    ON rawdatacor(event_date);
"""

    return table_ddl + partition_ddl + index_ddl
|
||||
|
||||
|
||||
def create_elabdatadisp_schema(years: "list[int] | None" = None) -> str:
    """Build the PostgreSQL DDL script for the ELABDATADISP table.

    The table is range-partitioned directly on ``event_date`` with one
    partition per calendar year.  Partitioning on the expression
    ``EXTRACT(YEAR FROM event_date)`` is deliberately avoided: PostgreSQL
    does not allow a PRIMARY KEY (or any UNIQUE constraint) on a partitioned
    table whose partition key contains an expression, so the table could not
    be created together with ``PRIMARY KEY (id_elab_data, event_date)``.

    Args:
        years: Calendar years to create partitions for.  When omitted, the
            configured ``PARTITION_YEARS`` are used.

    Returns:
        SQL script that creates the table, its yearly partitions and indexes.
    """
    if years is None:
        years = PARTITION_YEARS

    table_ddl = """
-- Create ELABDATADISP table with partitioning
CREATE TABLE IF NOT EXISTS elabdatadisp (
    id_elab_data BIGSERIAL NOT NULL,
    unit_name VARCHAR(32),
    tool_name_id VARCHAR(32) NOT NULL,
    node_num INTEGER NOT NULL,
    event_date DATE NOT NULL,
    event_time TIME NOT NULL,
    state VARCHAR(32),
    calc_err INTEGER DEFAULT 0,
    measurements JSONB,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id_elab_data, event_date)
) PARTITION BY RANGE (event_date);

-- Create partitions for each year
"""

    # One partition per year; the upper bound of a range partition is
    # exclusive, so Jan 1st of the following year closes the range.
    partition_ddl = "".join(
        f"""
CREATE TABLE IF NOT EXISTS elabdatadisp_{year}
    PARTITION OF elabdatadisp
    FOR VALUES FROM ('{year}-01-01') TO ('{year + 1}-01-01');
"""
        for year in years
    )

    index_ddl = """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_elab
    ON elabdatadisp(unit_name, tool_name_id, node_num, event_date, event_time);

CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
    ON elabdatadisp(unit_name, tool_name_id);

CREATE INDEX IF NOT EXISTS idx_measurements_gin_elab
    ON elabdatadisp USING GIN (measurements);

CREATE INDEX IF NOT EXISTS idx_event_date_elab
    ON elabdatadisp(event_date);
"""

    return table_ddl + partition_ddl + index_ddl
|
||||
|
||||
|
||||
def create_migration_state_table() -> str:
    """Return the DDL for the migration bookkeeping table.

    The statement creates ``migration_state`` (if it does not exist yet),
    keyed by ``table_name`` and carrying the last migrated timestamp/id,
    start and completion timestamps, a migrated-row counter and a status
    column that defaults to ``'pending'``.

    Returns:
        SQL to create migration_state table
    """
    return """
-- Create table to track migration state
CREATE TABLE IF NOT EXISTS migration_state (
    table_name VARCHAR(255) PRIMARY KEY,
    last_migrated_timestamp TIMESTAMP,
    last_migrated_id BIGINT,
    migration_started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    migration_completed_at TIMESTAMP,
    total_rows_migrated BIGINT DEFAULT 0,
    status VARCHAR(32) DEFAULT 'pending'
);
"""
|
||||
|
||||
|
||||
def get_full_schema_script() -> str:
    """Assemble the complete PostgreSQL schema creation script.

    The RAWDATACOR, ELABDATADISP and migration-state scripts are generated
    in that order and separated by a blank line.

    Returns:
        Full SQL script to create all tables and indexes
    """
    sections = (
        create_rawdatacor_schema(),
        create_elabdatadisp_schema(),
        create_migration_state_table(),
    )
    return "\n\n".join(sections)
|
||||
Reference in New Issue
Block a user