feat: Add MySQL to PostgreSQL migration tool with JSONB transformation
Implement comprehensive migration solution with: - Full and incremental migration modes - JSONB schema transformation for RAWDATACOR and ELABDATADISP tables - Native PostgreSQL partitioning (2014-2031) - Optimized GIN indexes for JSONB queries - Rich logging with progress tracking - Complete benchmark system for MySQL vs PostgreSQL comparison - CLI interface with multiple commands (setup, migrate, benchmark) - Configuration management via .env file - Error handling and retry logic - Batch processing for performance (configurable batch size) Database transformations: - RAWDATACOR: 16 Val columns + units → single JSONB measurements - ELABDATADISP: 25+ measurement fields → structured JSONB with categories 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
155
src/migrator/incremental_migration.py
Normal file
155
src/migrator/incremental_migration.py
Normal file
@@ -0,0 +1,155 @@
|
||||
"""Incremental migration from MySQL to PostgreSQL based on timestamps."""
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from config import get_settings, TABLE_CONFIGS
|
||||
from src.connectors.mysql_connector import MySQLConnector
|
||||
from src.connectors.postgres_connector import PostgreSQLConnector
|
||||
from src.transformers.data_transformer import DataTransformer
|
||||
from src.utils.logger import get_logger, setup_logger
|
||||
from src.utils.progress import ProgressTracker
|
||||
from src.migrator.state import MigrationState
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class IncrementalMigrator:
    """Perform incremental migration based on timestamps."""

    def __init__(self, table: str, state_file: str = "migration_state.json"):
        """Initialize incremental migrator.

        Args:
            table: Table name to migrate; must be a key of TABLE_CONFIGS.
            state_file: Path to migration state file.

        Raises:
            ValueError: If ``table`` is not present in TABLE_CONFIGS.
        """
        if table not in TABLE_CONFIGS:
            raise ValueError(f"Unknown table: {table}")

        self.table = table
        self.config = TABLE_CONFIGS[table]
        self.settings = get_settings()
        self.state = MigrationState(state_file)

    def migrate(self, dry_run: bool = False) -> int:
        """Perform incremental migration since last sync.

        Rows newer than the last recorded timestamp are fetched from MySQL
        in batches, transformed, and inserted into PostgreSQL. The state
        file is updated only after all batches succeed, so a failed run
        can safely be retried.

        Args:
            dry_run: If True, log what would be done but don't modify data.

        Returns:
            Number of rows migrated (0 when there is no prior state, on
            dry runs, or when no new rows exist).

        Raises:
            Exception: Re-raises any connector/transform error after logging.
        """
        setup_logger(__name__)

        mysql_table = self.config["mysql_table"]
        pg_table = self.config["postgres_table"]

        # Get last migration timestamp; without one, a full migration is
        # required first — incremental sync has nothing to anchor on.
        last_timestamp = self.state.get_last_timestamp(pg_table)

        if last_timestamp is None:
            logger.info(
                f"No previous migration found for {pg_table}. "
                "Use 'migrate --full' for initial migration."
            )
            return 0

        logger.info(
            f"Starting incremental migration of {mysql_table} -> {pg_table} "
            f"since {last_timestamp}"
        )

        try:
            with MySQLConnector() as mysql_conn, PostgreSQLConnector() as pg_conn:
                # ELABDATADISP rows are updated in place, so track updated_at;
                # other tables are append-only and use created_at.
                timestamp_col = "updated_at" if mysql_table == "ELABDATADISP" else "created_at"

                # Informational only: report how far PostgreSQL currently is.
                pg_max_timestamp = pg_conn.get_max_timestamp(pg_table, timestamp_col)
                logger.info(f"Last timestamp in PostgreSQL: {pg_max_timestamp}")

                if dry_run:
                    logger.info("[DRY RUN] Would migrate rows after timestamp")
                    return 0

                migrated = 0
                # Captured BEFORE fetching so rows written concurrently during
                # the migration are picked up by the next incremental run.
                # NOTE(review): datetime.utcnow() is deprecated since 3.12;
                # kept as-is because switching to datetime.now(timezone.utc)
                # would change the isoformat string stored in the state file
                # — confirm downstream parsing before modernizing.
                migration_start_time = datetime.utcnow().isoformat()

                # Column order is per-table and loop-invariant: hoisted out
                # of the batch loop (original recomputed it every batch).
                columns = DataTransformer.get_column_order(pg_table)

                batch_count = 0
                progress: Optional[ProgressTracker] = None
                try:
                    # Fetch and migrate rows in batches.
                    for batch in mysql_conn.fetch_rows_since(
                        mysql_table,
                        last_timestamp
                    ):
                        batch_count += 1

                        if progress is None:
                            # Lazily create the tracker on the first batch;
                            # total row count is unknown up front, so the
                            # first batch's size seeds the display.
                            progress = ProgressTracker(
                                len(batch),
                                f"Migrating {mysql_table} (incremental)"
                            )
                            progress.__enter__()

                        transformed = DataTransformer.transform_batch(
                            mysql_table,
                            batch
                        )

                        inserted = pg_conn.insert_batch(
                            pg_table,
                            transformed,
                            columns
                        )

                        migrated += inserted
                        progress.update(inserted)
                finally:
                    # BUG FIX: the original called __exit__ only on the
                    # success path, leaking the tracker when a transform or
                    # insert raised mid-loop.
                    if progress is not None:
                        progress.__exit__(None, None, None)

                if batch_count == 0:
                    logger.info(f"No new rows to migrate for {mysql_table}")
                    return 0

                # Update migration state only after every batch succeeded.
                self.state.set_last_timestamp(pg_table, migration_start_time)
                self.state.increment_migration_count(pg_table, migrated)

                logger.info(
                    f"✓ Incremental migration complete: {migrated} rows migrated "
                    f"to {pg_table}"
                )

                return migrated

        except Exception as e:
            logger.error(f"Incremental migration failed: {e}")
            raise
|
||||
|
||||
|
||||
def run_incremental_migration(
    table: str,
    dry_run: bool = False,
    state_file: str = "migration_state.json"
) -> int:
    """Run incremental migration for a table.

    Convenience wrapper that builds an :class:`IncrementalMigrator` and
    immediately runs it.

    Args:
        table: Table name to migrate
        dry_run: If True, show what would be done without modifying data
        state_file: Path to migration state file

    Returns:
        Number of rows migrated
    """
    return IncrementalMigrator(table, state_file).migrate(dry_run=dry_run)
|
||||
Reference in New Issue
Block a user