fix: Add timeout settings and retry logic to MySQL connector
Configuration improvements:
- Set read_timeout=300 (5 minutes) to handle long queries
- Set write_timeout=300 (5 minutes) for writes
- Set max_allowed_packet=64MB to handle larger data transfers

Retry logic:
- Added retry mechanism with max 3 retries on fetch failure
- Auto-reconnect on connection loss before retry
- Better error messages showing retry attempts

This fixes the 'connection is lost' error that occurs during long-running migrations by:
1. Giving MySQL queries more time to complete
2. Allowing larger packet sizes for bulk data
3. Automatically recovering from connection drops

Fixes: 'Connection is lost' error during full migration
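The timeout and retry changes themselves live in the MySQL connector, which is not part of the diff below. A minimal sketch of the behavior the message describes, assuming the connector wraps PyMySQL (the function names and connection details here are illustrative, not the project's actual API):

import logging
import time

import pymysql

logger = logging.getLogger(__name__)

MAX_RETRIES = 3  # "max 3 retries on fetch failure"


def connect() -> pymysql.connections.Connection:
    # Placeholder credentials; the real values would come from settings.
    return pymysql.connect(
        host="localhost",
        user="migrator",
        password="secret",
        database="source_db",
        read_timeout=300,   # 5 minutes for long-running queries
        write_timeout=300,  # 5 minutes for writes
        max_allowed_packet=64 * 1024 * 1024,  # 64MB for bulk transfers
        cursorclass=pymysql.cursors.DictCursor,
    )


def fetch_with_retry(conn, query, params=None):
    """Run a read query, reconnecting and retrying on connection loss."""
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            conn.ping(reconnect=True)  # auto-reconnect before each try
            with conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchall()
        except pymysql.err.OperationalError as exc:
            if attempt == MAX_RETRIES:
                raise
            logger.warning(
                "Fetch failed (attempt %d/%d): %s; retrying",
                attempt, MAX_RETRIES, exc,
            )
            time.sleep(2 ** attempt)  # brief backoff before the next attempt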
@@ -31,11 +31,12 @@ class IncrementalMigrator:
        self.settings = get_settings()
        self.state = MigrationState(state_file)

    def migrate(self, dry_run: bool = False, use_id: bool = False) -> int:
        """Perform incremental migration since last sync.

        Args:
            dry_run: If True, log what would be done but don't modify data
            use_id: If True, use ID-based resumption, else use timestamp-based

        Returns:
            Number of rows migrated
@@ -44,7 +45,49 @@ class IncrementalMigrator:

        mysql_table = self.config["mysql_table"]
        pg_table = self.config["postgres_table"]
        primary_key = self.config.get("primary_key", "id")

        logger.info(
            f"Starting incremental migration of {mysql_table} -> {pg_table} "
            f"({'ID-based' if use_id else 'timestamp-based'})"
        )

        try:
            with MySQLConnector() as mysql_conn:
                with PostgreSQLConnector() as pg_conn:
                    if use_id:
                        return self._migrate_by_id(
                            mysql_conn, pg_conn, mysql_table, pg_table, primary_key, dry_run
                        )
                    else:
                        return self._migrate_by_timestamp(
                            mysql_conn, pg_conn, mysql_table, pg_table, dry_run
                        )
        except Exception as e:
            logger.error(f"Incremental migration failed: {e}")
            raise

    def _migrate_by_timestamp(
        self,
        mysql_conn: MySQLConnector,
        pg_conn: PostgreSQLConnector,
        mysql_table: str,
        pg_table: str,
        dry_run: bool
    ) -> int:
        """Migrate rows using timestamp-based resumption.

        Args:
            mysql_conn: MySQL connector
            pg_conn: PostgreSQL connector
            mysql_table: MySQL table name
            pg_table: PostgreSQL table name
            dry_run: If True, don't modify data

        Returns:
            Number of rows migrated
        """
        # Get last migration timestamp
        last_timestamp = self.state.get_last_timestamp(pg_table)

||||
@@ -55,91 +98,178 @@ class IncrementalMigrator:
            )
            return 0

        # Count rows to migrate
        timestamp_col = "updated_at" if mysql_table == "ELABDATADISP" else "created_at"

        # Get max timestamp from PostgreSQL
        pg_max_timestamp = pg_conn.get_max_timestamp(
            pg_table,
            timestamp_col
        )
        logger.info(f"Last timestamp in PostgreSQL: {pg_max_timestamp}")

        if dry_run:
            logger.info("[DRY RUN] Would migrate rows after timestamp")
            return 0

        migrated = 0
        migration_start_time = datetime.utcnow().isoformat()

        # Fetch and migrate rows in batches
        batch_count = 0
        for batch in mysql_conn.fetch_rows_since(
            mysql_table,
            last_timestamp
        ):
            batch_count += 1

            if batch_count == 1:
                # Create progress tracker with unknown total
                progress = ProgressTracker(
                    len(batch),
                    f"Migrating {mysql_table} (incremental)"
                )
                progress.__enter__()

            # Transform batch
            transformed = DataTransformer.transform_batch(
                mysql_table,
                batch
            )

            # Insert batch
            columns = DataTransformer.get_column_order(pg_table)
            inserted = pg_conn.insert_batch(
                pg_table,
                transformed,
                columns
            )

            migrated += inserted
            progress.update(inserted)

        if batch_count == 0:
            logger.info(f"No new rows to migrate for {mysql_table}")
            return 0

        progress.__exit__(None, None, None)

        # Update migration state
        self.state.set_last_timestamp(pg_table, migration_start_time)
        self.state.increment_migration_count(pg_table, migrated)

        logger.info(
            f"✓ Incremental migration complete: {migrated} rows migrated "
            f"to {pg_table}"
        )

        return migrated
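Both migration paths delegate the actual writes to pg_conn.insert_batch, which is not part of this diff. A hedged sketch of what it might look like, assuming psycopg2 (only the insert_batch name and its arguments come from the code above; the empty-batch guard and commit policy are assumptions):

from psycopg2.extras import execute_values

def insert_batch(self, table, rows, columns):
    """Insert dict rows into table and return how many were written."""
    if not rows:
        return 0
    values = [tuple(row[col] for col in columns) for row in rows]
    col_list = ", ".join(columns)
    with self.connection.cursor() as cur:
        # One multi-row INSERT; page_size >= len(values) keeps it a single
        # statement so cur.rowcount reflects the whole batch.
        execute_values(
            cur,
            f"INSERT INTO {table} ({col_list}) VALUES %s",
            values,
            page_size=len(values),
        )
        inserted = cur.rowcount
    self.connection.commit()
    return inserted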

    def _migrate_by_id(
        self,
        mysql_conn: MySQLConnector,
        pg_conn: PostgreSQLConnector,
        mysql_table: str,
        pg_table: str,
        primary_key: str,
        dry_run: bool
    ) -> int:
        """Migrate rows using ID-based resumption (resumable from last ID).

        Args:
            mysql_conn: MySQL connector
            pg_conn: PostgreSQL connector
            mysql_table: MySQL table name
            pg_table: PostgreSQL table name
            primary_key: Primary key column name
            dry_run: If True, don't modify data

        Returns:
            Number of rows migrated
        """
        # Get last migrated ID from state
        total_count = mysql_conn.get_row_count(mysql_table)
        state_dict = self.state.state.get(pg_table, {})
        last_id = state_dict.get("last_id")
        previously_migrated = state_dict.get("total_migrated", 0)

        if last_id is None:
            logger.info(
                f"No previous ID-based migration found for {pg_table}. "
                "Starting from beginning."
            )
            remaining = total_count
        else:
            remaining = total_count - last_id
            logger.info(
                f"Resuming ID-based migration from ID > {last_id}\n"
                f"Previously migrated: {previously_migrated} rows\n"
                f"Remaining to migrate: {remaining} rows"
            )

        if dry_run:
            logger.info(f"[DRY RUN] Would migrate {remaining} rows")
            return remaining

        migrated = 0

        with ProgressTracker(
            remaining,
            f"Migrating {mysql_table} (resumable)"
        ) as progress:
            # Fetch and migrate rows in batches
            for batch in mysql_conn.fetch_rows_from_id(
                mysql_table,
                primary_key,
                last_id
            ):
                if not batch:
                    break

                # Transform batch
                transformed = DataTransformer.transform_batch(
                    mysql_table,
                    batch
                )

                # Insert batch
                columns = DataTransformer.get_column_order(pg_table)
                inserted = pg_conn.insert_batch(
                    pg_table,
                    transformed,
                    columns
                )

                if inserted > 0:
                    # Get the max ID from the batch
                    batch_max_id = max(
                        int(row.get(primary_key, 0)) for row in batch
                    )
                    migrated += inserted
                    progress.update(inserted)

                    # Update state after each batch
                    if pg_table not in self.state.state:
                        self.state.state[pg_table] = {}
                    self.state.state[pg_table]["last_id"] = batch_max_id
                    self.state.state[pg_table]["total_migrated"] = previously_migrated + migrated
                    self.state.state[pg_table]["last_updated"] = datetime.utcnow().isoformat()
                    self.state._save_state()

        logger.info(
            f"✓ ID-based incremental migration complete: {migrated} rows migrated "
            f"to {pg_table}"
        )

        return migrated
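The ID-based path is resumable because the state entry for the table (last_id, total_migrated, last_updated) is persisted after every batch, and because fetch_rows_from_id (not shown in this diff) can restart from an arbitrary key. A plausible sketch of the connector side is keyset pagination on the primary key; everything here beyond the fetch_rows_from_id name and its arguments is an assumption:

def fetch_rows_from_id(self, table, primary_key, last_id, batch_size=1000):
    """Yield dict-row batches ordered by primary key, starting after last_id."""
    current_id = last_id if last_id is not None else 0
    while True:
        with self.connection.cursor() as cur:  # assumes a DictCursor connection
            # Table/column names come from trusted config; only values are
            # parameterized. Keyset pagination: seek past the last seen ID.
            cur.execute(
                f"SELECT * FROM {table} WHERE {primary_key} > %s "
                f"ORDER BY {primary_key} LIMIT %s",
                (current_id, batch_size),
            )
            batch = cur.fetchall()
        if not batch:
            return
        yield batch
        current_id = max(int(row[primary_key]) for row in batch)

Keyset pagination seeks past the last seen ID instead of paging with a growing OFFSET, so each batch stays cheap on large tables and an interrupted run picks up from the last fully inserted batch rather than starting over.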


def run_incremental_migration(
    table: str,
    dry_run: bool = False,
    state_file: str = "migration_state.json",
    use_id: bool = False
) -> int:
    """Run incremental migration for a table.
||||
@@ -147,9 +277,10 @@ def run_incremental_migration(
        table: Table name to migrate
        dry_run: If True, show what would be done without modifying data
        state_file: Path to migration state file
        use_id: If True, use ID-based resumption, else use timestamp-based

    Returns:
        Number of rows migrated
    """
    migrator = IncrementalMigrator(table, state_file)
    return migrator.migrate(dry_run=dry_run, use_id=use_id)
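A quick usage sketch (the import path is a guess and the table name is the one seen above; run_incremental_migration and its parameters come from the diff):

# Hypothetical caller; adjust the import to wherever this module lives.
from incremental import run_incremental_migration

# Preview an ID-based, resumable run without modifying any data.
run_incremental_migration("ELABDATADISP", dry_run=True, use_id=True)

# Perform the migration; if it is interrupted, rerunning resumes from
# the last_id recorded in migration_state.json.
rows = run_incremental_migration("ELABDATADISP", use_id=True)
print(f"Migrated {rows} rows")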