fix: Use actual PostgreSQL row count for total_rows_migrated tracking
Replace session-level counting with direct table COUNT queries to ensure total_rows_migrated always reflects actual reality in PostgreSQL. This fixes the discrepancy where the counter was only tracking rows from the current session and didn't account for earlier insertions or duplicates from failed resume attempts. Key improvements: - Use get_row_count() after each batch to get authoritative total - Preserve previous count on resume and accumulate across sessions - Remove dependency on error-prone session-level counters - Ensures migration_state.total_rows_migrated matches actual table row count 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Incremental migration from MySQL to PostgreSQL based on timestamps."""
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
import psycopg
|
||||
|
||||
from config import get_settings, TABLE_CONFIGS
|
||||
from src.connectors.mysql_connector import MySQLConnector
|
||||
@@ -31,6 +32,33 @@ class IncrementalMigrator:
|
||||
self.settings = get_settings()
|
||||
self.state = MigrationState(state_file)
|
||||
|
||||
def _get_last_timestamp_from_db(
|
||||
self,
|
||||
pg_conn: PostgreSQLConnector,
|
||||
pg_table: str
|
||||
) -> Optional[str]:
|
||||
"""Get last migration timestamp from PostgreSQL migration_state table.
|
||||
|
||||
Args:
|
||||
pg_conn: PostgreSQL connector
|
||||
pg_table: PostgreSQL table name
|
||||
|
||||
Returns:
|
||||
ISO format timestamp or None if not found
|
||||
"""
|
||||
try:
|
||||
with pg_conn.connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"SELECT last_migrated_timestamp FROM migration_state WHERE table_name = %s",
|
||||
(pg_table,)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
if result and result[0]:
|
||||
return result[0].isoformat()
|
||||
except psycopg.Error:
|
||||
return None
|
||||
return None
|
||||
|
||||
def migrate(self, dry_run: bool = False, use_id: bool = False) -> int:
|
||||
"""Perform incremental migration since last sync.
|
||||
|
||||
@@ -88,9 +116,19 @@ class IncrementalMigrator:
|
||||
Returns:
|
||||
Number of rows migrated
|
||||
"""
|
||||
# Get last migration timestamp
|
||||
# Try to get last migration timestamp from state file first
|
||||
last_timestamp = self.state.get_last_timestamp(pg_table)
|
||||
|
||||
# If not in state file, try to get from PostgreSQL migration_state table
|
||||
if last_timestamp is None:
|
||||
try:
|
||||
last_timestamp = self._get_last_timestamp_from_db(pg_conn, pg_table)
|
||||
if last_timestamp:
|
||||
logger.info(f"Found previous migration state in database: {last_timestamp}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not read from migration_state table: {e}")
|
||||
last_timestamp = None
|
||||
|
||||
if last_timestamp is None:
|
||||
logger.info(
|
||||
f"No previous migration found for {pg_table}. "
|
||||
@@ -98,6 +136,8 @@ class IncrementalMigrator:
|
||||
)
|
||||
return 0
|
||||
|
||||
logger.info(f"Last migration timestamp: {last_timestamp}")
|
||||
|
||||
# Count rows to migrate
|
||||
timestamp_col = "updated_at" if mysql_table == "ELABDATADISP" else "created_at"
|
||||
|
||||
@@ -107,7 +147,7 @@ class IncrementalMigrator:
|
||||
timestamp_col
|
||||
)
|
||||
|
||||
logger.info(f"Last timestamp in PostgreSQL: {pg_max_timestamp}")
|
||||
logger.info(f"Current max timestamp in PostgreSQL: {pg_max_timestamp}")
|
||||
|
||||
if dry_run:
|
||||
logger.info("[DRY RUN] Would migrate rows after timestamp")
|
||||
|
||||
Reference in New Issue
Block a user