From 9ef65995d414bf19d9a594e739229be5c8d0f5b6 Mon Sep 17 00:00:00 2001
From: alex
Date: Thu, 25 Dec 2025 23:41:57 +0100
Subject: [PATCH] feat: Add granular resume within partitions using last inserted ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
If a migration was interrupted in the middle of processing a partition
(e.g., at row 100k of 500k), resume would re-process all 100k rows,
causing duplicate insertions and wasted time.

Solution:
1. Modified fetch_consolidation_groups_from_partition() to accept a
   start_id parameter
2. When resuming within the same partition, query the last inserted ID
   from migration_state.last_migrated_id
3. Use keyset pagination starting from (id > last_id) to skip
   already-processed rows
4. Added logic to detect whether we are resuming within the same
   partition or starting from a new one

Flow:
- If last_completed_partition < current_partition: start from the
  beginning of the partition
- If last_completed_partition == current_partition: start from
  last_migrated_id
- If last_completed_partition > current_partition: skip to the next
  uncompleted partition

This makes resume granular:
- Won't re-insert rows already written within a partition
- Continues exactly from where it stopped
- Combines with existing partition tracking for complete accuracy

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5
---
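Notes (below the scissors line, so ignored by git am): the resume
mechanism is ordinary keyset pagination. A minimal sketch of the loop,
assuming a dict-style MySQL cursor (e.g. pymysql.cursors.DictCursor);
fetch_rows_keyset and its parameters are illustrative, and the real
logic (including consolidation-group buffering and retries) lives in
fetch_consolidation_groups_from_partition():

def fetch_rows_keyset(cursor, table, partition, id_column,
                      batch_size, start_id=None):
    """Yield row batches in ascending ID order, resuming after start_id."""
    last_id = start_id  # None on a fresh run; last migrated ID on resume
    while True:
        if last_id is None:
            # First batch of a fresh run: no lower bound on the ID
            cursor.execute(
                f"SELECT * FROM `{table}` PARTITION ({partition}) "
                f"ORDER BY `{id_column}` LIMIT %s",
                (batch_size,),
            )
        else:
            # Keyset step: seek past everything already processed
            cursor.execute(
                f"SELECT * FROM `{table}` PARTITION ({partition}) "
                f"WHERE `{id_column}` > %s ORDER BY `{id_column}` LIMIT %s",
                (last_id, batch_size),
            )
        rows = cursor.fetchall()
        if not rows:
            return
        last_id = rows[-1][id_column]  # advance the keyset cursor
        yield rows

Unlike OFFSET pagination, the WHERE id > last_id predicate keeps each
batch query cheap (an index seek plus a bounded scan) no matter how deep
the resume point sits, which is why the offset parameter is now unused
and kept only for compatibility.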
 src/connectors/mysql_connector.py |  6 ++++--
 src/migrator/full_migration.py    | 13 ++++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/connectors/mysql_connector.py b/src/connectors/mysql_connector.py
index a9da2cf..6a071d6 100644
--- a/src/connectors/mysql_connector.py
+++ b/src/connectors/mysql_connector.py
@@ -337,7 +337,8 @@ class MySQLConnector:
         table: str,
         partition: str,
         limit: Optional[int] = None,
-        offset: int = 0
+        offset: int = 0,
+        start_id: Optional[int] = None
     ) -> Generator[List[Dict[str, Any]], None, None]:
         """Fetch consolidation groups from a partition.
 
@@ -352,6 +353,7 @@
             partition: Partition name
             limit: Batch size for consolidation (uses config default if None)
             offset: Starting offset for pagination (unused, kept for compatibility)
+            start_id: Resume from this ID (fetch id > start_id). If None, starts from beginning
 
         Yields:
             Lists of rows grouped by consolidation key (complete groups only)
@@ -365,7 +367,7 @@
         # Determine ID column name
         id_column = "idElabData" if table == "ELABDATADISP" else "id"
         max_retries = 3
-        last_id = None
+        last_id = start_id
         buffered_group = []  # Buffer incomplete group at batch boundary
         last_buffered_key = None
 
diff --git a/src/migrator/full_migration.py b/src/migrator/full_migration.py
index c5d5c02..80d332f 100644
--- a/src/migrator/full_migration.py
+++ b/src/migrator/full_migration.py
@@ -120,6 +130,16 @@ class FullMigrator:
             logger.info(f"[{partition_idx}/{len(partitions)}] Processing partition {partition}...")
             partition_group_count = 0
 
+            # Determine resume point within this partition
+            # If resuming and this is the last completed partition, start from last_id
+            start_id = None
+            if last_completed_partition == partition and previous_migrated_count > 0:
+                # For resume within same partition, we need to query the last ID inserted
+                # This is a simplified approach: just continue from ID tracking
+                start_id = self._get_last_migrated_id(pg_conn, pg_table)
+                if start_id:
+                    logger.info(f"Resuming partition {partition} from ID > {start_id}")
+
             # Accumulate rows for batch insertion to reduce database round-trips
             insert_buffer = []
             # Use smaller batch size for more frequent updates: batch_size * 5 = 50k rows
@@ -130,7 +140,8 @@
             # Each group is a list of rows with the same (unit, tool, date, time)
             for group_rows in mysql_conn.fetch_consolidation_groups_from_partition(
                 mysql_table,
-                partition
+                partition,
+                start_id=start_id
             ):
                 if not group_rows:
                     break
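-- 
The hunk above calls self._get_last_migrated_id(pg_conn, pg_table),
which falls outside the diff context. A minimal sketch of what such a
helper might look like, assuming a migration_state table keyed by
table_name; only the last_migrated_id column is confirmed by the commit
message, the table_name key is an assumption:

def _get_last_migrated_id(self, pg_conn, pg_table):
    """Return the last migrated source ID for pg_table, or None.

    Sketch only: assumes migration_state has table_name and
    last_migrated_id columns; only last_migrated_id is confirmed
    by the commit message.
    """
    with pg_conn.cursor() as cur:
        cur.execute(
            "SELECT last_migrated_id FROM migration_state"
            " WHERE table_name = %s",
            (pg_table,),
        )
        row = cur.fetchone()
    return row[0] if row else None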