fix: Only update last_completed_partition when partition is fully processed
Previously, last_completed_partition was updated during batch flushes while the partition was still being processed. As a result, a resumed migration would skip partitions that were only partially completed.

Now, last_completed_partition is updated only after all consolidation groups in a partition have been processed and the final buffer flush is complete.

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -289,12 +289,17 @@ class MySQLConnector:
|
||||
|
||||
# If we have a buffered group, prepend it to continue
|
||||
if buffered_group:
|
||||
logger.debug(
|
||||
f"Resuming buffered group: key={last_buffered_key}, "
|
||||
f"prev_buffered_rows={len(buffered_group)}, new_rows={len(rows)}"
|
||||
logger.info(
|
||||
f"[CONSOLIDATION DEBUG] Resuming buffered group: key={last_buffered_key}, "
|
||||
f"buffered_rows={len(buffered_group)}, new_rows={len(rows)}, "
|
||||
f"buffered_nodes={sorted([r.get('NodeNum') for r in buffered_group])}"
|
||||
)
|
||||
sorted_rows = buffered_group + sorted_rows
|
||||
buffered_group = []
|
||||
logger.info(
|
||||
f"[CONSOLIDATION DEBUG] After prepending buffer: total_rows={len(sorted_rows)}, "
|
||||
f"nodes={sorted([r.get('NodeNum') for r in sorted_rows])}"
|
||||
)
|
||||
|
||||
# Group rows by consolidation key (UnitName, ToolNameID, EventDate, EventTime)
|
||||
current_group = []
|
||||
@@ -327,6 +332,10 @@ class MySQLConnector:
|
||||
if is_final_batch:
|
||||
# This is the last batch - yield the remaining group
|
||||
if current_group:
|
||||
logger.info(
|
||||
f"[CONSOLIDATION DEBUG] Final batch - yielding group: key={current_key}, "
|
||||
f"rows={len(current_group)}, nodes={sorted([r.get('NodeNum') for r in current_group])}"
|
||||
)
|
||||
yield current_group
|
||||
logger.debug(
|
||||
f"Final group yielded: key={current_key}, "
|
||||
@@ -338,6 +347,10 @@ class MySQLConnector:
|
||||
if current_group:
|
||||
buffered_group = current_group
|
||||
last_buffered_key = current_key
|
||||
logger.info(
|
||||
f"[CONSOLIDATION DEBUG] Buffering group at boundary: key={current_key}, "
|
||||
f"rows={len(current_group)}, nodes={sorted([r.get('NodeNum') for r in current_group])}"
|
||||
)
|
||||
logger.debug(
|
||||
f"Group buffered at boundary: key={current_key}, "
|
||||
f"rows_in_group={len(current_group)}, "
|
||||
|
||||
@@ -166,9 +166,9 @@ class FullMigrator:
|
||||
batch_count += 1
|
||||
progress.update(fetched_in_buffer)
|
||||
# Update migration state after every batch flush
|
||||
# Do NOT set last_completed_partition yet - partition is still being processed
|
||||
self._update_migration_state(
|
||||
pg_conn, migrated, None, migration_start_time,
|
||||
last_partition=partition
|
||||
pg_conn, migrated, None, migration_start_time
|
||||
)
|
||||
logger.debug(
|
||||
f"Partition {partition}: flushed {inserted} rows, "
|
||||
@@ -185,16 +185,21 @@ class FullMigrator:
|
||||
migrated += inserted
|
||||
batch_count += 1
|
||||
progress.update(fetched_in_buffer)
|
||||
# Still don't set last_completed_partition - partition is still being finalized
|
||||
self._update_migration_state(
|
||||
pg_conn, migrated, None, migration_start_time,
|
||||
last_partition=partition
|
||||
pg_conn, migrated, None, migration_start_time
|
||||
)
|
||||
logger.debug(
|
||||
f"Partition {partition} final flush: {inserted} rows, "
|
||||
f"total migrated: {migrated}"
|
||||
)
|
||||
|
||||
# NOW partition is complete - update with completed partition
|
||||
logger.info(f"Partition {partition} complete: {partition_group_count} groups consolidated")
|
||||
self._update_migration_state(
|
||||
pg_conn, migrated, None, migration_start_time,
|
||||
last_partition=partition
|
||||
)
|
||||
|
||||
# Get final actual count from PostgreSQL
|
||||
final_count = pg_conn.get_row_count(pg_table)
|
||||
|
||||
Reference in New Issue
Block a user