refactor: Improve logging for consolidation group tracking

Enhanced debug logging to show:
- Max ID for each yielded group (important for resume tracking)
- Group size and consolidation key for each operation
- Clear distinction between buffered and final groups

The max ID is tracked because:
- PostgreSQL stores MAX(id) per consolidated group for resume
- This logging helps verify correct ID tracking
- Assists debugging consolidation completeness

No functional changes; this only improves observability.
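For context, a minimal sketch of the resume pattern this logging is meant to verify. The helper names here (`iter_consolidation_groups`, `load_checkpoint`, `save_checkpoint`) are hypothetical stand-ins for illustration, not code from this commit:

```python
# Hypothetical consumer showing why MAX(id) per yielded group is the safe
# checkpoint. The helpers below are assumptions, not repository code.

def run_consolidation(connector, process, load_checkpoint, save_checkpoint):
    last_id = load_checkpoint()  # MAX(id) of the last fully consolidated group
    for group in connector.iter_consolidation_groups(start_after=last_id):
        process(group)  # consolidate all rows sharing one key
        # Persist MAX(id) only after the whole group is handled: a crash
        # mid-group then replays the group instead of splitting it.
        save_checkpoint(group[-1]["id"])  # rows within a group are id-ordered
```

The `max_id` printed by the new debug lines is exactly the value such a consumer would persist, so the logs can be checked against the stored checkpoint.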

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
commit d6564b7f9e
parent 4277dd8d2c
Date:  2025-12-26 00:10:16 +01:00


@@ -421,7 +421,7 @@ class MySQLConnector:
             # Group rows by consolidation key (UnitName, ToolNameID, EventDate, EventTime)
             current_group = []
-            last_key = None
+            current_key = None
             is_final_batch = len(rows) < limit  # True if this is the last batch
             for row in sorted_rows:
@@ -433,31 +433,39 @@ class MySQLConnector:
                 )
                 # If key changed, yield previous group and start new one
-                if last_key is not None and key != last_key:
+                if current_key is not None and key != current_key:
                     if current_group:
                         yield current_group
+                        logger.debug(
+                            f"Group yielded: key={current_key}, "
+                            f"rows_in_group={len(current_group)}, "
+                            f"max_id={current_group[-1][id_column]}"
+                        )
                     current_group = []
                 current_group.append(row)
-                last_key = key
+                current_key = key
             # At end of batch: handle the final group
             if is_final_batch:
-                # This is the last batch - yield all remaining groups
+                # This is the last batch - yield the remaining group
                 if current_group:
-                    logger.debug(
-                        f"Final batch: yielding group key={last_key}, "
-                        f"rows_in_group={len(current_group)}, total_rows_fetched={len(rows)}"
-                    )
                     yield current_group
+                    logger.debug(
+                        f"Final group yielded: key={current_key}, "
+                        f"rows_in_group={len(current_group)}, "
+                        f"max_id={current_group[-1][id_column]}"
+                    )
             else:
                 # More rows might exist - buffer the last group for next batch
-                buffered_group = current_group
-                last_buffered_key = last_key
-                logger.debug(
-                    f"Buffering group at boundary: key={last_key}, "
-                    f"rows_in_group={len(current_group)}, total_rows_fetched={len(rows)}"
-                )
+                if current_group:
+                    buffered_group = current_group
+                    last_buffered_key = current_key
+                    logger.debug(
+                        f"Group buffered at boundary: key={current_key}, "
+                        f"rows_in_group={len(current_group)}, "
+                        f"max_id={current_group[-1][id_column]}"
+                    )
             last_id = rows[-1][id_column]
             break  # Success, exit retry loop
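Read on its own, the batching logic this diff instruments follows a buffer-at-the-boundary pattern. Below is a simplified, self-contained sketch; `fetch_batch` and `key_of` are assumed stand-ins for the connector's paging query and consolidation-key extraction, not its real API:

```python
from typing import Any, Callable, Iterator

Row = dict[str, Any]

def iter_groups(
    fetch_batch: Callable[[int, int], list[Row]],  # (last_id, limit) -> id-ordered rows
    key_of: Callable[[Row], tuple],                # consolidation key of a row
    id_column: str = "id",
    limit: int = 1000,
) -> Iterator[list[Row]]:
    """Yield complete consolidation groups, never splitting one across batches."""
    last_id = 0
    buffered: list[Row] = []       # last group of the previous batch, still open
    while True:
        rows = fetch_batch(last_id, limit)
        is_final_batch = len(rows) < limit
        current_group = buffered   # the carried-over group continues here
        current_key = key_of(current_group[0]) if current_group else None
        buffered = []
        for row in rows:
            key = key_of(row)
            if current_key is not None and key != current_key:
                if current_group:
                    yield current_group  # key changed: group is complete
                current_group = []
            current_group.append(row)
            current_key = key
        if is_final_batch:
            if current_group:
                yield current_group      # no more rows can join this group
            return
        buffered = current_group         # next batch may extend this group
        last_id = rows[-1][id_column]
```

As in the connector, this only works when key order tracks id order within a fetch window: the buffered group is either extended by the first rows of the next batch or yielded at the first key change.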