From f2b6049608e2cd5863956c818ddcfdebeff4d87a Mon Sep 17 00:00:00 2001 From: alex Date: Fri, 26 Dec 2025 20:44:40 +0100 Subject: [PATCH] fix: CRITICAL - Don't prematurely yield incomplete groups at batch boundaries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: When the batch limit was reached (len(rows) >= limit), the code yielded current_group immediately, even if the group was incomplete. This caused groups that spanned multiple batches to be split. Example: - The first batch contains UnitA nodes 1-11 with the same consolidation key - The code yields them as a complete group before seeing nodes 12-22 in the next batch - The next batch starts with a different key, so the incomplete group is never merged - Result: 11 separate rows instead of 1 consolidated row Root cause: The code did not check whether the group might continue in the next batch. Fix: Before yielding at a batch boundary, check whether the LAST row in the current batch has the SAME consolidation key as current_group: - If YES (last_row_key == current_key): DON'T yield yet, keep buffering - If NO (last_row_key != current_key): Yield, the group is definitely complete This ensures that groups spanning batch boundaries are kept together and fully consolidated. 
🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 --- src/connectors/mysql_connector.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/connectors/mysql_connector.py b/src/connectors/mysql_connector.py index 0a9f6af..0250493 100644 --- a/src/connectors/mysql_connector.py +++ b/src/connectors/mysql_connector.py @@ -308,10 +308,25 @@ class MySQLConnector: yield current_group return else: - # More rows might exist - yield the last group only if key changed - # If not, it will be continued/merged in next iteration - if current_group: - yield current_group + # More rows might exist after this batch + # Check if the last row in this batch has same key as current_group + # If yes, DON'T yield yet - the group might continue in next batch + # If no, yield because we know the group is complete + if rows: + last_row = rows[-1] + last_row_key = ( + last_row.get("UnitName"), + last_row.get("ToolNameID"), + last_row.get("EventDate"), + last_row.get("EventTime") + ) + + # If last row has different key than current group, current group is complete + if last_row_key != current_key and current_group: + yield current_group + current_group = [] + current_key = None + # else: same key as current_group, so continue in next iteration # Update last_key for next iteration if current_key: