From 9cc12abe11231d862482ec5ab4c2856d68bf8f69 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 25 Dec 2025 19:32:52 +0100 Subject: [PATCH] fix: Order rows by consolidation key to keep related nodes together in batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fetching rows for consolidation, the original keyset pagination only ordered by id, which caused nodes from the same (unit, tool, timestamp) to be split across multiple batches. This resulted in incomplete consolidation, with some nodes being missed. Solution: Order by consolidation columns in addition to id: - Primary: id (for keyset pagination) - Secondary: UnitName, ToolNameID, EventDate, EventTime, NodeNum This is intended to keep all nodes with the same (unit, tool, timestamp) together in the same batch, allowing proper consolidation within the batch. Caveat: because id is the leading sort key, the secondary columns only break ties between rows with equal id; if id is unique (as `id > last_id` keyset pagination assumes), the row order is the same as ordering by id alone, and a LIMIT boundary can still split a group across batches. Fixes: Nodes being lost during ELABDATADISP consolidation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 --- src/connectors/mysql_connector.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/connectors/mysql_connector.py b/src/connectors/mysql_connector.py index f94341a..3c08559 100644 --- a/src/connectors/mysql_connector.py +++ b/src/connectors/mysql_connector.py @@ -257,11 +257,15 @@ class MySQLConnector: with self.connection.cursor() as cursor: # Use keyset pagination: fetch by id > last_id # This is much more efficient than OFFSET for large tables + # Order by id first for pagination, then by consolidation key to keep + # related nodes together in the same batch + order_clause = f"`{id_column}` ASC, `UnitName` ASC, `ToolNameID` ASC, `EventDate` ASC, `EventTime` ASC, `NodeNum` ASC" + if last_id is None: - query = f"SELECT * FROM `{table}` ORDER BY `{id_column}` ASC LIMIT %s" + query = f"SELECT * FROM `{table}` ORDER BY {order_clause} LIMIT %s" cursor.execute(query, (batch_size,)) else: - query = f"SELECT * FROM `{table}` WHERE 
`{id_column}` > %s ORDER BY `{id_column}` ASC LIMIT %s" + query = f"SELECT * FROM `{table}` WHERE `{id_column}` > %s ORDER BY {order_clause} LIMIT %s" cursor.execute(query, (last_id, batch_size)) rows = cursor.fetchall()