fix: Fix duplicate group insertion in consolidation generator
Critical bug: current_group and current_key were inside the while loop, causing them to be reset on each batch iteration. When an incomplete group spanned a batch boundary, it would be: 1. Buffered at end of batch N (in local current_group) 2. LOST when loop continued (new local variables created) 3. Re-fetched and yielded again in batch N+1. This caused the same consolidated record to be inserted many times. Solution: Move current_group and current_key OUTSIDE the while loop to persist across batch iterations. Incomplete groups now properly merge across batch boundaries without duplication. Algorithm: - Only yield groups when we're 100% certain they're complete - A group is complete when the next key differs from the current key - At batch boundaries, incomplete groups stay buffered for the next batch - Resume always uses last_completed_key to avoid re-processing. This fixes the user's observation of 27 identical rows for the same consolidated record. 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
90
test_target_record.py
Normal file
90
test_target_record.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
"""Test if target record is being consolidated correctly."""

import json

from src.connectors.mysql_connector import MySQLConnector
from src.transformers.data_transformer import DataTransformer
from src.utils.logger import setup_logger, get_logger

setup_logger(__name__)
logger = get_logger(__name__)


def _verify_consolidation(group_rows):
    """Consolidate one complete group and report whether it collapses to a
    single row whose JSON ``measurements`` covers nodes 1-22.

    Args:
        group_rows: list of row dicts for a single (UnitName, ToolNameID,
            EventDate, EventTime) consolidation group.
    """
    consolidated = DataTransformer.consolidate_elabdatadisp_batch(group_rows)
    print(f"Consolidated to {len(consolidated)} row(s)")

    if len(consolidated) == 1:
        print("✓ Consolidated to 1 row!")
        meas = consolidated[0].get("measurements")
        # measurements may come back as a JSON string or an already-decoded dict
        if isinstance(meas, str):
            meas = json.loads(meas)
        cons_nodes = sorted([int(k) for k in meas.keys()])
        print(f"Measurements nodes: {cons_nodes}")

        if cons_nodes == list(range(1, 23)):
            print("\n" + "="*80)
            print("✓✓✓ TARGET RECORD CONSOLIDATES CORRECTLY ✓✓✓")
            print("="*80)
        else:
            print(f"✗ Expected nodes 1-22, got {cons_nodes}")
    else:
        print(f"✗ Expected 1 consolidated row, got {len(consolidated)}")


def main():
    """Scan consolidation groups from partition d10 until the known target
    key is found, then check that its group is complete (nodes 1-22) and
    consolidates to exactly one row. Stops after 1000 groups as a safety limit.
    """
    print("\n" + "="*80)
    print("Testing target record consolidation")
    print("="*80 + "\n")

    # Known-problematic record reported by the user (27 duplicate inserts).
    target_key = ("M1_ID0246", "DT0001", "2023-06-26", "10:43:59")

    group_num = 0
    found = False

    with MySQLConnector() as mysql_conn:
        partition = "d10"

        print("Fetching consolidation groups from d10...\n")

        for group_rows in mysql_conn.fetch_consolidation_groups_from_partition(
            "ELABDATADISP",
            partition,
            limit=100
        ):
            group_num += 1
            if group_rows:
                first_row = group_rows[0]
                # Group identity: must match the generator's consolidation key.
                key = (
                    first_row.get("UnitName"),
                    first_row.get("ToolNameID"),
                    str(first_row.get("EventDate")),
                    str(first_row.get("EventTime"))
                )
                nodes = sorted([r.get('NodeNum') for r in group_rows])

                # Show first 10 groups
                if group_num <= 10:
                    print(f"Group {group_num}: key={key}, nodes={len(nodes)} items")

                if key == target_key:
                    print(f"\n✓ FOUND TARGET KEY in group {group_num}!")
                    print(f" Key: {key}")
                    print(f" Nodes: {nodes}")
                    print(f" Count: {len(group_rows)}")

                    if len(nodes) == 22 and nodes == list(range(1, 23)):
                        print("\n✓ All 22 nodes present!")

                        # Test consolidation
                        _verify_consolidation(group_rows)
                    else:
                        print(f"✗ INCOMPLETE! Expected 22 nodes, got {len(nodes)}")
                        print(f" Expected: {list(range(1, 23))}")
                        print(f" Got: {nodes}")

                    found = True
                    break

            # Safety limit
            if group_num >= 1000:
                print(f"\nStopped at group {group_num} (safety limit)")
                break

    if not found:
        print(f"\n✗ Target key NOT FOUND in first {group_num} groups")
        print("\nThis is a PROBLEM - the record is not being returned by the generator!")

    print("\nDone!\n")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user