fix: Use actual PostgreSQL row count for total_rows_migrated tracking

Replace session-level counting with direct table COUNT queries so that
total_rows_migrated always reflects the actual row count in PostgreSQL. This fixes
a discrepancy where the counter tracked only rows inserted during the current session
and did not account for earlier insertions or duplicates left by failed resume attempts.

Key improvements:
- Use get_row_count() after each batch to get authoritative total
- Preserve previous count on resume and accumulate across sessions
- Remove dependency on error-prone session-level counters
- Ensures migration_state.total_rows_migrated matches actual table row count

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-23 15:33:27 +01:00
parent b09cfcf9df
commit 0f217379ea
8 changed files with 646 additions and 100 deletions

View File

@@ -62,6 +62,7 @@ class TestDataTransformation:
assert pg_row["id"] == 1
assert pg_row["unit_name"] == "TestUnit"
assert pg_row["tool_name_id"] == "Tool1"
assert "node_num" not in pg_row # node_num should NOT be a column anymore
assert pg_row["event_timestamp"] is not None
assert pg_row["event_timestamp"].year == 2024
assert pg_row["event_timestamp"].month == 1
@@ -69,11 +70,15 @@ class TestDataTransformation:
assert pg_row["event_timestamp"].hour == 12
assert pg_row["event_timestamp"].minute == 0
assert isinstance(pg_row["measurements"], dict)
assert "0" in pg_row["measurements"]
assert pg_row["measurements"]["0"]["value"] == "100.5"
assert pg_row["measurements"]["0"]["unit"] == "°C"
assert "1" not in pg_row["measurements"] # NULL values excluded
assert "2" in pg_row["measurements"]
# Verify node is a key in measurements JSONB (single node case)
assert "1" in pg_row["measurements"] # node number as key
assert "0" in pg_row["measurements"]["1"]
assert pg_row["measurements"]["1"]["0"]["value"] == "100.5"
assert pg_row["measurements"]["1"]["0"]["unit"] == "°C"
assert "1" not in pg_row["measurements"]["1"] # NULL values excluded
assert "2" in pg_row["measurements"]["1"]
assert pg_row["measurements"]["1"]["2"]["value"] == "200.3"
assert pg_row["measurements"]["1"]["2"]["unit"] == "m/s"
def test_elabdatadisp_transformation(self):
"""Test ELABDATADISP row transformation."""
@@ -246,6 +251,167 @@ class TestTimeConversion:
assert pg_row["event_timestamp"].hour == 0
assert pg_row["event_timestamp"].minute == 0
# Verify that unit is NOT included when it's None (optimization)
assert "0" in pg_row["measurements"]
assert pg_row["measurements"]["0"]["value"] == "-1709"
assert "unit" not in pg_row["measurements"]["0"] # unit should not exist when None
def test_rawdatacor_consolidation(self):
    """Test consolidation of multiple nodes into single JSONB row."""
    # Three rows share the same (unit, tool, timestamp) grouping key but
    # carry distinct node numbers; per-node Val0/Val2 readings differ so
    # each node remains identifiable after consolidation.
    node_readings = {
        1: ("100.5", "200.3"),
        2: ("101.2", "205.1"),
        3: ("102.0", "210.5"),
    }
    rows = []
    for node, (val0, val2) in node_readings.items():
        row = {
            "id": node,  # id happens to equal the node number here
            "UnitName": "TestUnit",
            "ToolNameID": "Tool1",
            "NodeNum": node,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "BatLevel": 3.5,
            "Temperature": 25.5,
            "Val0": val0,
            "Val1": None,
            "Val2": val2,
            "Val0_unitmisure": "°C",
            "Val1_unitmisure": None,
            "Val2_unitmisure": "m/s",
        }
        # Fill the remaining hex-named Val columns (Val3..ValF) with NULLs.
        for i in range(3, 16):
            col = f"Val{i:X}"
            row[col] = None
            row[f"{col}_unitmisure"] = None
        row["created_at"] = None
        row["BatLevelModule"] = None
        row["TemperatureModule"] = None
        row["RssiModule"] = None
        rows.append(row)

    consolidated = DataTransformer.consolidate_rawdatacor_batch(rows)

    # All three nodes collapse into one consolidated row.
    assert len(consolidated) == 1
    merged = consolidated[0]
    assert merged["id"] == 3  # MAX(id) for proper resume
    assert merged["unit_name"] == "TestUnit"
    assert merged["tool_name_id"] == "Tool1"
    assert merged["bat_level"] == 3.5
    assert merged["temperature"] == 25.5

    # Each node number appears as a JSONB key holding its own readings.
    measurements = merged["measurements"]
    for node, (val0, val2) in node_readings.items():
        key = str(node)
        assert key in measurements
        assert measurements[key]["0"]["value"] == val0
        assert measurements[key]["2"]["value"] == val2
    assert measurements["1"]["0"]["unit"] == "°C"
def test_rawdatacor_consolidation_with_different_keys(self):
    """Test that rows with different keys are NOT consolidated."""

    def make_row(row_id, unit_name):
        # Single node-1 reading; only the unit name varies between rows,
        # which is enough to make the (unit, tool, timestamp) keys differ.
        row = {
            "id": row_id,
            "UnitName": unit_name,
            "ToolNameID": "Tool1",
            "NodeNum": 1,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "BatLevel": 3.5,
            "Temperature": 25.5,
            "Val0": "100.5",
            "Val1": None,
            "Val2": None,
            "Val0_unitmisure": "°C",
            "Val1_unitmisure": None,
            "Val2_unitmisure": None,
        }
        # Fill the remaining hex-named Val columns (Val3..ValF) with NULLs.
        for i in range(3, 16):
            col = f"Val{i:X}"
            row[col] = None
            row[f"{col}_unitmisure"] = None
        row["created_at"] = None
        row["BatLevelModule"] = None
        row["TemperatureModule"] = None
        row["RssiModule"] = None
        return row

    rows = [make_row(1, "Unit1"), make_row(2, "Unit2")]

    consolidated = DataTransformer.consolidate_rawdatacor_batch(rows)

    # Rows with distinct units must remain two separate output rows.
    assert len(consolidated) == 2
    assert consolidated[0]["unit_name"] == "Unit1"
    assert consolidated[1]["unit_name"] == "Unit2"
class TestFieldMapping:
"""Test field mapping configuration."""