fix: Use actual PostgreSQL row count for total_rows_migrated tracking

Replace session-level counting with direct table COUNT queries so that
total_rows_migrated always reflects the actual row count in PostgreSQL. This fixes
a discrepancy where the counter tracked only rows inserted during the current session
and did not account for earlier insertions or duplicates left by failed resume attempts.

Key improvements:
- Use get_row_count() after each batch to get authoritative total
- Preserve previous count on resume and accumulate across sessions
- Remove dependency on error-prone session-level counters
- Ensures migration_state.total_rows_migrated matches actual table row count

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-23 15:33:27 +01:00
parent b09cfcf9df
commit 0f217379ea
8 changed files with 646 additions and 100 deletions

View File

@@ -62,6 +62,7 @@ class TestDataTransformation:
assert pg_row["id"] == 1
assert pg_row["unit_name"] == "TestUnit"
assert pg_row["tool_name_id"] == "Tool1"
assert "node_num" not in pg_row # node_num should NOT be a column anymore
assert pg_row["event_timestamp"] is not None
assert pg_row["event_timestamp"].year == 2024
assert pg_row["event_timestamp"].month == 1
@@ -69,11 +70,15 @@ class TestDataTransformation:
assert pg_row["event_timestamp"].hour == 12
assert pg_row["event_timestamp"].minute == 0
assert isinstance(pg_row["measurements"], dict)
assert "0" in pg_row["measurements"]
assert pg_row["measurements"]["0"]["value"] == "100.5"
assert pg_row["measurements"]["0"]["unit"] == "°C"
assert "1" not in pg_row["measurements"] # NULL values excluded
assert "2" in pg_row["measurements"]
# Verify node is a key in measurements JSONB (single node case)
assert "1" in pg_row["measurements"] # node number as key
assert "0" in pg_row["measurements"]["1"]
assert pg_row["measurements"]["1"]["0"]["value"] == "100.5"
assert pg_row["measurements"]["1"]["0"]["unit"] == "°C"
assert "1" not in pg_row["measurements"]["1"] # NULL values excluded
assert "2" in pg_row["measurements"]["1"]
assert pg_row["measurements"]["1"]["2"]["value"] == "200.3"
assert pg_row["measurements"]["1"]["2"]["unit"] == "m/s"
def test_elabdatadisp_transformation(self):
"""Test ELABDATADISP row transformation."""
@@ -246,6 +251,167 @@ class TestTimeConversion:
assert pg_row["event_timestamp"].hour == 0
assert pg_row["event_timestamp"].minute == 0
# Verify that unit is NOT included when it's None (optimization)
assert "0" in pg_row["measurements"]
assert pg_row["measurements"]["0"]["value"] == "-1709"
assert "unit" not in pg_row["measurements"]["0"] # unit should not exist when None
def test_rawdatacor_consolidation(self):
    """Test consolidation of multiple nodes into single JSONB row."""
    # Three rows share the same (unit, tool, timestamp) grouping key but
    # carry distinct node numbers; per-node Val0/Val2 readings differ so
    # each node remains identifiable after consolidation.
    node_readings = {
        1: ("100.5", "200.3"),
        2: ("101.2", "205.1"),
        3: ("102.0", "210.5"),
    }
    rows = []
    for node, (val0, val2) in node_readings.items():
        row = {
            "id": node,  # id happens to equal the node number here
            "UnitName": "TestUnit",
            "ToolNameID": "Tool1",
            "NodeNum": node,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "BatLevel": 3.5,
            "Temperature": 25.5,
            "Val0": val0,
            "Val1": None,
            "Val2": val2,
            "Val0_unitmisure": "°C",
            "Val1_unitmisure": None,
            "Val2_unitmisure": "m/s",
        }
        # Fill the remaining hex-named Val columns (Val3..ValF) with NULLs.
        for i in range(3, 16):
            col = f"Val{i:X}"
            row[col] = None
            row[f"{col}_unitmisure"] = None
        row["created_at"] = None
        row["BatLevelModule"] = None
        row["TemperatureModule"] = None
        row["RssiModule"] = None
        rows.append(row)

    consolidated = DataTransformer.consolidate_rawdatacor_batch(rows)

    # All three nodes collapse into one consolidated row.
    assert len(consolidated) == 1
    merged = consolidated[0]
    assert merged["id"] == 3  # MAX(id) for proper resume
    assert merged["unit_name"] == "TestUnit"
    assert merged["tool_name_id"] == "Tool1"
    assert merged["bat_level"] == 3.5
    assert merged["temperature"] == 25.5

    # Each node number appears as a JSONB key holding its own readings.
    measurements = merged["measurements"]
    for node, (val0, val2) in node_readings.items():
        key = str(node)
        assert key in measurements
        assert measurements[key]["0"]["value"] == val0
        assert measurements[key]["2"]["value"] == val2
    assert measurements["1"]["0"]["unit"] == "°C"
def test_rawdatacor_consolidation_with_different_keys(self):
    """Test that rows with different keys are NOT consolidated."""

    def make_row(row_id, unit_name):
        # Single node-1 reading; only the unit name varies between rows,
        # which is enough to make the (unit, tool, timestamp) keys differ.
        row = {
            "id": row_id,
            "UnitName": unit_name,
            "ToolNameID": "Tool1",
            "NodeNum": 1,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "BatLevel": 3.5,
            "Temperature": 25.5,
            "Val0": "100.5",
            "Val1": None,
            "Val2": None,
            "Val0_unitmisure": "°C",
            "Val1_unitmisure": None,
            "Val2_unitmisure": None,
        }
        # Fill the remaining hex-named Val columns (Val3..ValF) with NULLs.
        for i in range(3, 16):
            col = f"Val{i:X}"
            row[col] = None
            row[f"{col}_unitmisure"] = None
        row["created_at"] = None
        row["BatLevelModule"] = None
        row["TemperatureModule"] = None
        row["RssiModule"] = None
        return row

    rows = [make_row(1, "Unit1"), make_row(2, "Unit2")]

    consolidated = DataTransformer.consolidate_rawdatacor_batch(rows)

    # Rows with distinct units must remain two separate output rows.
    assert len(consolidated) == 2
    assert consolidated[0]["unit_name"] == "Unit1"
    assert consolidated[1]["unit_name"] == "Unit2"
class TestFieldMapping:
"""Test field mapping configuration."""