fix: Add timeout settings and retry logic to MySQL connector

Configuration improvements:
- Set read_timeout=300 (5 minutes) to handle long queries
- Set write_timeout=300 (5 minutes) for writes
- Set max_allowed_packet=64MB to handle larger data transfers

Retry logic:
- Added retry mechanism with max 3 retries on fetch failure
- Auto-reconnect on connection loss before retry
- Better error messages showing retry attempts

This fixes the 'connection is lost' error that occurs during
long-running migrations by:
1. Giving MySQL queries more time to complete
2. Allowing larger packet sizes for bulk data
3. Automatically recovering from connection drops

Fixes: 'Connection is lost' error during full migration
This commit is contained in:
2025-12-21 09:53:34 +01:00
parent 821cda850e
commit b09cfcf9df
8 changed files with 761 additions and 119 deletions

View File

@@ -1,6 +1,6 @@
"""Data transformation from MySQL to PostgreSQL format."""
from typing import Dict, Any, List
from datetime import datetime
from datetime import datetime, time, timedelta
from config import (
RAWDATACOR_COLUMNS,
ELABDATADISP_FIELD_MAPPING,
@@ -14,6 +14,36 @@ logger = get_logger(__name__)
class DataTransformer:
"""Transform MySQL data to PostgreSQL format."""
@staticmethod
def _convert_time(event_time: Any) -> time:
"""Convert event_time to datetime.time object.
Handles multiple input types:
- str: Parse from "HH:MM:SS" format
- timedelta: Convert from MySQL TIME type (stored as timedelta)
- time: Return as-is
Args:
event_time: Time value from MySQL (str, timedelta, or time)
Returns:
datetime.time object
"""
if isinstance(event_time, str):
return datetime.strptime(event_time, "%H:%M:%S").time()
elif isinstance(event_time, timedelta):
# MySQL returns TIME as timedelta
# Extract seconds from timedelta and convert to time
total_seconds = int(event_time.total_seconds())
hours = total_seconds // 3600
minutes = (total_seconds % 3600) // 60
seconds = total_seconds % 60
return time(hour=hours, minute=minutes, second=seconds)
elif isinstance(event_time, time):
return event_time
else:
raise ValueError(f"Unsupported event_time type: {type(event_time)}")
@staticmethod
def transform_rawdatacor_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]:
"""Transform a RAWDATACOR row from MySQL to PostgreSQL format.
@@ -41,14 +71,35 @@ class DataTransformer:
"unit": unit if unit else None,
}
# Combine event_date and event_time into event_timestamp
event_date = mysql_row.get("EventDate")
event_time = mysql_row.get("EventTime")
if event_date is not None and event_time is not None:
event_time_obj = DataTransformer._convert_time(event_time)
event_timestamp = datetime.combine(event_date, event_time_obj)
elif event_date is None or event_time is None:
# Log a warning for records with missing date/time
missing = []
if event_date is None:
missing.append("EventDate")
if event_time is None:
missing.append("EventTime")
logger.warning(
f"Row {mysql_row.get('id')} has NULL {', '.join(missing)}. "
f"Using default timestamp: 1970-01-01 00:00:00"
)
# Use default timestamp for records with missing date/time
event_timestamp = datetime(1970, 1, 1, 0, 0, 0)
else:
event_timestamp = None
# Create PostgreSQL row
pg_row = {
"id": mysql_row["id"],
"unit_name": mysql_row.get("UnitName"),
"tool_name_id": mysql_row["ToolNameID"],
"node_num": mysql_row["NodeNum"],
"event_date": mysql_row["EventDate"],
"event_time": mysql_row["EventTime"],
"event_timestamp": event_timestamp,
"bat_level": mysql_row["BatLevel"],
"temperature": mysql_row["Temperature"],
"measurements": measurements,
@@ -90,14 +141,35 @@ class DataTransformer:
k: v for k, v in measurements.items() if v
}
# Combine event_date and event_time into event_timestamp
event_date = mysql_row.get("EventDate")
event_time = mysql_row.get("EventTime")
if event_date is not None and event_time is not None:
event_time_obj = DataTransformer._convert_time(event_time)
event_timestamp = datetime.combine(event_date, event_time_obj)
elif event_date is None or event_time is None:
# Log a warning for records with missing date/time
missing = []
if event_date is None:
missing.append("EventDate")
if event_time is None:
missing.append("EventTime")
logger.warning(
f"Row {mysql_row.get('idElabData')} has NULL {', '.join(missing)}. "
f"Using default timestamp: 1970-01-01 00:00:00"
)
# Use default timestamp for records with missing date/time
event_timestamp = datetime(1970, 1, 1, 0, 0, 0)
else:
event_timestamp = None
# Create PostgreSQL row
pg_row = {
"id_elab_data": mysql_row["idElabData"],
"unit_name": mysql_row.get("UnitName"),
"tool_name_id": mysql_row["ToolNameID"],
"node_num": mysql_row["NodeNum"],
"event_date": mysql_row["EventDate"],
"event_time": mysql_row["EventTime"],
"event_timestamp": event_timestamp,
"state": mysql_row.get("State"),
"calc_err": mysql_row.get("calcerr", 0),
"measurements": measurements,
@@ -150,8 +222,7 @@ class DataTransformer:
"unit_name",
"tool_name_id",
"node_num",
"event_date",
"event_time",
"event_timestamp",
"bat_level",
"temperature",
"measurements",
@@ -166,8 +237,7 @@ class DataTransformer:
"unit_name",
"tool_name_id",
"node_num",
"event_date",
"event_time",
"event_timestamp",
"state",
"calc_err",
"measurements",

View File

@@ -21,8 +21,7 @@ CREATE TABLE IF NOT EXISTS rawdatacor (
unit_name VARCHAR(32),
tool_name_id VARCHAR(32) NOT NULL,
node_num INTEGER NOT NULL,
event_date DATE NOT NULL,
event_time TIME NOT NULL,
event_timestamp TIMESTAMP NOT NULL,
bat_level NUMERIC(4,2) NOT NULL,
temperature NUMERIC(5,2) NOT NULL,
measurements JSONB,
@@ -30,7 +29,7 @@ CREATE TABLE IF NOT EXISTS rawdatacor (
bat_level_module NUMERIC(4,2),
temperature_module NUMERIC(5,2),
rssi_module INTEGER
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_timestamp));
-- Note: PostgreSQL doesn't support PRIMARY KEY or UNIQUE constraints
-- with RANGE partitioning on expressions. Using sequence for id auto-increment.
@@ -46,11 +45,18 @@ CREATE TABLE IF NOT EXISTS rawdatacor_{year}
FOR VALUES FROM ({year}) TO ({next_year});
"""
# Add default partition for records outside the defined year range
sql += """
CREATE TABLE IF NOT EXISTS rawdatacor_default
PARTITION OF rawdatacor
DEFAULT;
"""
# Add indexes
sql += """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_raw
ON rawdatacor(unit_name, tool_name_id, node_num, event_date, event_time);
ON rawdatacor(unit_name, tool_name_id, node_num, event_timestamp);
CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
ON rawdatacor(unit_name, tool_name_id);
@@ -58,8 +64,8 @@ CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
CREATE INDEX IF NOT EXISTS idx_measurements_gin_raw
ON rawdatacor USING GIN (measurements);
CREATE INDEX IF NOT EXISTS idx_event_date_raw
ON rawdatacor(event_date);
CREATE INDEX IF NOT EXISTS idx_event_timestamp_raw
ON rawdatacor(event_timestamp);
"""
return sql
@@ -81,14 +87,13 @@ CREATE TABLE IF NOT EXISTS elabdatadisp (
unit_name VARCHAR(32),
tool_name_id VARCHAR(32) NOT NULL,
node_num INTEGER NOT NULL,
event_date DATE NOT NULL,
event_time TIME NOT NULL,
event_timestamp TIMESTAMP NOT NULL,
state VARCHAR(32),
calc_err INTEGER DEFAULT 0,
measurements JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_timestamp));
-- Note: PostgreSQL doesn't support PRIMARY KEY or UNIQUE constraints
-- with RANGE partitioning on expressions. Using sequence for id_elab_data auto-increment.
@@ -104,11 +109,18 @@ CREATE TABLE IF NOT EXISTS elabdatadisp_{year}
FOR VALUES FROM ({year}) TO ({next_year});
"""
# Add default partition for records outside the defined year range
sql += """
CREATE TABLE IF NOT EXISTS elabdatadisp_default
PARTITION OF elabdatadisp
DEFAULT;
"""
# Add indexes
sql += """
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_elab
ON elabdatadisp(unit_name, tool_name_id, node_num, event_date, event_time);
ON elabdatadisp(unit_name, tool_name_id, node_num, event_timestamp);
CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
ON elabdatadisp(unit_name, tool_name_id);
@@ -116,8 +128,8 @@ CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
CREATE INDEX IF NOT EXISTS idx_measurements_gin_elab
ON elabdatadisp USING GIN (measurements);
CREATE INDEX IF NOT EXISTS idx_event_date_elab
ON elabdatadisp(event_date);
CREATE INDEX IF NOT EXISTS idx_event_timestamp_elab
ON elabdatadisp(event_timestamp);
"""
return sql