feat: Add MySQL to PostgreSQL migration tool with JSONB transformation
Implement a comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
154
config.py
Normal file
154
config.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Configuration management using Pydantic settings."""
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
import os
|
||||
|
||||
|
||||
class DatabaseConfig(BaseSettings):
    """Connection parameters common to the source and target databases.

    All five fields are required (none has a default). pydantic-settings
    fills them from environment variables and from the ``.env`` file;
    subclasses override ``env_prefix`` to namespace their variables.
    """

    host: str
    port: int
    user: str
    password: str
    database: str

    class Config:
        env_prefix: str = ""
        # Read the project's .env file in addition to the process environment,
        # matching the other settings classes in this module.
        env_file = ".env"
        # pydantic-settings hands every key found in the dotenv file to the
        # model; with the default extra="forbid" any key this model does not
        # declare would raise a ValidationError.
        extra = "ignore"
|
||||
|
||||
|
||||
class MySQLConfig(DatabaseConfig):
    """MySQL source database configuration.

    Fields are inherited from ``DatabaseConfig`` and are populated from
    ``MYSQL_``-prefixed variables (``MYSQL_HOST``, ``MYSQL_PORT``,
    ``MYSQL_USER``, ``MYSQL_PASSWORD``, ``MYSQL_DATABASE``).
    """

    class Config:
        env_prefix: str = "MYSQL_"
        # Without env_file the values are only taken from the process
        # environment and the .env file is never consulted.
        env_file = ".env"
        # The .env file also carries keys for other settings classes; ignore
        # them instead of failing validation (default is extra="forbid").
        extra = "ignore"
|
||||
|
||||
|
||||
class PostgreSQLConfig(DatabaseConfig):
    """PostgreSQL target database configuration.

    Fields are inherited from ``DatabaseConfig`` and are populated from
    ``POSTGRES_``-prefixed variables (``POSTGRES_HOST``, ``POSTGRES_PORT``,
    ``POSTGRES_USER``, ``POSTGRES_PASSWORD``, ``POSTGRES_DATABASE``).
    """

    class Config:
        env_prefix: str = "POSTGRES_"
        # Without env_file the values are only taken from the process
        # environment and the .env file is never consulted.
        env_file = ".env"
        # The .env file also carries keys for other settings classes; ignore
        # them instead of failing validation (default is extra="forbid").
        extra = "ignore"
|
||||
|
||||
|
||||
class MigrationSettings(BaseSettings):
    """Migration settings.

    No ``env_prefix`` is set, so the fields map to the unprefixed variables
    ``BATCH_SIZE``, ``LOG_LEVEL`` and ``DRY_RUN`` (matched case-insensitively).
    """

    batch_size: int = 10000  # presumably rows per batch; verify against the migrator
    log_level: str = "INFO"
    dry_run: bool = False

    class Config:
        env_file = ".env"
        case_sensitive = False
        # The same .env file holds keys for the other settings classes.
        # pydantic-settings passes all dotenv keys to the model, and the
        # default extra="forbid" would reject every key not declared here.
        extra = "ignore"
|
||||
|
||||
|
||||
class BenchmarkSettings(BaseSettings):
    """Benchmark settings.

    Fields are populated from ``BENCHMARK_``-prefixed variables
    (``BENCHMARK_OUTPUT_DIR``, ``BENCHMARK_ITERATIONS``), matched
    case-insensitively.
    """

    output_dir: str = "benchmark_results"
    iterations: int = 5

    class Config:
        env_prefix: str = "BENCHMARK_"
        env_file = ".env"
        case_sensitive = False
        # pydantic-settings passes every dotenv key to the model regardless of
        # env_prefix; ignore the ones that belong to other settings classes
        # instead of failing validation (default is extra="forbid").
        extra = "ignore"
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """All application settings, grouped into one section per concern."""

    mysql: MySQLConfig
    postgres: PostgreSQLConfig
    migration: MigrationSettings
    benchmark: BenchmarkSettings

    class Config:
        env_file = ".env"
        case_sensitive = False
        # This model also reads .env; keys that are not one of the four
        # section names must not fail validation (default is extra="forbid").
        extra = "ignore"

    @classmethod
    def from_env(cls) -> "Settings":
        """Build a ``Settings`` instance from the environment.

        Each section is instantiated through its own settings class, so the
        prefix and defaults declared on that class apply, and the results are
        passed in as explicit keyword arguments.

        Returns:
            A fully populated ``Settings`` object.

        Raises:
            pydantic.ValidationError: if a required value (for example a
                database host) is missing or cannot be coerced.
        """
        return cls(
            mysql=MySQLConfig(),
            postgres=PostgreSQLConfig(),
            migration=MigrationSettings(),
            benchmark=BenchmarkSettings(),
        )
|
||||
|
||||
|
||||
# Module-level cache: stays None until get_settings() is first called.
_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return the shared ``Settings`` object, creating it on first use.

    The first call builds the instance with ``Settings.from_env()`` and
    stores it in the module-level ``_settings``; later calls return that
    same object.
    """
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings.from_env()
    return _settings
|
||||
|
||||
|
||||
# Schema transformation definitions
# Sixteen value columns (Val0..Val9, ValA..ValF) and, in the same order, the
# matching "<value column>_unitmisure" unit columns.
RAWDATACOR_COLUMNS = {
    "val_columns": [f"Val{suffix}" for suffix in "0123456789ABCDEF"],
    "unit_columns": [f"Val{suffix}_unitmisure" for suffix in "0123456789ABCDEF"],
}
|
||||
|
||||
# ELABDATADISP measurement column names, grouped by category.
ELABDATADISP_MEASUREMENT_FIELDS = {
    "shifts": "XShift YShift ZShift HShift HShiftDir HShift_local".split(),
    "coordinates": "X Y Z Xstar Zstar".split(),
    "kinematics": "speed speed_local acceleration acceleration_local".split(),
    "sensors": "T_node load_value water_level pressure".split(),
    "calculated": "AlfaX AlfaY Area".split(),
}
|
||||
|
||||
# Maps each ELABDATADISP source column to its (category, key) destination.
# Built from one table per category; insertion order follows the tables.
ELABDATADISP_FIELD_MAPPING = {
    source: (category, key)
    for category, columns in (
        (
            "shifts",
            (
                ("XShift", "x"),
                ("YShift", "y"),
                ("ZShift", "z"),
                ("HShift", "h"),
                ("HShiftDir", "h_dir"),
                ("HShift_local", "h_local"),
            ),
        ),
        (
            "coordinates",
            (
                ("X", "x"),
                ("Y", "y"),
                ("Z", "z"),
                ("Xstar", "x_star"),
                ("Zstar", "z_star"),
            ),
        ),
        (
            "kinematics",
            (
                ("speed", "speed"),
                ("speed_local", "speed_local"),
                ("acceleration", "acceleration"),
                ("acceleration_local", "acceleration_local"),
            ),
        ),
        (
            "sensors",
            (
                ("T_node", "t_node"),
                ("load_value", "load_value"),
                ("water_level", "water_level"),
                ("pressure", "pressure"),
            ),
        ),
        (
            "calculated",
            (
                ("AlfaX", "alfa_x"),
                ("AlfaY", "alfa_y"),
                ("Area", "area"),
            ),
        ),
    )
    for source, key in columns
}
|
||||
|
||||
# PostgreSQL partition years (from both tables): 2014 through 2031 inclusive.
PARTITION_YEARS = [*range(2014, 2031 + 1)]
|
||||
|
||||
# Table configurations
# One entry per migrated table: source and target table names, the primary-key
# column, and the partition-key column ("event_date" for both tables).
TABLE_CONFIGS = {
    config_key: {
        "mysql_table": mysql_table,
        "postgres_table": postgres_table,
        "primary_key": primary_key,
        "partition_key": "event_date",
    }
    for config_key, mysql_table, postgres_table, primary_key in (
        ("rawdatacor", "RAWDATACOR", "rawdatacor", "id"),
        ("elabdatadisp", "ELABDATADISP", "elabdatadisp", "idElabData"),
    )
}
|
||||
Reference in New Issue
Block a user