Implement comprehensive migration solution with:
- Full and incremental migration modes
- JSONB schema transformation for RAWDATACOR and ELABDATADISP tables
- Native PostgreSQL partitioning (2014-2031)
- Optimized GIN indexes for JSONB queries
- Rich logging with progress tracking
- Complete benchmark system for MySQL vs PostgreSQL comparison
- CLI interface with multiple commands (setup, migrate, benchmark)
- Configuration management via .env file
- Error handling and retry logic
- Batch processing for performance (configurable batch size)

Database transformations:
- RAWDATACOR: 16 Val columns + units → single JSONB measurements
- ELABDATADISP: 25+ measurement fields → structured JSONB with categories

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
155 lines
4.3 KiB
Python
155 lines
4.3 KiB
Python
"""Configuration management using Pydantic settings."""
|
|
from pydantic_settings import BaseSettings
|
|
from typing import Optional
|
|
import os
|
|
|
|
|
|
class DatabaseConfig(BaseSettings):
    """Connection parameters common to every database this tool talks to.

    All five fields are required and have no defaults, so instantiation
    fails validation unless each one is supplied as a keyword argument,
    an environment variable, or an entry in the ``.env`` file.
    """

    host: str
    port: int
    user: str
    password: str
    database: str

    class Config:
        # The base class is unprefixed; MySQLConfig and PostgreSQLConfig
        # override this to namespace their variables.
        env_prefix: str = ""
        # Read the same dotenv file as the other settings classes in this
        # module, so connection details kept in .env are actually picked
        # up. Real environment variables still take priority over it.
        env_file = ".env"
        # pydantic-settings hands every dotenv entry to the model regardless
        # of env_prefix and rejects unknown ones by default. The .env file is
        # shared by several settings classes, so foreign keys must be ignored.
        extra = "ignore"
|
|
|
|
|
|
class MySQLConfig(DatabaseConfig):
    """Connection settings for the MySQL database that data is read from.

    Declares no fields of its own; it only changes the prefix under which
    the inherited fields are looked up (e.g. ``MYSQL_HOST`` for ``host``).
    """

    class Config:
        # Namespace for this database's environment variables.
        env_prefix = "MYSQL_"
|
|
|
|
|
|
class PostgreSQLConfig(DatabaseConfig):
    """Connection settings for the PostgreSQL database that data is written to.

    Declares no fields of its own; it only changes the prefix under which
    the inherited fields are looked up (e.g. ``POSTGRES_HOST`` for ``host``).
    """

    class Config:
        # Namespace for this database's environment variables.
        env_prefix = "POSTGRES_"
|
|
|
|
|
|
class MigrationSettings(BaseSettings):
    """Tunable options for a migration run.

    Values are resolved from unprefixed, case-insensitive variables
    (``BATCH_SIZE``, ``LOG_LEVEL``, ``DRY_RUN``) in the environment or in
    the ``.env`` file; every field has a default, so none is required.
    """

    batch_size: int = 10000  # presumably rows handled per batch; confirm against the migrator
    log_level: str = "INFO"
    dry_run: bool = False

    class Config:
        env_file = ".env"
        case_sensitive = False
        # The .env file also carries keys for the other settings classes
        # (MYSQL_*, POSTGRES_*, BENCHMARK_*). pydantic-settings passes every
        # dotenv entry to the model and forbids unknown ones by default,
        # which would make construction fail; ignore them instead.
        extra = "ignore"
|
|
|
|
|
|
class BenchmarkSettings(BaseSettings):
    """Options for the benchmark commands.

    Values are resolved from ``BENCHMARK_``-prefixed, case-insensitive
    variables (``BENCHMARK_OUTPUT_DIR``, ``BENCHMARK_ITERATIONS``) in the
    environment or in the ``.env`` file; every field has a default.
    """

    output_dir: str = "benchmark_results"
    iterations: int = 5

    class Config:
        env_prefix: str = "BENCHMARK_"
        env_file = ".env"
        case_sensitive = False
        # env_prefix does not filter dotenv entries: pydantic-settings passes
        # every key in .env to the model and forbids unknown ones by default.
        # The file is shared with the other settings classes, so their keys
        # must be ignored here or construction fails.
        extra = "ignore"
|
|
|
|
|
|
class Settings(BaseSettings):
    """Aggregate of every settings group used by the application.

    The four groups are required fields; use :meth:`from_env` to build an
    instance with each group resolved from the environment.
    """

    mysql: MySQLConfig
    postgres: PostgreSQLConfig
    migration: MigrationSettings
    benchmark: BenchmarkSettings

    class Config:
        env_file = ".env"
        case_sensitive = False
        # This model declares no flat fields, so every key in .env is
        # "unknown" to it. pydantic-settings forbids unknown dotenv entries
        # by default, which would make construction fail whenever the file
        # is non-empty; ignore them and let the nested groups do the reading.
        extra = "ignore"

    @classmethod
    def from_env(cls) -> "Settings":
        """Build a ``Settings`` instance from the environment.

        Each group is instantiated with no arguments, so it resolves its
        own values from the sources configured on its own class.

        Returns:
            A fully populated ``Settings`` object.

        Raises:
            pydantic.ValidationError: if a group cannot be validated, e.g.
                a required database field is missing.
        """
        return cls(
            mysql=MySQLConfig(),
            postgres=PostgreSQLConfig(),
            migration=MigrationSettings(),
            benchmark=BenchmarkSettings(),
        )
|
|
|
|
|
|
# Module-level cache for the settings object; stays None until first requested.
_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, creating it on first use.

    The first call builds the object through ``Settings.from_env()`` and
    stores it in the module-level cache; later calls return that same
    object without rebuilding it.
    """
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings.from_env()
    return _settings
|
|
|
|
|
|
# Schema transformation definitions

# Names of the sixteen RAWDATACOR value columns (suffixes are the hex digits
# 0-F) and of the unit column paired with each one.
RAWDATACOR_COLUMNS = {
    "val_columns": [f"Val{digit}" for digit in "0123456789ABCDEF"],
    "unit_columns": [f"Val{digit}_unitmisure" for digit in "0123456789ABCDEF"],
}
|
|
|
|
# ELABDATADISP measurement column names, grouped by the category they are
# filed under. Keys are category names; values list the source columns.
ELABDATADISP_MEASUREMENT_FIELDS = dict(
    shifts=[
        "XShift",
        "YShift",
        "ZShift",
        "HShift",
        "HShiftDir",
        "HShift_local",
    ],
    coordinates=[
        "X",
        "Y",
        "Z",
        "Xstar",
        "Zstar",
    ],
    kinematics=[
        "speed",
        "speed_local",
        "acceleration",
        "acceleration_local",
    ],
    sensors=[
        "T_node",
        "load_value",
        "water_level",
        "pressure",
    ],
    calculated=[
        "AlfaX",
        "AlfaY",
        "Area",
    ],
)
|
|
|
|
# Maps each ELABDATADISP source column to a (category, key) pair.
# The table below is organised per category as (source column, key) rows and
# flattened into a single dict keyed by source column.
ELABDATADISP_FIELD_MAPPING = {
    source: (category, key)
    for category, renames in (
        (
            "shifts",
            (
                ("XShift", "x"),
                ("YShift", "y"),
                ("ZShift", "z"),
                ("HShift", "h"),
                ("HShiftDir", "h_dir"),
                ("HShift_local", "h_local"),
            ),
        ),
        (
            "coordinates",
            (
                ("X", "x"),
                ("Y", "y"),
                ("Z", "z"),
                ("Xstar", "x_star"),
                ("Zstar", "z_star"),
            ),
        ),
        (
            "kinematics",
            (
                ("speed", "speed"),
                ("speed_local", "speed_local"),
                ("acceleration", "acceleration"),
                ("acceleration_local", "acceleration_local"),
            ),
        ),
        (
            "sensors",
            (
                ("T_node", "t_node"),
                ("load_value", "load_value"),
                ("water_level", "water_level"),
                ("pressure", "pressure"),
            ),
        ),
        (
            "calculated",
            (
                ("AlfaX", "alfa_x"),
                ("AlfaY", "alfa_y"),
                ("Area", "area"),
            ),
        ),
    )
    for source, key in renames
}
|
|
|
|
# Years covered by PostgreSQL partitions (shared by both tables).
# Both ends are inclusive: 2014 through 2031.
PARTITION_YEARS = [*range(2014, 2031 + 1)]
|
|
|
|
# Table configurations
# One entry per migrated table, keyed by the lowercase table name. Only the
# MySQL name and the primary key differ per table, so the entries are built
# from rows of (postgres_table, mysql_table, primary_key).
TABLE_CONFIGS = {
    postgres_table: {
        "mysql_table": mysql_table,
        "postgres_table": postgres_table,
        "primary_key": primary_key,
        "partition_key": "event_date",
    }
    for postgres_table, mysql_table, primary_key in (
        ("rawdatacor", "RAWDATACOR", "id"),
        ("elabdatadisp", "ELABDATADISP", "idElabData"),
    )
}
|