feat: Add MySQL to PostgreSQL migration tool with JSONB transformation
Implement comprehensive migration solution with: - Full and incremental migration modes - JSONB schema transformation for RAWDATACOR and ELABDATADISP tables - Native PostgreSQL partitioning (2014-2031) - Optimized GIN indexes for JSONB queries - Rich logging with progress tracking - Complete benchmark system for MySQL vs PostgreSQL comparison - CLI interface with multiple commands (setup, migrate, benchmark) - Configuration management via .env file - Error handling and retry logic - Batch processing for performance (configurable batch size) Database transformations: - RAWDATACOR: 16 Val columns + units → single JSONB measurements - ELABDATADISP: 25+ measurement fields → structured JSONB with categories 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
22
.env.example
Normal file
22
.env.example
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# MySQL Source Database
|
||||||
|
MYSQL_HOST=localhost
|
||||||
|
MYSQL_PORT=3306
|
||||||
|
MYSQL_USER=root
|
||||||
|
MYSQL_PASSWORD=your_mysql_password
|
||||||
|
MYSQL_DATABASE=your_database_name
|
||||||
|
|
||||||
|
# PostgreSQL Target Database (container Incus)
|
||||||
|
POSTGRES_HOST=localhost
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_USER=postgres
|
||||||
|
POSTGRES_PASSWORD=your_postgres_password
|
||||||
|
POSTGRES_DATABASE=migrated_db
|
||||||
|
|
||||||
|
# Migration Settings
|
||||||
|
BATCH_SIZE=10000
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
DRY_RUN=false
|
||||||
|
|
||||||
|
# Performance Testing
|
||||||
|
BENCHMARK_OUTPUT_DIR=benchmark_results
|
||||||
|
BENCHMARK_ITERATIONS=5
|
||||||
32
.gitignore
vendored
Normal file
32
.gitignore
vendored
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Environment variables
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Python-generated files
|
||||||
|
__pycache__/
|
||||||
|
*.py[oc]
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
wheels/
|
||||||
|
*.egg-info
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
|
||||||
|
# Project specific
|
||||||
|
*.log
|
||||||
|
migration_state.json
|
||||||
|
benchmark_results/
|
||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.14
|
||||||
154
config.py
Normal file
154
config.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
"""Configuration management using Pydantic settings."""
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class DatabaseConfig(BaseSettings):
|
||||||
|
"""Database configuration."""
|
||||||
|
|
||||||
|
host: str
|
||||||
|
port: int
|
||||||
|
user: str
|
||||||
|
password: str
|
||||||
|
database: str
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_prefix: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class MySQLConfig(DatabaseConfig):
|
||||||
|
"""MySQL source database configuration."""
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_prefix: str = "MYSQL_"
|
||||||
|
|
||||||
|
|
||||||
|
class PostgreSQLConfig(DatabaseConfig):
|
||||||
|
"""PostgreSQL target database configuration."""
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_prefix: str = "POSTGRES_"
|
||||||
|
|
||||||
|
|
||||||
|
class MigrationSettings(BaseSettings):
|
||||||
|
"""Migration settings."""
|
||||||
|
|
||||||
|
batch_size: int = 10000
|
||||||
|
log_level: str = "INFO"
|
||||||
|
dry_run: bool = False
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_file = ".env"
|
||||||
|
case_sensitive = False
|
||||||
|
|
||||||
|
|
||||||
|
class BenchmarkSettings(BaseSettings):
|
||||||
|
"""Benchmark settings."""
|
||||||
|
|
||||||
|
output_dir: str = "benchmark_results"
|
||||||
|
iterations: int = 5
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_prefix: str = "BENCHMARK_"
|
||||||
|
env_file = ".env"
|
||||||
|
case_sensitive = False
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
|
||||||
|
"""All application settings."""
|
||||||
|
|
||||||
|
mysql: MySQLConfig
|
||||||
|
postgres: PostgreSQLConfig
|
||||||
|
migration: MigrationSettings
|
||||||
|
benchmark: BenchmarkSettings
|
||||||
|
|
||||||
|
class Config:
|
||||||
|
env_file = ".env"
|
||||||
|
case_sensitive = False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_env(cls):
|
||||||
|
"""Load settings from environment variables."""
|
||||||
|
return cls(
|
||||||
|
mysql=MySQLConfig(),
|
||||||
|
postgres=PostgreSQLConfig(),
|
||||||
|
migration=MigrationSettings(),
|
||||||
|
benchmark=BenchmarkSettings(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Lazy load settings
|
||||||
|
_settings: Optional[Settings] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_settings() -> Settings:
|
||||||
|
"""Get application settings, loading from .env if necessary."""
|
||||||
|
global _settings
|
||||||
|
if _settings is None:
|
||||||
|
_settings = Settings.from_env()
|
||||||
|
return _settings
|
||||||
|
|
||||||
|
|
||||||
|
# Schema transformation definitions
|
||||||
|
RAWDATACOR_COLUMNS = {
|
||||||
|
"val_columns": ["Val0", "Val1", "Val2", "Val3", "Val4", "Val5", "Val6", "Val7", "Val8", "Val9", "ValA", "ValB", "ValC", "ValD", "ValE", "ValF"],
|
||||||
|
"unit_columns": ["Val0_unitmisure", "Val1_unitmisure", "Val2_unitmisure", "Val3_unitmisure", "Val4_unitmisure", "Val5_unitmisure", "Val6_unitmisure", "Val7_unitmisure", "Val8_unitmisure", "Val9_unitmisure", "ValA_unitmisure", "ValB_unitmisure", "ValC_unitmisure", "ValD_unitmisure", "ValE_unitmisure", "ValF_unitmisure"],
|
||||||
|
}
|
||||||
|
|
||||||
|
ELABDATADISP_MEASUREMENT_FIELDS = {
|
||||||
|
"shifts": ["XShift", "YShift", "ZShift", "HShift", "HShiftDir", "HShift_local"],
|
||||||
|
"coordinates": ["X", "Y", "Z", "Xstar", "Zstar"],
|
||||||
|
"kinematics": ["speed", "speed_local", "acceleration", "acceleration_local"],
|
||||||
|
"sensors": ["T_node", "load_value", "water_level", "pressure"],
|
||||||
|
"calculated": ["AlfaX", "AlfaY", "Area"],
|
||||||
|
}
|
||||||
|
|
||||||
|
ELABDATADISP_FIELD_MAPPING = {
|
||||||
|
# shifts mapping (source -> (category, key))
|
||||||
|
"XShift": ("shifts", "x"),
|
||||||
|
"YShift": ("shifts", "y"),
|
||||||
|
"ZShift": ("shifts", "z"),
|
||||||
|
"HShift": ("shifts", "h"),
|
||||||
|
"HShiftDir": ("shifts", "h_dir"),
|
||||||
|
"HShift_local": ("shifts", "h_local"),
|
||||||
|
# coordinates mapping
|
||||||
|
"X": ("coordinates", "x"),
|
||||||
|
"Y": ("coordinates", "y"),
|
||||||
|
"Z": ("coordinates", "z"),
|
||||||
|
"Xstar": ("coordinates", "x_star"),
|
||||||
|
"Zstar": ("coordinates", "z_star"),
|
||||||
|
# kinematics mapping
|
||||||
|
"speed": ("kinematics", "speed"),
|
||||||
|
"speed_local": ("kinematics", "speed_local"),
|
||||||
|
"acceleration": ("kinematics", "acceleration"),
|
||||||
|
"acceleration_local": ("kinematics", "acceleration_local"),
|
||||||
|
# sensors mapping
|
||||||
|
"T_node": ("sensors", "t_node"),
|
||||||
|
"load_value": ("sensors", "load_value"),
|
||||||
|
"water_level": ("sensors", "water_level"),
|
||||||
|
"pressure": ("sensors", "pressure"),
|
||||||
|
# calculated mapping
|
||||||
|
"AlfaX": ("calculated", "alfa_x"),
|
||||||
|
"AlfaY": ("calculated", "alfa_y"),
|
||||||
|
"Area": ("calculated", "area"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# PostgreSQL Partition years (from both tables)
|
||||||
|
PARTITION_YEARS = list(range(2014, 2032)) # 2014-2031
|
||||||
|
|
||||||
|
# Table configurations
|
||||||
|
TABLE_CONFIGS = {
|
||||||
|
"rawdatacor": {
|
||||||
|
"mysql_table": "RAWDATACOR",
|
||||||
|
"postgres_table": "rawdatacor",
|
||||||
|
"primary_key": "id",
|
||||||
|
"partition_key": "event_date",
|
||||||
|
},
|
||||||
|
"elabdatadisp": {
|
||||||
|
"mysql_table": "ELABDATADISP",
|
||||||
|
"postgres_table": "elabdatadisp",
|
||||||
|
"primary_key": "idElabData",
|
||||||
|
"partition_key": "event_date",
|
||||||
|
},
|
||||||
|
}
|
||||||
197
main.py
Normal file
197
main.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
"""MySQL to PostgreSQL migration tool CLI."""
|
||||||
|
import click
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from config import get_settings
|
||||||
|
from src.utils.logger import setup_logger, get_logger
|
||||||
|
from src.transformers.schema_transformer import get_full_schema_script
|
||||||
|
from src.migrator.full_migration import run_full_migration
|
||||||
|
from src.migrator.incremental_migration import run_incremental_migration
|
||||||
|
from src.benchmark.performance_test import run_benchmark
|
||||||
|
from src.connectors.postgres_connector import PostgreSQLConnector
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@click.group()
|
||||||
|
@click.pass_context
|
||||||
|
def cli(ctx):
|
||||||
|
"""MySQL to PostgreSQL migration tool with performance benchmarking."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
ctx.ensure_object(dict)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
@click.option(
|
||||||
|
"--create-schema",
|
||||||
|
is_flag=True,
|
||||||
|
help="Create PostgreSQL schema and partitions"
|
||||||
|
)
|
||||||
|
def setup(create_schema):
|
||||||
|
"""Setup PostgreSQL database."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
if not create_schema:
|
||||||
|
click.echo("Usage: python main.py setup --create-schema")
|
||||||
|
click.echo("Create PostgreSQL schema and partitions")
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
with PostgreSQLConnector() as pg_conn:
|
||||||
|
logger.info("Creating PostgreSQL schema...")
|
||||||
|
schema_script = get_full_schema_script()
|
||||||
|
pg_conn.execute_script(schema_script)
|
||||||
|
logger.info("✓ Schema creation complete")
|
||||||
|
click.echo("✓ PostgreSQL schema created successfully")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Setup failed: {e}")
|
||||||
|
click.echo(f"✗ Setup failed: {e}", err=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.group()
|
||||||
|
def migrate():
|
||||||
|
"""Migrate data from MySQL to PostgreSQL."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@migrate.command()
|
||||||
|
@click.option(
|
||||||
|
"--table",
|
||||||
|
type=click.Choice(["RAWDATACOR", "ELABDATADISP", "all"]),
|
||||||
|
default="all",
|
||||||
|
help="Table to migrate (default: all)"
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--dry-run",
|
||||||
|
is_flag=True,
|
||||||
|
help="Show what would be done without modifying data"
|
||||||
|
)
|
||||||
|
def full(table, dry_run):
|
||||||
|
"""Perform full migration of all data."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
tables = ["RAWDATACOR", "ELABDATADISP"] if table == "all" else [table]
|
||||||
|
|
||||||
|
try:
|
||||||
|
total_migrated = 0
|
||||||
|
|
||||||
|
for tbl in tables:
|
||||||
|
click.echo(f"\nMigrating {tbl}...")
|
||||||
|
migrated = run_full_migration(tbl, dry_run=dry_run)
|
||||||
|
total_migrated += migrated
|
||||||
|
click.echo(f"✓ {tbl}: {migrated} rows migrated")
|
||||||
|
|
||||||
|
click.echo(f"\n✓ Full migration complete: {total_migrated} total rows migrated")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Migration failed: {e}")
|
||||||
|
click.echo(f"✗ Migration failed: {e}", err=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@migrate.command()
|
||||||
|
@click.option(
|
||||||
|
"--table",
|
||||||
|
type=click.Choice(["RAWDATACOR", "ELABDATADISP", "all"]),
|
||||||
|
default="all",
|
||||||
|
help="Table to migrate (default: all)"
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--dry-run",
|
||||||
|
is_flag=True,
|
||||||
|
help="Show what would be done without modifying data"
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--state-file",
|
||||||
|
default="migration_state.json",
|
||||||
|
help="Path to migration state file"
|
||||||
|
)
|
||||||
|
def incremental(table, dry_run, state_file):
|
||||||
|
"""Perform incremental migration since last sync."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
tables = ["RAWDATACOR", "ELABDATADISP"] if table == "all" else [table]
|
||||||
|
|
||||||
|
try:
|
||||||
|
total_migrated = 0
|
||||||
|
|
||||||
|
for tbl in tables:
|
||||||
|
click.echo(f"\nIncremental migration for {tbl}...")
|
||||||
|
migrated = run_incremental_migration(tbl, dry_run=dry_run, state_file=state_file)
|
||||||
|
total_migrated += migrated
|
||||||
|
if migrated > 0:
|
||||||
|
click.echo(f"✓ {tbl}: {migrated} rows migrated")
|
||||||
|
else:
|
||||||
|
click.echo(f"ℹ {tbl}: No new rows to migrate")
|
||||||
|
|
||||||
|
if total_migrated == 0:
|
||||||
|
click.echo("\nℹ No rows to migrate")
|
||||||
|
else:
|
||||||
|
click.echo(f"\n✓ Incremental migration complete: {total_migrated} total rows migrated")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Incremental migration failed: {e}")
|
||||||
|
click.echo(f"✗ Incremental migration failed: {e}", err=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
@click.option(
|
||||||
|
"--iterations",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Number of iterations per query (default from config)"
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--output",
|
||||||
|
type=click.Path(),
|
||||||
|
default=None,
|
||||||
|
help="Output file path (default: benchmark_results/benchmark_TIMESTAMP.json)"
|
||||||
|
)
|
||||||
|
def benchmark(iterations, output):
|
||||||
|
"""Run performance benchmarks comparing MySQL and PostgreSQL."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
click.echo("Running performance benchmarks...")
|
||||||
|
output_file = run_benchmark(iterations=iterations, output_file=output)
|
||||||
|
click.echo(f"✓ Benchmark complete: results saved to {output_file}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Benchmark failed: {e}")
|
||||||
|
click.echo(f"✗ Benchmark failed: {e}", err=True)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command()
|
||||||
|
def info():
|
||||||
|
"""Show configuration information."""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
|
||||||
|
click.echo("\n[MySQL Configuration]")
|
||||||
|
click.echo(f" Host: {settings.mysql.host}:{settings.mysql.port}")
|
||||||
|
click.echo(f" Database: {settings.mysql.database}")
|
||||||
|
click.echo(f" User: {settings.mysql.user}")
|
||||||
|
|
||||||
|
click.echo("\n[PostgreSQL Configuration]")
|
||||||
|
click.echo(f" Host: {settings.postgres.host}:{settings.postgres.port}")
|
||||||
|
click.echo(f" Database: {settings.postgres.database}")
|
||||||
|
click.echo(f" User: {settings.postgres.user}")
|
||||||
|
|
||||||
|
click.echo("\n[Migration Settings]")
|
||||||
|
click.echo(f" Batch Size: {settings.migration.batch_size}")
|
||||||
|
click.echo(f" Log Level: {settings.migration.log_level}")
|
||||||
|
click.echo(f" Dry Run: {settings.migration.dry_run}")
|
||||||
|
|
||||||
|
click.echo("\n[Benchmark Settings]")
|
||||||
|
click.echo(f" Output Directory: {settings.benchmark.output_dir}")
|
||||||
|
click.echo(f" Iterations: {settings.benchmark.iterations}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
cli(obj={})
|
||||||
15
pyproject.toml
Normal file
15
pyproject.toml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
[project]
|
||||||
|
name = "mysql2postgres"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Robust MySQL to PostgreSQL migration tool with schema transformation and performance benchmarking"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"pymysql>=1.1.0",
|
||||||
|
"psycopg[binary]>=3.1.0",
|
||||||
|
"python-dotenv>=1.0.0",
|
||||||
|
"click>=8.1.0",
|
||||||
|
"rich>=13.0.0",
|
||||||
|
"pydantic>=2.5.0",
|
||||||
|
"pydantic-settings>=2.1.0",
|
||||||
|
]
|
||||||
1
src/__init__.py
Normal file
1
src/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""MySQL to PostgreSQL migration tool."""
|
||||||
0
src/benchmark/__init__.py
Normal file
0
src/benchmark/__init__.py
Normal file
263
src/benchmark/performance_test.py
Normal file
263
src/benchmark/performance_test.py
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
"""Performance benchmarking for MySQL vs PostgreSQL."""
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Dict, List, Any, Optional, Tuple
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import statistics
|
||||||
|
|
||||||
|
from config import get_settings
|
||||||
|
from src.connectors.mysql_connector import MySQLConnector
|
||||||
|
from src.connectors.postgres_connector import PostgreSQLConnector
|
||||||
|
from src.benchmark.query_generator import BenchmarkQueryGenerator
|
||||||
|
from src.utils.logger import get_logger, setup_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PerformanceBenchmark:
|
||||||
|
"""Run performance benchmarks comparing MySQL and PostgreSQL."""
|
||||||
|
|
||||||
|
def __init__(self, iterations: int = 5):
|
||||||
|
"""Initialize benchmark runner.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
iterations: Number of times to run each query
|
||||||
|
"""
|
||||||
|
self.iterations = iterations
|
||||||
|
self.settings = get_settings()
|
||||||
|
self.results = {}
|
||||||
|
|
||||||
|
def run_all_benchmarks(self) -> Dict[str, Any]:
|
||||||
|
"""Run all benchmarks.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Benchmark results dictionary
|
||||||
|
"""
|
||||||
|
setup_logger(__name__)
|
||||||
|
logger.info(f"Starting performance benchmarks ({self.iterations} iterations per query)")
|
||||||
|
|
||||||
|
all_queries = BenchmarkQueryGenerator.get_all_benchmark_queries()
|
||||||
|
results = {
|
||||||
|
"timestamp": datetime.utcnow().isoformat(),
|
||||||
|
"iterations": self.iterations,
|
||||||
|
"tables": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
for table_name, query_categories in all_queries.items():
|
||||||
|
logger.info(f"Benchmarking {table_name}...")
|
||||||
|
table_results = self._benchmark_table(table_name, query_categories)
|
||||||
|
results["tables"][table_name] = table_results
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _benchmark_table(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
query_categories: Dict[str, List[Tuple[str, str]]]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Benchmark queries for a specific table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
query_categories: Dictionary of query categories
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Benchmark results for the table
|
||||||
|
"""
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
try:
|
||||||
|
with MySQLConnector() as mysql_conn:
|
||||||
|
with PostgreSQLConnector() as pg_conn:
|
||||||
|
for category, queries in query_categories.items():
|
||||||
|
logger.debug(f" Benchmarking {category}...")
|
||||||
|
results[category] = self._benchmark_query_pair(
|
||||||
|
mysql_conn,
|
||||||
|
pg_conn,
|
||||||
|
queries[0],
|
||||||
|
category
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Benchmark failed: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def _benchmark_query_pair(
|
||||||
|
self,
|
||||||
|
mysql_conn: MySQLConnector,
|
||||||
|
pg_conn: PostgreSQLConnector,
|
||||||
|
queries: Tuple[str, str],
|
||||||
|
category: str
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Benchmark a pair of MySQL and PostgreSQL queries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
mysql_conn: MySQL connector
|
||||||
|
pg_conn: PostgreSQL connector
|
||||||
|
queries: Tuple of (mysql_query, postgres_query)
|
||||||
|
category: Query category name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Benchmark results for the query pair
|
||||||
|
"""
|
||||||
|
mysql_query, pg_query = queries
|
||||||
|
result = {
|
||||||
|
"category": category,
|
||||||
|
"mysql": None,
|
||||||
|
"postgres": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Benchmark MySQL query
|
||||||
|
if mysql_query:
|
||||||
|
try:
|
||||||
|
times = []
|
||||||
|
for _ in range(self.iterations):
|
||||||
|
start = time.perf_counter()
|
||||||
|
with mysql_conn.connection.cursor() as cursor:
|
||||||
|
cursor.execute(mysql_query)
|
||||||
|
rows = cursor.fetchall()
|
||||||
|
end = time.perf_counter()
|
||||||
|
times.append((end - start) * 1000) # Convert to ms
|
||||||
|
|
||||||
|
result["mysql"] = self._calculate_stats(times, len(rows) if rows else 0)
|
||||||
|
logger.debug(f" MySQL {category}: {result['mysql']['mean']:.2f}ms")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"MySQL query failed: {e}")
|
||||||
|
result["mysql"] = {"error": str(e)}
|
||||||
|
|
||||||
|
# Benchmark PostgreSQL query
|
||||||
|
if pg_query:
|
||||||
|
try:
|
||||||
|
times = []
|
||||||
|
for _ in range(self.iterations):
|
||||||
|
start = time.perf_counter()
|
||||||
|
with pg_conn.connection.cursor() as cursor:
|
||||||
|
cursor.execute(pg_query)
|
||||||
|
rows = cursor.fetchall()
|
||||||
|
end = time.perf_counter()
|
||||||
|
times.append((end - start) * 1000) # Convert to ms
|
||||||
|
|
||||||
|
result["postgres"] = self._calculate_stats(times, len(rows) if rows else 0)
|
||||||
|
logger.debug(f" PostgreSQL {category}: {result['postgres']['mean']:.2f}ms")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"PostgreSQL query failed: {e}")
|
||||||
|
result["postgres"] = {"error": str(e)}
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _calculate_stats(times: List[float], row_count: int = 0) -> Dict[str, float]:
|
||||||
|
"""Calculate statistics for a list of execution times.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
times: List of execution times in milliseconds
|
||||||
|
row_count: Number of rows returned (for throughput calculation)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with statistics
|
||||||
|
"""
|
||||||
|
if not times:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"min": min(times),
|
||||||
|
"max": max(times),
|
||||||
|
"mean": statistics.mean(times),
|
||||||
|
"median": statistics.median(times),
|
||||||
|
"stdev": statistics.stdev(times) if len(times) > 1 else 0,
|
||||||
|
"p95": sorted(times)[int(len(times) * 0.95)] if len(times) > 1 else times[0],
|
||||||
|
"row_count": row_count,
|
||||||
|
"throughput": (row_count / (statistics.mean(times) / 1000)) if times and statistics.mean(times) > 0 else 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
def save_results(self, results: Dict[str, Any], output_file: Optional[str] = None) -> str:
|
||||||
|
"""Save benchmark results to file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
results: Benchmark results
|
||||||
|
output_file: Output file path (uses default from config if None)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to output file
|
||||||
|
"""
|
||||||
|
if output_file is None:
|
||||||
|
output_dir = Path(self.settings.benchmark.output_dir)
|
||||||
|
output_dir.mkdir(exist_ok=True)
|
||||||
|
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
|
||||||
|
output_file = output_dir / f"benchmark_{timestamp}.json"
|
||||||
|
else:
|
||||||
|
output_file = Path(output_file)
|
||||||
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(output_file, "w") as f:
|
||||||
|
json.dump(results, f, indent=2)
|
||||||
|
logger.info(f"Benchmark results saved to {output_file}")
|
||||||
|
return str(output_file)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save results: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def print_results(results: Dict[str, Any]) -> None:
|
||||||
|
"""Print benchmark results in a readable format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
results: Benchmark results
|
||||||
|
"""
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
for table_name, table_results in results.get("tables", {}).items():
|
||||||
|
console.print(f"\n[bold]{table_name}[/bold]")
|
||||||
|
|
||||||
|
for category, query_result in table_results.items():
|
||||||
|
mysql_result = query_result.get("mysql")
|
||||||
|
pg_result = query_result.get("postgres")
|
||||||
|
|
||||||
|
console.print(f"\n {category}:")
|
||||||
|
|
||||||
|
if mysql_result and "mean" in mysql_result:
|
||||||
|
console.print(
|
||||||
|
f" MySQL: {mysql_result['mean']:.2f}ms "
|
||||||
|
f"(min: {mysql_result['min']:.2f}ms, max: {mysql_result['max']:.2f}ms)"
|
||||||
|
)
|
||||||
|
|
||||||
|
if pg_result and "mean" in pg_result:
|
||||||
|
speedup = mysql_result['mean'] / pg_result['mean'] if mysql_result and 'mean' in mysql_result else 0
|
||||||
|
console.print(
|
||||||
|
f" PostgreSQL: {pg_result['mean']:.2f}ms "
|
||||||
|
f"(min: {pg_result['min']:.2f}ms, max: {pg_result['max']:.2f}ms)"
|
||||||
|
)
|
||||||
|
if speedup:
|
||||||
|
if speedup > 1:
|
||||||
|
console.print(f" [green]✓ PostgreSQL is {speedup:.1f}x faster[/green]")
|
||||||
|
else:
|
||||||
|
console.print(f" [yellow]⚠ MySQL is {1/speedup:.1f}x faster[/yellow]")
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(iterations: Optional[int] = None, output_file: Optional[str] = None) -> str:
|
||||||
|
"""Run performance benchmark and save results.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
iterations: Number of iterations per query
|
||||||
|
output_file: Output file path
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Path to results file
|
||||||
|
"""
|
||||||
|
if iterations is None:
|
||||||
|
settings = get_settings()
|
||||||
|
iterations = settings.benchmark.iterations
|
||||||
|
|
||||||
|
benchmark = PerformanceBenchmark(iterations=iterations)
|
||||||
|
results = benchmark.run_all_benchmarks()
|
||||||
|
benchmark.print_results(results)
|
||||||
|
return benchmark.save_results(results, output_file)
|
||||||
173
src/benchmark/query_generator.py
Normal file
173
src/benchmark/query_generator.py
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
"""Benchmark query generator for MySQL and PostgreSQL."""
|
||||||
|
from typing import List, Dict, Tuple, Any
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import random
|
||||||
|
|
||||||
|
|
||||||
|
class BenchmarkQueryGenerator:
|
||||||
|
"""Generate benchmark queries for performance testing."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def generate_rawdatacor_queries() -> Dict[str, List[Tuple[str, str]]]:
|
||||||
|
"""Generate benchmark queries for RAWDATACOR table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with query categories and (mysql_query, postgres_query) tuples
|
||||||
|
"""
|
||||||
|
# Sample data for queries
|
||||||
|
sample_unit_name = "Unit1"
|
||||||
|
sample_tool_name = "Tool1"
|
||||||
|
sample_node_num = 1
|
||||||
|
sample_date_start = "2024-01-01"
|
||||||
|
sample_date_end = "2024-01-31"
|
||||||
|
|
||||||
|
queries = {
|
||||||
|
"select_by_pk": [
|
||||||
|
(
|
||||||
|
"SELECT * FROM `RAWDATACOR` WHERE `id` = 1000 AND `EventDate` = '2024-01-15'",
|
||||||
|
"SELECT * FROM rawdatacor WHERE id = 1000 AND event_date = '2024-01-15'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"select_by_date_range": [
|
||||||
|
(
|
||||||
|
f"SELECT * FROM `RAWDATACOR` WHERE `EventDate` BETWEEN '{sample_date_start}' AND '{sample_date_end}'",
|
||||||
|
f"SELECT * FROM rawdatacor WHERE event_date BETWEEN '{sample_date_start}' AND '{sample_date_end}'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"select_by_unit_tool": [
|
||||||
|
(
|
||||||
|
f"SELECT * FROM `RAWDATACOR` WHERE `UnitName` = '{sample_unit_name}' AND `ToolNameID` = '{sample_tool_name}'",
|
||||||
|
f"SELECT * FROM rawdatacor WHERE unit_name = '{sample_unit_name}' AND tool_name_id = '{sample_tool_name}'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"select_count_by_unit": [
|
||||||
|
(
|
||||||
|
f"SELECT COUNT(*) FROM `RAWDATACOR` WHERE `UnitName` = '{sample_unit_name}'",
|
||||||
|
f"SELECT COUNT(*) FROM rawdatacor WHERE unit_name = '{sample_unit_name}'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"jsonb_filter_value": [
|
||||||
|
(
|
||||||
|
None, # Not applicable for MySQL
|
||||||
|
f"SELECT * FROM rawdatacor WHERE measurements->>'0'->>'value' IS NOT NULL LIMIT 1000"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"jsonb_contains": [
|
||||||
|
(
|
||||||
|
None, # Not applicable for MySQL
|
||||||
|
f"SELECT * FROM rawdatacor WHERE measurements ? '0' LIMIT 1000"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"aggregate_by_date": [
|
||||||
|
(
|
||||||
|
"SELECT `EventDate`, COUNT(*) as count FROM `RAWDATACOR` GROUP BY `EventDate` ORDER BY `EventDate`",
|
||||||
|
"SELECT event_date, COUNT(*) as count FROM rawdatacor GROUP BY event_date ORDER BY event_date"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"aggregate_with_filter": [
|
||||||
|
(
|
||||||
|
f"SELECT `UnitName`, `ToolNameID`, COUNT(*) as count FROM `RAWDATACOR` WHERE `EventDate` >= '{sample_date_start}' GROUP BY `UnitName`, `ToolNameID`",
|
||||||
|
f"SELECT unit_name, tool_name_id, COUNT(*) as count FROM rawdatacor WHERE event_date >= '{sample_date_start}' GROUP BY unit_name, tool_name_id"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
return queries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def generate_elabdatadisp_queries() -> Dict[str, List[Tuple[str, str]]]:
|
||||||
|
"""Generate benchmark queries for ELABDATADISP table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with query categories and (mysql_query, postgres_query) tuples
|
||||||
|
"""
|
||||||
|
sample_unit_name = "Unit1"
|
||||||
|
sample_tool_name = "Tool1"
|
||||||
|
sample_date_start = "2024-01-01"
|
||||||
|
sample_date_end = "2024-01-31"
|
||||||
|
|
||||||
|
queries = {
|
||||||
|
"select_by_pk": [
|
||||||
|
(
|
||||||
|
"SELECT * FROM `ELABDATADISP` WHERE `idElabData` = 5000 AND `EventDate` = '2024-01-15'",
|
||||||
|
"SELECT * FROM elabdatadisp WHERE id_elab_data = 5000 AND event_date = '2024-01-15'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"select_by_date_range": [
|
||||||
|
(
|
||||||
|
f"SELECT * FROM `ELABDATADISP` WHERE `EventDate` BETWEEN '{sample_date_start}' AND '{sample_date_end}'",
|
||||||
|
f"SELECT * FROM elabdatadisp WHERE event_date BETWEEN '{sample_date_start}' AND '{sample_date_end}'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"select_by_unit_tool": [
|
||||||
|
(
|
||||||
|
f"SELECT * FROM `ELABDATADISP` WHERE `UnitName` = '{sample_unit_name}' AND `ToolNameID` = '{sample_tool_name}'",
|
||||||
|
f"SELECT * FROM elabdatadisp WHERE unit_name = '{sample_unit_name}' AND tool_name_id = '{sample_tool_name}'"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"jsonb_filter_speed": [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
f"SELECT * FROM elabdatadisp WHERE measurements->'kinematics'->>'speed' IS NOT NULL LIMIT 1000"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"jsonb_range_query": [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
f"SELECT * FROM elabdatadisp WHERE (measurements->'kinematics'->>'speed')::NUMERIC > 1.0 LIMIT 1000"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"jsonb_nested_contains": [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
f"SELECT * FROM elabdatadisp WHERE measurements @> '{{\"kinematics\"{{}}}}' LIMIT 1000"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"aggregate_measurements": [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
f"SELECT unit_name, AVG((measurements->'kinematics'->>'speed')::NUMERIC) as avg_speed FROM elabdatadisp WHERE event_date >= '{sample_date_start}' GROUP BY unit_name LIMIT 100"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"count_by_state": [
|
||||||
|
(
|
||||||
|
f"SELECT `State`, COUNT(*) as count FROM `ELABDATADISP` GROUP BY `State`",
|
||||||
|
f"SELECT state, COUNT(*) as count FROM elabdatadisp GROUP BY state"
|
||||||
|
)
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
return queries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def generate_insert_queries() -> Dict[str, Tuple[str, str]]:
|
||||||
|
"""Generate INSERT benchmark queries.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with (mysql_query, postgres_query) tuples
|
||||||
|
"""
|
||||||
|
# These are placeholders - actual queries would be generated based on schema
|
||||||
|
queries = {
|
||||||
|
"insert_single_rawdatacor": (
|
||||||
|
"INSERT INTO `RAWDATACOR` (`UnitName`, `ToolNameID`, `NodeNum`, `EventDate`, `EventTime`, `BatLevel`, `Temperature`) VALUES ('Unit1', 'Tool1', 1, '2024-01-01', '12:00:00', 3.5, 25.5)",
|
||||||
|
"INSERT INTO rawdatacor (unit_name, tool_name_id, node_num, event_date, event_time, bat_level, temperature, measurements) VALUES ('Unit1', 'Tool1', 1, '2024-01-01', '12:00:00', 3.5, 25.5, '{}')"
|
||||||
|
),
|
||||||
|
"insert_single_elabdatadisp": (
|
||||||
|
"INSERT INTO `ELABDATADISP` (`UnitName`, `ToolNameID`, `NodeNum`, `EventDate`, `EventTime`) VALUES ('Unit1', 'Tool1', 1, '2024-01-01', '12:00:00')",
|
||||||
|
"INSERT INTO elabdatadisp (unit_name, tool_name_id, node_num, event_date, event_time, measurements) VALUES ('Unit1', 'Tool1', 1, '2024-01-01', '12:00:00', '{}')"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
return queries
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_all_benchmark_queries() -> Dict[str, Dict[str, List[Tuple[str, str]]]]:
|
||||||
|
"""Get all benchmark queries organized by table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with table names as keys and query dictionaries as values
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"RAWDATACOR": BenchmarkQueryGenerator.generate_rawdatacor_queries(),
|
||||||
|
"ELABDATADISP": BenchmarkQueryGenerator.generate_elabdatadisp_queries(),
|
||||||
|
}
|
||||||
0
src/connectors/__init__.py
Normal file
0
src/connectors/__init__.py
Normal file
166
src/connectors/mysql_connector.py
Normal file
166
src/connectors/mysql_connector.py
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
"""MySQL database connector."""
|
||||||
|
import pymysql
|
||||||
|
from typing import List, Dict, Any, Optional, Generator
|
||||||
|
from config import get_settings
|
||||||
|
from src.utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class MySQLConnector:
|
||||||
|
"""Connector for MySQL database."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
"""Initialize MySQL connector with settings."""
|
||||||
|
self.settings = get_settings()
|
||||||
|
self.connection = None
|
||||||
|
|
||||||
|
def connect(self) -> None:
|
||||||
|
"""Establish connection to MySQL database."""
|
||||||
|
try:
|
||||||
|
self.connection = pymysql.connect(
|
||||||
|
host=self.settings.mysql.host,
|
||||||
|
port=self.settings.mysql.port,
|
||||||
|
user=self.settings.mysql.user,
|
||||||
|
password=self.settings.mysql.password,
|
||||||
|
database=self.settings.mysql.database,
|
||||||
|
charset="utf8mb4",
|
||||||
|
cursorclass=pymysql.cursors.DictCursor,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Connected to MySQL: {self.settings.mysql.host}:"
|
||||||
|
f"{self.settings.mysql.port}/{self.settings.mysql.database}"
|
||||||
|
)
|
||||||
|
except pymysql.Error as e:
|
||||||
|
logger.error(f"Failed to connect to MySQL: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def disconnect(self) -> None:
|
||||||
|
"""Close connection to MySQL database."""
|
||||||
|
if self.connection:
|
||||||
|
self.connection.close()
|
||||||
|
logger.info("Disconnected from MySQL")
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
"""Context manager entry."""
|
||||||
|
self.connect()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
"""Context manager exit."""
|
||||||
|
self.disconnect()
|
||||||
|
|
||||||
|
def get_row_count(self, table: str) -> int:
|
||||||
|
"""Get total row count for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows in the table
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(f"SELECT COUNT(*) as count FROM `{table}`")
|
||||||
|
result = cursor.fetchone()
|
||||||
|
return result["count"]
|
||||||
|
except pymysql.Error as e:
|
||||||
|
logger.error(f"Failed to get row count for {table}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def fetch_all_rows(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
batch_size: Optional[int] = None
|
||||||
|
) -> Generator[List[Dict[str, Any]], None, None]:
|
||||||
|
"""Fetch all rows from a table in batches.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
batch_size: Number of rows per batch (uses config default if None)
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
Batches of row dictionaries
|
||||||
|
"""
|
||||||
|
if batch_size is None:
|
||||||
|
batch_size = self.settings.migration.batch_size
|
||||||
|
|
||||||
|
offset = 0
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
query = f"SELECT * FROM `{table}` LIMIT %s OFFSET %s"
|
||||||
|
cursor.execute(query, (batch_size, offset))
|
||||||
|
rows = cursor.fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
yield rows
|
||||||
|
offset += len(rows)
|
||||||
|
|
||||||
|
except pymysql.Error as e:
|
||||||
|
logger.error(f"Failed to fetch rows from {table}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def fetch_rows_since(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
since_timestamp: str,
|
||||||
|
batch_size: Optional[int] = None
|
||||||
|
) -> Generator[List[Dict[str, Any]], None, None]:
|
||||||
|
"""Fetch rows modified since a timestamp.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
since_timestamp: ISO format timestamp (e.g., '2024-01-01T00:00:00')
|
||||||
|
batch_size: Number of rows per batch (uses config default if None)
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
Batches of row dictionaries
|
||||||
|
"""
|
||||||
|
if batch_size is None:
|
||||||
|
batch_size = self.settings.migration.batch_size
|
||||||
|
|
||||||
|
offset = 0
|
||||||
|
timestamp_col = "updated_at" if table == "ELABDATADISP" else "created_at"
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
query = (
|
||||||
|
f"SELECT * FROM `{table}` "
|
||||||
|
f"WHERE `{timestamp_col}` > %s "
|
||||||
|
f"ORDER BY `{timestamp_col}` ASC "
|
||||||
|
f"LIMIT %s OFFSET %s"
|
||||||
|
)
|
||||||
|
cursor.execute(query, (since_timestamp, batch_size, offset))
|
||||||
|
rows = cursor.fetchall()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
break
|
||||||
|
|
||||||
|
yield rows
|
||||||
|
offset += len(rows)
|
||||||
|
|
||||||
|
except pymysql.Error as e:
|
||||||
|
logger.error(f"Failed to fetch rows from {table}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_table_structure(self, table: str) -> Dict[str, Any]:
|
||||||
|
"""Get table structure (column info).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with column information
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(f"DESCRIBE `{table}`")
|
||||||
|
columns = cursor.fetchall()
|
||||||
|
return {col["Field"]: col for col in columns}
|
||||||
|
except pymysql.Error as e:
|
||||||
|
logger.error(f"Failed to get structure for {table}: {e}")
|
||||||
|
raise
|
||||||
200
src/connectors/postgres_connector.py
Normal file
200
src/connectors/postgres_connector.py
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
"""PostgreSQL database connector."""
|
||||||
|
import psycopg
|
||||||
|
from typing import List, Dict, Any, Optional, Iterator
|
||||||
|
from psycopg import sql
|
||||||
|
import json
|
||||||
|
from config import get_settings
|
||||||
|
from src.utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class PostgreSQLConnector:
|
||||||
|
"""Connector for PostgreSQL database."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
"""Initialize PostgreSQL connector with settings."""
|
||||||
|
self.settings = get_settings()
|
||||||
|
self.connection = None
|
||||||
|
|
||||||
|
def connect(self) -> None:
|
||||||
|
"""Establish connection to PostgreSQL database."""
|
||||||
|
try:
|
||||||
|
self.connection = psycopg.connect(
|
||||||
|
host=self.settings.postgres.host,
|
||||||
|
port=self.settings.postgres.port,
|
||||||
|
user=self.settings.postgres.user,
|
||||||
|
password=self.settings.postgres.password,
|
||||||
|
dbname=self.settings.postgres.database,
|
||||||
|
autocommit=False,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Connected to PostgreSQL: {self.settings.postgres.host}:"
|
||||||
|
f"{self.settings.postgres.port}/{self.settings.postgres.database}"
|
||||||
|
)
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logger.error(f"Failed to connect to PostgreSQL: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def disconnect(self) -> None:
|
||||||
|
"""Close connection to PostgreSQL database."""
|
||||||
|
if self.connection:
|
||||||
|
self.connection.close()
|
||||||
|
logger.info("Disconnected from PostgreSQL")
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
"""Context manager entry."""
|
||||||
|
self.connect()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
"""Context manager exit."""
|
||||||
|
self.disconnect()
|
||||||
|
|
||||||
|
def execute(self, query: str, params: Optional[tuple] = None) -> None:
|
||||||
|
"""Execute a query without returning results.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: SQL query
|
||||||
|
params: Query parameters
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(query, params)
|
||||||
|
self.connection.commit()
|
||||||
|
except psycopg.Error as e:
|
||||||
|
self.connection.rollback()
|
||||||
|
logger.error(f"Query execution failed: {e}\nQuery: {query}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def execute_script(self, script: str) -> None:
|
||||||
|
"""Execute multiple SQL statements (script).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
script: SQL script with multiple statements
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(script)
|
||||||
|
self.connection.commit()
|
||||||
|
logger.debug("Script executed successfully")
|
||||||
|
except psycopg.Error as e:
|
||||||
|
self.connection.rollback()
|
||||||
|
logger.error(f"Script execution failed: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def insert_batch(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
rows: List[Dict[str, Any]],
|
||||||
|
columns: List[str]
|
||||||
|
) -> int:
|
||||||
|
"""Insert a batch of rows using COPY (fast bulk insert).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
rows: List of row dictionaries
|
||||||
|
columns: Column names in order
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows inserted
|
||||||
|
"""
|
||||||
|
if not rows:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
# Prepare COPY data
|
||||||
|
copy_data = []
|
||||||
|
for row in rows:
|
||||||
|
values = []
|
||||||
|
for col in columns:
|
||||||
|
val = row.get(col)
|
||||||
|
if val is None:
|
||||||
|
values.append("\\N") # NULL representation
|
||||||
|
elif isinstance(val, (dict, list)):
|
||||||
|
values.append(json.dumps(val))
|
||||||
|
elif isinstance(val, str):
|
||||||
|
# Escape special characters
|
||||||
|
val = val.replace("\\", "\\\\").replace("\n", "\\n").replace("\t", "\\t")
|
||||||
|
values.append(val)
|
||||||
|
else:
|
||||||
|
values.append(str(val))
|
||||||
|
copy_data.append("\t".join(values))
|
||||||
|
|
||||||
|
# Use COPY for fast insert
|
||||||
|
copy_sql = f"COPY {table} ({','.join(columns)}) FROM STDIN"
|
||||||
|
cursor.copy(copy_sql, "\n".join(copy_data).encode())
|
||||||
|
self.connection.commit()
|
||||||
|
|
||||||
|
logger.debug(f"Inserted {len(rows)} rows into {table}")
|
||||||
|
return len(rows)
|
||||||
|
|
||||||
|
except psycopg.Error as e:
|
||||||
|
self.connection.rollback()
|
||||||
|
logger.error(f"Batch insert failed: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def table_exists(self, table: str) -> bool:
|
||||||
|
"""Check if a table exists.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if table exists, False otherwise
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(
|
||||||
|
"SELECT EXISTS("
|
||||||
|
" SELECT 1 FROM information_schema.tables "
|
||||||
|
" WHERE table_name = %s"
|
||||||
|
")",
|
||||||
|
(table,)
|
||||||
|
)
|
||||||
|
return cursor.fetchone()[0]
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logger.error(f"Failed to check if table exists: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_max_timestamp(
|
||||||
|
self,
|
||||||
|
table: str,
|
||||||
|
timestamp_col: str = "created_at"
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""Get the maximum timestamp from a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
timestamp_col: Timestamp column name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ISO format timestamp or None if table is empty
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
query = f"SELECT MAX({timestamp_col})::text FROM {table}"
|
||||||
|
cursor.execute(query)
|
||||||
|
result = cursor.fetchone()
|
||||||
|
return result[0] if result and result[0] else None
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logger.error(f"Failed to get max timestamp: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_row_count(self, table: str) -> int:
|
||||||
|
"""Get row count for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows in the table
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(f"SELECT COUNT(*) FROM {table}")
|
||||||
|
return cursor.fetchone()[0]
|
||||||
|
except psycopg.Error as e:
|
||||||
|
logger.error(f"Failed to get row count: {e}")
|
||||||
|
raise
|
||||||
0
src/migrator/__init__.py
Normal file
0
src/migrator/__init__.py
Normal file
149
src/migrator/full_migration.py
Normal file
149
src/migrator/full_migration.py
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
"""Full migration from MySQL to PostgreSQL."""
|
||||||
|
from typing import Optional
|
||||||
|
from datetime import datetime
|
||||||
|
import json
|
||||||
|
|
||||||
|
from config import get_settings, TABLE_CONFIGS
|
||||||
|
from src.connectors.mysql_connector import MySQLConnector
|
||||||
|
from src.connectors.postgres_connector import PostgreSQLConnector
|
||||||
|
from src.transformers.data_transformer import DataTransformer
|
||||||
|
from src.utils.logger import get_logger, setup_logger
|
||||||
|
from src.utils.progress import ProgressTracker
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class FullMigrator:
|
||||||
|
"""Perform full migration of a table from MySQL to PostgreSQL."""
|
||||||
|
|
||||||
|
def __init__(self, table: str):
|
||||||
|
"""Initialize migrator for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name to migrate ('RAWDATACOR' or 'ELABDATADISP')
|
||||||
|
"""
|
||||||
|
if table not in TABLE_CONFIGS:
|
||||||
|
raise ValueError(f"Unknown table: {table}")
|
||||||
|
|
||||||
|
self.table = table
|
||||||
|
self.config = TABLE_CONFIGS[table]
|
||||||
|
self.settings = get_settings()
|
||||||
|
|
||||||
|
def migrate(self, dry_run: bool = False) -> int:
|
||||||
|
"""Perform full migration of the table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
dry_run: If True, log what would be done but don't modify data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Total number of rows migrated
|
||||||
|
"""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
mysql_table = self.config["mysql_table"]
|
||||||
|
pg_table = self.config["postgres_table"]
|
||||||
|
|
||||||
|
logger.info(f"Starting full migration of {mysql_table} -> {pg_table}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with MySQLConnector() as mysql_conn:
|
||||||
|
# Get total row count
|
||||||
|
total_rows = mysql_conn.get_row_count(mysql_table)
|
||||||
|
logger.info(f"Total rows to migrate: {total_rows}")
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
logger.info("[DRY RUN] Would migrate all rows")
|
||||||
|
return total_rows
|
||||||
|
|
||||||
|
with PostgreSQLConnector() as pg_conn:
|
||||||
|
# Check if table exists
|
||||||
|
if not pg_conn.table_exists(pg_table):
|
||||||
|
raise ValueError(
|
||||||
|
f"PostgreSQL table {pg_table} does not exist. "
|
||||||
|
"Run 'setup --create-schema' first."
|
||||||
|
)
|
||||||
|
|
||||||
|
migrated = 0
|
||||||
|
|
||||||
|
with ProgressTracker(
|
||||||
|
total_rows,
|
||||||
|
f"Migrating {mysql_table}"
|
||||||
|
) as progress:
|
||||||
|
# Fetch and migrate rows in batches
|
||||||
|
for batch in mysql_conn.fetch_all_rows(mysql_table):
|
||||||
|
# Transform batch
|
||||||
|
transformed = DataTransformer.transform_batch(
|
||||||
|
mysql_table,
|
||||||
|
batch
|
||||||
|
)
|
||||||
|
|
||||||
|
# Insert batch
|
||||||
|
columns = DataTransformer.get_column_order(pg_table)
|
||||||
|
inserted = pg_conn.insert_batch(
|
||||||
|
pg_table,
|
||||||
|
transformed,
|
||||||
|
columns
|
||||||
|
)
|
||||||
|
|
||||||
|
migrated += inserted
|
||||||
|
progress.update(inserted)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"✓ Migration complete: {migrated} rows migrated "
|
||||||
|
f"to {pg_table}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update migration state
|
||||||
|
self._update_migration_state(pg_conn, migrated)
|
||||||
|
|
||||||
|
return migrated
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Migration failed: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _update_migration_state(
|
||||||
|
self,
|
||||||
|
pg_conn: PostgreSQLConnector,
|
||||||
|
rows_migrated: int
|
||||||
|
) -> None:
|
||||||
|
"""Update migration state tracking table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pg_conn: PostgreSQL connection
|
||||||
|
rows_migrated: Number of rows migrated
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
pg_table = self.config["postgres_table"]
|
||||||
|
query = f"""
|
||||||
|
INSERT INTO migration_state
|
||||||
|
(table_name, last_migrated_timestamp, total_rows_migrated, migration_completed_at, status)
|
||||||
|
VALUES (%s, %s, %s, %s, %s)
|
||||||
|
ON CONFLICT (table_name) DO UPDATE SET
|
||||||
|
last_migrated_timestamp = EXCLUDED.last_migrated_timestamp,
|
||||||
|
total_rows_migrated = EXCLUDED.total_rows_migrated,
|
||||||
|
migration_completed_at = EXCLUDED.migration_completed_at,
|
||||||
|
status = EXCLUDED.status
|
||||||
|
"""
|
||||||
|
now = datetime.utcnow()
|
||||||
|
pg_conn.execute(query, (pg_table, now, rows_migrated, now, "completed"))
|
||||||
|
logger.debug("Migration state updated")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to update migration state: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def run_full_migration(
|
||||||
|
table: str,
|
||||||
|
dry_run: bool = False
|
||||||
|
) -> int:
|
||||||
|
"""Run full migration for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name to migrate
|
||||||
|
dry_run: If True, show what would be done without modifying data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows migrated
|
||||||
|
"""
|
||||||
|
migrator = FullMigrator(table)
|
||||||
|
return migrator.migrate(dry_run=dry_run)
|
||||||
155
src/migrator/incremental_migration.py
Normal file
155
src/migrator/incremental_migration.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
"""Incremental migration from MySQL to PostgreSQL based on timestamps."""
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from config import get_settings, TABLE_CONFIGS
|
||||||
|
from src.connectors.mysql_connector import MySQLConnector
|
||||||
|
from src.connectors.postgres_connector import PostgreSQLConnector
|
||||||
|
from src.transformers.data_transformer import DataTransformer
|
||||||
|
from src.utils.logger import get_logger, setup_logger
|
||||||
|
from src.utils.progress import ProgressTracker
|
||||||
|
from src.migrator.state import MigrationState
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class IncrementalMigrator:
|
||||||
|
"""Perform incremental migration based on timestamps."""
|
||||||
|
|
||||||
|
def __init__(self, table: str, state_file: str = "migration_state.json"):
|
||||||
|
"""Initialize incremental migrator.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name to migrate
|
||||||
|
state_file: Path to migration state file
|
||||||
|
"""
|
||||||
|
if table not in TABLE_CONFIGS:
|
||||||
|
raise ValueError(f"Unknown table: {table}")
|
||||||
|
|
||||||
|
self.table = table
|
||||||
|
self.config = TABLE_CONFIGS[table]
|
||||||
|
self.settings = get_settings()
|
||||||
|
self.state = MigrationState(state_file)
|
||||||
|
|
||||||
|
def migrate(self, dry_run: bool = False) -> int:
|
||||||
|
"""Perform incremental migration since last sync.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
dry_run: If True, log what would be done but don't modify data
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows migrated
|
||||||
|
"""
|
||||||
|
setup_logger(__name__)
|
||||||
|
|
||||||
|
mysql_table = self.config["mysql_table"]
|
||||||
|
pg_table = self.config["postgres_table"]
|
||||||
|
|
||||||
|
# Get last migration timestamp
|
||||||
|
last_timestamp = self.state.get_last_timestamp(pg_table)
|
||||||
|
|
||||||
|
if last_timestamp is None:
|
||||||
|
logger.info(
|
||||||
|
f"No previous migration found for {pg_table}. "
|
||||||
|
"Use 'migrate --full' for initial migration."
|
||||||
|
)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Starting incremental migration of {mysql_table} -> {pg_table} "
|
||||||
|
f"since {last_timestamp}"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with MySQLConnector() as mysql_conn:
|
||||||
|
# Count rows to migrate
|
||||||
|
timestamp_col = "updated_at" if mysql_table == "ELABDATADISP" else "created_at"
|
||||||
|
|
||||||
|
with PostgreSQLConnector() as pg_conn:
|
||||||
|
# Get max timestamp from PostgreSQL
|
||||||
|
pg_max_timestamp = pg_conn.get_max_timestamp(
|
||||||
|
pg_table,
|
||||||
|
timestamp_col
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Last timestamp in PostgreSQL: {pg_max_timestamp}")
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
logger.info("[DRY RUN] Would migrate rows after timestamp")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
migrated = 0
|
||||||
|
migration_start_time = datetime.utcnow().isoformat()
|
||||||
|
|
||||||
|
# Fetch and migrate rows in batches
|
||||||
|
batch_count = 0
|
||||||
|
for batch in mysql_conn.fetch_rows_since(
|
||||||
|
mysql_table,
|
||||||
|
last_timestamp
|
||||||
|
):
|
||||||
|
batch_count += 1
|
||||||
|
|
||||||
|
if batch_count == 1:
|
||||||
|
# Create progress tracker with unknown total
|
||||||
|
progress = ProgressTracker(
|
||||||
|
len(batch),
|
||||||
|
f"Migrating {mysql_table} (incremental)"
|
||||||
|
)
|
||||||
|
progress.__enter__()
|
||||||
|
|
||||||
|
# Transform batch
|
||||||
|
transformed = DataTransformer.transform_batch(
|
||||||
|
mysql_table,
|
||||||
|
batch
|
||||||
|
)
|
||||||
|
|
||||||
|
# Insert batch
|
||||||
|
columns = DataTransformer.get_column_order(pg_table)
|
||||||
|
inserted = pg_conn.insert_batch(
|
||||||
|
pg_table,
|
||||||
|
transformed,
|
||||||
|
columns
|
||||||
|
)
|
||||||
|
|
||||||
|
migrated += inserted
|
||||||
|
progress.update(inserted)
|
||||||
|
|
||||||
|
if batch_count == 0:
|
||||||
|
logger.info(f"No new rows to migrate for {mysql_table}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
progress.__exit__(None, None, None)
|
||||||
|
|
||||||
|
# Update migration state
|
||||||
|
self.state.set_last_timestamp(pg_table, migration_start_time)
|
||||||
|
self.state.increment_migration_count(pg_table, migrated)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"✓ Incremental migration complete: {migrated} rows migrated "
|
||||||
|
f"to {pg_table}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return migrated
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Incremental migration failed: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def run_incremental_migration(
|
||||||
|
table: str,
|
||||||
|
dry_run: bool = False,
|
||||||
|
state_file: str = "migration_state.json"
|
||||||
|
) -> int:
|
||||||
|
"""Run incremental migration for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name to migrate
|
||||||
|
dry_run: If True, show what would be done without modifying data
|
||||||
|
state_file: Path to migration state file
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Number of rows migrated
|
||||||
|
"""
|
||||||
|
migrator = IncrementalMigrator(table, state_file)
|
||||||
|
return migrator.migrate(dry_run=dry_run)
|
||||||
105
src/migrator/state.py
Normal file
105
src/migrator/state.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
"""Migration state management."""
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
from src.utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class MigrationState:
|
||||||
|
"""Manage migration state for incremental migrations."""
|
||||||
|
|
||||||
|
DEFAULT_STATE_FILE = "migration_state.json"
|
||||||
|
|
||||||
|
def __init__(self, state_file: str = DEFAULT_STATE_FILE):
|
||||||
|
"""Initialize migration state.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
state_file: Path to state file
|
||||||
|
"""
|
||||||
|
self.state_file = Path(state_file)
|
||||||
|
self.state = self._load_state()
|
||||||
|
|
||||||
|
def _load_state(self) -> Dict[str, Any]:
|
||||||
|
"""Load state from file."""
|
||||||
|
if self.state_file.exists():
|
||||||
|
try:
|
||||||
|
with open(self.state_file, "r") as f:
|
||||||
|
return json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to load state file: {e}")
|
||||||
|
return {}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _save_state(self) -> None:
|
||||||
|
"""Save state to file."""
|
||||||
|
try:
|
||||||
|
with open(self.state_file, "w") as f:
|
||||||
|
json.dump(self.state, f, indent=2)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to save state file: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def get_last_timestamp(self, table: str) -> Optional[str]:
|
||||||
|
"""Get last migration timestamp for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
ISO format timestamp or None if not found
|
||||||
|
"""
|
||||||
|
return self.state.get(table, {}).get("last_timestamp")
|
||||||
|
|
||||||
|
def set_last_timestamp(self, table: str, timestamp: str) -> None:
|
||||||
|
"""Set last migration timestamp for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
timestamp: ISO format timestamp
|
||||||
|
"""
|
||||||
|
if table not in self.state:
|
||||||
|
self.state[table] = {}
|
||||||
|
|
||||||
|
self.state[table]["last_timestamp"] = timestamp
|
||||||
|
self.state[table]["last_updated"] = datetime.utcnow().isoformat()
|
||||||
|
self._save_state()
|
||||||
|
|
||||||
|
def get_migration_count(self, table: str) -> int:
|
||||||
|
"""Get total migration count for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Total rows migrated
|
||||||
|
"""
|
||||||
|
return self.state.get(table, {}).get("total_migrated", 0)
|
||||||
|
|
||||||
|
def increment_migration_count(self, table: str, count: int) -> None:
|
||||||
|
"""Increment migration count for a table.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name
|
||||||
|
count: Number of rows to add
|
||||||
|
"""
|
||||||
|
if table not in self.state:
|
||||||
|
self.state[table] = {}
|
||||||
|
|
||||||
|
current = self.state[table].get("total_migrated", 0)
|
||||||
|
self.state[table]["total_migrated"] = current + count
|
||||||
|
self._save_state()
|
||||||
|
|
||||||
|
def reset(self, table: Optional[str] = None) -> None:
|
||||||
|
"""Reset migration state.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name to reset, or None to reset all
|
||||||
|
"""
|
||||||
|
if table:
|
||||||
|
self.state[table] = {}
|
||||||
|
else:
|
||||||
|
self.state = {}
|
||||||
|
self._save_state()
|
||||||
0
src/transformers/__init__.py
Normal file
0
src/transformers/__init__.py
Normal file
178
src/transformers/data_transformer.py
Normal file
178
src/transformers/data_transformer.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
"""Data transformation from MySQL to PostgreSQL format."""
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
from datetime import datetime
|
||||||
|
from config import (
|
||||||
|
RAWDATACOR_COLUMNS,
|
||||||
|
ELABDATADISP_FIELD_MAPPING,
|
||||||
|
TABLE_CONFIGS,
|
||||||
|
)
|
||||||
|
from src.utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DataTransformer:
|
||||||
|
"""Transform MySQL data to PostgreSQL format."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def transform_rawdatacor_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Transform a RAWDATACOR row from MySQL to PostgreSQL format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
mysql_row: Row dictionary from MySQL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Transformed row dictionary for PostgreSQL
|
||||||
|
"""
|
||||||
|
# Create measurements JSONB
|
||||||
|
measurements = {}
|
||||||
|
|
||||||
|
# Map Val0-ValF with their units
|
||||||
|
for i, val_col in enumerate(RAWDATACOR_COLUMNS["val_columns"]):
|
||||||
|
unit_col = RAWDATACOR_COLUMNS["unit_columns"][i]
|
||||||
|
|
||||||
|
value = mysql_row.get(val_col)
|
||||||
|
unit = mysql_row.get(unit_col)
|
||||||
|
|
||||||
|
# Only add to JSONB if value is not None
|
||||||
|
if value is not None:
|
||||||
|
measurements[str(i)] = {
|
||||||
|
"value": str(value),
|
||||||
|
"unit": unit if unit else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create PostgreSQL row
|
||||||
|
pg_row = {
|
||||||
|
"id": mysql_row["id"],
|
||||||
|
"unit_name": mysql_row.get("UnitName"),
|
||||||
|
"tool_name_id": mysql_row["ToolNameID"],
|
||||||
|
"node_num": mysql_row["NodeNum"],
|
||||||
|
"event_date": mysql_row["EventDate"],
|
||||||
|
"event_time": mysql_row["EventTime"],
|
||||||
|
"bat_level": mysql_row["BatLevel"],
|
||||||
|
"temperature": mysql_row["Temperature"],
|
||||||
|
"measurements": measurements,
|
||||||
|
"created_at": mysql_row.get("created_at"),
|
||||||
|
"bat_level_module": mysql_row.get("BatLevelModule"),
|
||||||
|
"temperature_module": mysql_row.get("TemperatureModule"),
|
||||||
|
"rssi_module": mysql_row.get("RssiModule"),
|
||||||
|
}
|
||||||
|
|
||||||
|
return pg_row
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def transform_elabdatadisp_row(mysql_row: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Transform an ELABDATADISP row from MySQL to PostgreSQL format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
mysql_row: Row dictionary from MySQL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Transformed row dictionary for PostgreSQL
|
||||||
|
"""
|
||||||
|
# Create measurements JSONB with structured categories
|
||||||
|
measurements = {
|
||||||
|
"shifts": {},
|
||||||
|
"coordinates": {},
|
||||||
|
"kinematics": {},
|
||||||
|
"sensors": {},
|
||||||
|
"calculated": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Map all measurement fields using the configuration
|
||||||
|
for mysql_col, (category, pg_key) in ELABDATADISP_FIELD_MAPPING.items():
|
||||||
|
value = mysql_row.get(mysql_col)
|
||||||
|
if value is not None:
|
||||||
|
measurements[category][pg_key] = float(value) if isinstance(value, str) else value
|
||||||
|
|
||||||
|
# Remove empty categories
|
||||||
|
measurements = {
|
||||||
|
k: v for k, v in measurements.items() if v
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create PostgreSQL row
|
||||||
|
pg_row = {
|
||||||
|
"id_elab_data": mysql_row["idElabData"],
|
||||||
|
"unit_name": mysql_row.get("UnitName"),
|
||||||
|
"tool_name_id": mysql_row["ToolNameID"],
|
||||||
|
"node_num": mysql_row["NodeNum"],
|
||||||
|
"event_date": mysql_row["EventDate"],
|
||||||
|
"event_time": mysql_row["EventTime"],
|
||||||
|
"state": mysql_row.get("State"),
|
||||||
|
"calc_err": mysql_row.get("calcerr", 0),
|
||||||
|
"measurements": measurements,
|
||||||
|
"created_at": mysql_row.get("created_at"),
|
||||||
|
"updated_at": mysql_row.get("updated_at"),
|
||||||
|
}
|
||||||
|
|
||||||
|
return pg_row
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def transform_batch(
|
||||||
|
table: str,
|
||||||
|
rows: List[Dict[str, Any]]
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Transform a batch of rows from MySQL to PostgreSQL format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: Table name ('RAWDATACOR' or 'ELABDATADISP')
|
||||||
|
rows: List of row dictionaries from MySQL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of transformed row dictionaries for PostgreSQL
|
||||||
|
"""
|
||||||
|
if table == "RAWDATACOR":
|
||||||
|
return [
|
||||||
|
DataTransformer.transform_rawdatacor_row(row)
|
||||||
|
for row in rows
|
||||||
|
]
|
||||||
|
elif table == "ELABDATADISP":
|
||||||
|
return [
|
||||||
|
DataTransformer.transform_elabdatadisp_row(row)
|
||||||
|
for row in rows
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown table: {table}")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_column_order(table: str) -> List[str]:
|
||||||
|
"""Get the column order for inserting into PostgreSQL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: PostgreSQL table name
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of column names in order
|
||||||
|
"""
|
||||||
|
if table == "rawdatacor":
|
||||||
|
return [
|
||||||
|
"id",
|
||||||
|
"unit_name",
|
||||||
|
"tool_name_id",
|
||||||
|
"node_num",
|
||||||
|
"event_date",
|
||||||
|
"event_time",
|
||||||
|
"bat_level",
|
||||||
|
"temperature",
|
||||||
|
"measurements",
|
||||||
|
"created_at",
|
||||||
|
"bat_level_module",
|
||||||
|
"temperature_module",
|
||||||
|
"rssi_module",
|
||||||
|
]
|
||||||
|
elif table == "elabdatadisp":
|
||||||
|
return [
|
||||||
|
"id_elab_data",
|
||||||
|
"unit_name",
|
||||||
|
"tool_name_id",
|
||||||
|
"node_num",
|
||||||
|
"event_date",
|
||||||
|
"event_time",
|
||||||
|
"state",
|
||||||
|
"calc_err",
|
||||||
|
"measurements",
|
||||||
|
"created_at",
|
||||||
|
"updated_at",
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unknown table: {table}")
|
||||||
149
src/transformers/schema_transformer.py
Normal file
149
src/transformers/schema_transformer.py
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
"""PostgreSQL schema creation from MySQL structure."""
|
||||||
|
from config import PARTITION_YEARS
|
||||||
|
from src.utils.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def create_rawdatacor_schema() -> str:
|
||||||
|
"""Create PostgreSQL schema for RAWDATACOR table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
SQL script to create the table with partitions
|
||||||
|
"""
|
||||||
|
sql = """
|
||||||
|
-- Create RAWDATACOR table with partitioning
|
||||||
|
CREATE TABLE IF NOT EXISTS rawdatacor (
|
||||||
|
id BIGSERIAL NOT NULL,
|
||||||
|
unit_name VARCHAR(32),
|
||||||
|
tool_name_id VARCHAR(32) NOT NULL,
|
||||||
|
node_num INTEGER NOT NULL,
|
||||||
|
event_date DATE NOT NULL,
|
||||||
|
event_time TIME NOT NULL,
|
||||||
|
bat_level NUMERIC(4,2) NOT NULL,
|
||||||
|
temperature NUMERIC(5,2) NOT NULL,
|
||||||
|
measurements JSONB,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
bat_level_module NUMERIC(4,2),
|
||||||
|
temperature_module NUMERIC(5,2),
|
||||||
|
rssi_module INTEGER,
|
||||||
|
PRIMARY KEY (id, event_date)
|
||||||
|
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
|
||||||
|
|
||||||
|
-- Create partitions for each year
|
||||||
|
"""
|
||||||
|
# Add partition creation statements
|
||||||
|
for year in PARTITION_YEARS:
|
||||||
|
next_year = year + 1
|
||||||
|
sql += f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS rawdatacor_{year}
|
||||||
|
PARTITION OF rawdatacor
|
||||||
|
FOR VALUES FROM ({year}) TO ({next_year});
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Add indexes
|
||||||
|
sql += """
|
||||||
|
-- Create indexes
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_raw
|
||||||
|
ON rawdatacor(unit_name, tool_name_id, node_num, event_date, event_time);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_unit_tool_raw
|
||||||
|
ON rawdatacor(unit_name, tool_name_id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_measurements_gin_raw
|
||||||
|
ON rawdatacor USING GIN (measurements);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_event_date_raw
|
||||||
|
ON rawdatacor(event_date);
|
||||||
|
"""
|
||||||
|
|
||||||
|
return sql
|
||||||
|
|
||||||
|
|
||||||
|
def create_elabdatadisp_schema() -> str:
|
||||||
|
"""Create PostgreSQL schema for ELABDATADISP table.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
SQL script to create the table with partitions
|
||||||
|
"""
|
||||||
|
sql = """
|
||||||
|
-- Create ELABDATADISP table with partitioning
|
||||||
|
CREATE TABLE IF NOT EXISTS elabdatadisp (
|
||||||
|
id_elab_data BIGSERIAL NOT NULL,
|
||||||
|
unit_name VARCHAR(32),
|
||||||
|
tool_name_id VARCHAR(32) NOT NULL,
|
||||||
|
node_num INTEGER NOT NULL,
|
||||||
|
event_date DATE NOT NULL,
|
||||||
|
event_time TIME NOT NULL,
|
||||||
|
state VARCHAR(32),
|
||||||
|
calc_err INTEGER DEFAULT 0,
|
||||||
|
measurements JSONB,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
PRIMARY KEY (id_elab_data, event_date)
|
||||||
|
) PARTITION BY RANGE (EXTRACT(YEAR FROM event_date));
|
||||||
|
|
||||||
|
-- Create partitions for each year
|
||||||
|
"""
|
||||||
|
# Add partition creation statements
|
||||||
|
for year in PARTITION_YEARS:
|
||||||
|
next_year = year + 1
|
||||||
|
sql += f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS elabdatadisp_{year}
|
||||||
|
PARTITION OF elabdatadisp
|
||||||
|
FOR VALUES FROM ({year}) TO ({next_year});
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Add indexes
|
||||||
|
sql += """
|
||||||
|
-- Create indexes
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_unit_tool_node_datetime_elab
|
||||||
|
ON elabdatadisp(unit_name, tool_name_id, node_num, event_date, event_time);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_unit_tool_elab
|
||||||
|
ON elabdatadisp(unit_name, tool_name_id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_measurements_gin_elab
|
||||||
|
ON elabdatadisp USING GIN (measurements);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_event_date_elab
|
||||||
|
ON elabdatadisp(event_date);
|
||||||
|
"""
|
||||||
|
|
||||||
|
return sql
|
||||||
|
|
||||||
|
|
||||||
|
def create_migration_state_table() -> str:
|
||||||
|
"""Create table to track migration state.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
SQL to create migration_state table
|
||||||
|
"""
|
||||||
|
sql = """
|
||||||
|
-- Create table to track migration state
|
||||||
|
CREATE TABLE IF NOT EXISTS migration_state (
|
||||||
|
table_name VARCHAR(255) PRIMARY KEY,
|
||||||
|
last_migrated_timestamp TIMESTAMP,
|
||||||
|
last_migrated_id BIGINT,
|
||||||
|
migration_started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
migration_completed_at TIMESTAMP,
|
||||||
|
total_rows_migrated BIGINT DEFAULT 0,
|
||||||
|
status VARCHAR(32) DEFAULT 'pending'
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
return sql
|
||||||
|
|
||||||
|
|
||||||
|
def get_full_schema_script() -> str:
|
||||||
|
"""Get complete schema creation script for PostgreSQL.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Full SQL script to create all tables and indexes
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
create_rawdatacor_schema() +
|
||||||
|
"\n\n" +
|
||||||
|
create_elabdatadisp_schema() +
|
||||||
|
"\n\n" +
|
||||||
|
create_migration_state_table()
|
||||||
|
)
|
||||||
0
src/utils/__init__.py
Normal file
0
src/utils/__init__.py
Normal file
42
src/utils/logger.py
Normal file
42
src/utils/logger.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
"""Logging utility with Rich integration."""
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from rich.logging import RichHandler
|
||||||
|
from rich.console import Console
|
||||||
|
from config import get_settings
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logger(name: str) -> logging.Logger:
|
||||||
|
"""Set up a logger with Rich handler."""
|
||||||
|
settings = get_settings()
|
||||||
|
|
||||||
|
logger = logging.getLogger(name)
|
||||||
|
logger.setLevel(getattr(logging, settings.migration.log_level))
|
||||||
|
|
||||||
|
# Remove existing handlers
|
||||||
|
logger.handlers.clear()
|
||||||
|
|
||||||
|
# Create console handler with Rich
|
||||||
|
handler = RichHandler(
|
||||||
|
console=Console(file=sys.stderr),
|
||||||
|
show_time=True,
|
||||||
|
show_level=True,
|
||||||
|
show_path=False,
|
||||||
|
)
|
||||||
|
handler.setLevel(getattr(logging, settings.migration.log_level))
|
||||||
|
|
||||||
|
# Create formatter
|
||||||
|
formatter = logging.Formatter(
|
||||||
|
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||||
|
)
|
||||||
|
handler.setFormatter(formatter)
|
||||||
|
|
||||||
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
return logger
|
||||||
|
|
||||||
|
|
||||||
|
def get_logger(name: str) -> logging.Logger:
|
||||||
|
"""Get or create a logger."""
|
||||||
|
return logging.getLogger(name)
|
||||||
73
src/utils/progress.py
Normal file
73
src/utils/progress.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
"""Progress tracking utility."""
|
||||||
|
from rich.progress import (
|
||||||
|
Progress,
|
||||||
|
SpinnerColumn,
|
||||||
|
BarColumn,
|
||||||
|
TaskProgressColumn,
|
||||||
|
TimeRemainingColumn,
|
||||||
|
TimeElapsedColumn,
|
||||||
|
TransferSpeedColumn,
|
||||||
|
)
|
||||||
|
from rich.console import Console
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressTracker:
|
||||||
|
"""Track migration progress with Rich progress bar."""
|
||||||
|
|
||||||
|
def __init__(self, total: int, description: str = "Migrating"):
|
||||||
|
"""Initialize progress tracker.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
total: Total number of items to process
|
||||||
|
description: Description of the task
|
||||||
|
"""
|
||||||
|
self.total = total
|
||||||
|
self.description = description
|
||||||
|
self.progress = Progress(
|
||||||
|
SpinnerColumn(),
|
||||||
|
BarColumn(),
|
||||||
|
TaskProgressColumn(),
|
||||||
|
TimeElapsedColumn(),
|
||||||
|
TimeRemainingColumn(),
|
||||||
|
TransferSpeedColumn(),
|
||||||
|
console=Console(),
|
||||||
|
)
|
||||||
|
self.task_id = None
|
||||||
|
self.start_time = None
|
||||||
|
self.processed = 0
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
"""Context manager entry."""
|
||||||
|
self.progress.start()
|
||||||
|
self.task_id = self.progress.add_task(
|
||||||
|
self.description, total=self.total
|
||||||
|
)
|
||||||
|
self.start_time = time.time()
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
"""Context manager exit."""
|
||||||
|
self.progress.stop()
|
||||||
|
if exc_type is None:
|
||||||
|
elapsed = time.time() - self.start_time
|
||||||
|
rate = self.processed / elapsed if elapsed > 0 else 0
|
||||||
|
self.progress.console.print(
|
||||||
|
f"[green]✓ Completed: {self.processed}/{self.total} items "
|
||||||
|
f"in {elapsed:.2f}s ({rate:.0f} items/sec)[/green]"
|
||||||
|
)
|
||||||
|
|
||||||
|
def update(self, advance: int = 1):
|
||||||
|
"""Update progress.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
advance: Number of items processed
|
||||||
|
"""
|
||||||
|
if self.task_id is not None:
|
||||||
|
self.progress.update(self.task_id, advance=advance)
|
||||||
|
self.processed += advance
|
||||||
|
|
||||||
|
def print_status(self, message: str):
|
||||||
|
"""Print a status message without interrupting progress bar."""
|
||||||
|
if self.task_id is not None:
|
||||||
|
self.progress.print(message)
|
||||||
Reference in New Issue
Block a user