Fix incremental migration

This commit is contained in:
2025-12-30 15:16:54 +01:00
parent 79cd4f4559
commit 5c9df3d06f
14 changed files with 2901 additions and 233 deletions

43
main.py
View File

@@ -6,8 +6,9 @@ from pathlib import Path
from config import get_settings
from src.utils.logger import setup_logger, get_logger
from src.transformers.schema_transformer import get_full_schema_script
from src.migrator.full_migration import run_full_migration
from src.migrator.incremental_migration import run_incremental_migration
from src.migrator.full_migrator import run_full_migration
from src.migrator.incremental_migrator import run_incremental_migration
from src.migrator.parallel_migrator import run_parallel_migration
from src.benchmark.performance_test import run_benchmark
from src.connectors.postgres_connector import PostgreSQLConnector
@@ -80,18 +81,36 @@ def migrate():
default=None,
help="Only migrate this partition (for testing/debugging)"
)
def full(table, dry_run, resume, partition):
@click.option(
"--parallel",
type=int,
default=None,
help="Number of parallel workers (e.g., --parallel 5 for 5 workers)"
)
def full(table, dry_run, resume, partition, parallel):
"""Perform full migration of all data."""
setup_logger(__name__)
tables = ["RAWDATACOR", "ELABDATADISP"] if table == "all" else [table]
# Validate options
if parallel and partition:
click.echo("✗ Cannot use --parallel with --partition", err=True)
sys.exit(1)
try:
total_migrated = 0
for tbl in tables:
click.echo(f"\nMigrating {tbl}" + (f" (partition {partition})" if partition else "") + "...")
migrated = run_full_migration(tbl, dry_run=dry_run, resume=resume, partition=partition)
if parallel:
# Parallel migration mode
click.echo(f"\nMigrating {tbl} with {parallel} parallel workers...")
migrated = run_parallel_migration(tbl, num_workers=parallel, dry_run=dry_run, resume=resume)
else:
# Sequential migration mode
click.echo(f"\nMigrating {tbl}" + (f" (partition {partition})" if partition else "") + "...")
migrated = run_full_migration(tbl, dry_run=dry_run, resume=resume, partition=partition)
total_migrated += migrated
click.echo(f"{tbl}: {migrated} rows migrated")
@@ -115,14 +134,9 @@ def full(table, dry_run, resume, partition):
is_flag=True,
help="Show what would be done without modifying data"
)
@click.option(
"--state-file",
default="migration_state.json",
help="Path to migration state file"
)
def incremental(table, dry_run, state_file):
"""Perform incremental migration since last sync."""
setup_logger(__name__)
def incremental(table, dry_run):
"""Perform incremental migration since last sync (based on consolidation keys)."""
setup_logger("") # Set up root logger so all child loggers work
tables = ["RAWDATACOR", "ELABDATADISP"] if table == "all" else [table]
@@ -131,7 +145,7 @@ def incremental(table, dry_run, state_file):
for tbl in tables:
click.echo(f"\nIncremental migration for {tbl}...")
migrated = run_incremental_migration(tbl, dry_run=dry_run, state_file=state_file)
migrated = run_incremental_migration(tbl, dry_run=dry_run)
total_migrated += migrated
if migrated > 0:
click.echo(f"{tbl}: {migrated} rows migrated")
@@ -196,6 +210,7 @@ def info():
click.echo("\n[Migration Settings]")
click.echo(f" Batch Size: {settings.migration.batch_size}")
click.echo(f" Consolidation Group Limit: {settings.migration.consolidation_group_limit}")
click.echo(f" Log Level: {settings.migration.log_level}")
click.echo(f" Dry Run: {settings.migration.dry_run}")