docs: Add comprehensive documentation and helper scripts
Add: - QUICKSTART.md: 5-minute quick start guide with examples - scripts/incus_setup.sh: Automated PostgreSQL container setup - scripts/validate_migration.sql: SQL validation queries - scripts/setup_cron.sh: Cron job setup for incremental migrations - tests/test_setup.py: Unit tests for configuration and transformation - install.sh: Quick installation script Documentation includes: - Step-by-step setup instructions - Example queries for RAWDATACOR and ELABDATADISP - Troubleshooting guide - Performance optimization tips 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
275
QUICKSTART.md
Normal file
275
QUICKSTART.md
Normal file
@@ -0,0 +1,275 @@
|
|||||||
|
# Quick Start Guide
|
||||||
|
|
||||||
|
Guida rapida per iniziare con il migration tool.
|
||||||
|
|
||||||
|
## Setup in 5 minuti
|
||||||
|
|
||||||
|
### 1. Clonare e configurare
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Entrare nella directory
|
||||||
|
cd mysql2postgres
|
||||||
|
|
||||||
|
# Creare environment
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
|
||||||
|
# Installare dipendenze
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configurare .env
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copiare template
|
||||||
|
cp .env.example .env
|
||||||
|
|
||||||
|
# Editare con le tue credenziali
|
||||||
|
nano .env
|
||||||
|
```
|
||||||
|
|
||||||
|
**Esempio .env:**
|
||||||
|
```env
|
||||||
|
MYSQL_HOST=localhost
|
||||||
|
MYSQL_PORT=3306
|
||||||
|
MYSQL_USER=root
|
||||||
|
MYSQL_PASSWORD=mypassword
|
||||||
|
MYSQL_DATABASE=production_db
|
||||||
|
|
||||||
|
POSTGRES_HOST=localhost
|
||||||
|
POSTGRES_PORT=5432
|
||||||
|
POSTGRES_USER=postgres
|
||||||
|
POSTGRES_PASSWORD=pgpassword
|
||||||
|
POSTGRES_DATABASE=migrated_db
|
||||||
|
|
||||||
|
BATCH_SIZE=10000
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Creare PostgreSQL in Incus
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Creare container Incus
|
||||||
|
incus launch images:ubuntu/22.04 pg-server
|
||||||
|
|
||||||
|
# Accedere al container
|
||||||
|
incus shell pg-server
|
||||||
|
|
||||||
|
# Dentro il container:
|
||||||
|
apt update && apt install -y postgresql postgresql-contrib
|
||||||
|
|
||||||
|
# Avviare PostgreSQL
|
||||||
|
systemctl start postgresql
|
||||||
|
systemctl enable postgresql
|
||||||
|
|
||||||
|
# Uscire dal container
|
||||||
|
exit
|
||||||
|
|
||||||
|
# Ottenere IP del container
|
||||||
|
incus list
|
||||||
|
# Annota l'IP e usalo in POSTGRES_HOST nel .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Eseguire migrazione
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Verificare configurazione
|
||||||
|
python main.py info
|
||||||
|
|
||||||
|
# Creare schema
|
||||||
|
python main.py setup --create-schema
|
||||||
|
|
||||||
|
# Migrare tutti i dati
|
||||||
|
python main.py migrate full
|
||||||
|
|
||||||
|
# Verificare risultati
|
||||||
|
python main.py migrate incremental # Dovrebbe dire "No new rows"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Eseguire benchmark
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python main.py benchmark
|
||||||
|
```
|
||||||
|
|
||||||
|
## Comandi Frequenti
|
||||||
|
|
||||||
|
### Migrazione iniziale
|
||||||
|
```bash
|
||||||
|
# Dry-run (vedere cosa verrebbe fatto)
|
||||||
|
python main.py migrate full --dry-run
|
||||||
|
|
||||||
|
# Effettiva migrazione
|
||||||
|
python main.py migrate full
|
||||||
|
|
||||||
|
# Solo una tabella
|
||||||
|
python main.py migrate full --table RAWDATACOR
|
||||||
|
```
|
||||||
|
|
||||||
|
### Migrazioni periodiche
|
||||||
|
```bash
|
||||||
|
# Migrare solo i cambiamenti dal last sync
|
||||||
|
python main.py migrate incremental
|
||||||
|
|
||||||
|
# Con stato personalizzato
|
||||||
|
python main.py migrate incremental --state-file daily_sync.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Benchmark
|
||||||
|
```bash
|
||||||
|
# Benchmark di default (5 iterazioni)
|
||||||
|
python main.py benchmark
|
||||||
|
|
||||||
|
# Con più iterazioni
|
||||||
|
python main.py benchmark --iterations 20
|
||||||
|
|
||||||
|
# Con output personalizzato
|
||||||
|
python main.py benchmark --output my_results.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Esempi di Query su PostgreSQL
|
||||||
|
|
||||||
|
Dopo la migrazione, puoi interrogare i dati in PostgreSQL:
|
||||||
|
|
||||||
|
### RAWDATACOR
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Tutti i dati per un'unità
|
||||||
|
SELECT * FROM rawdatacor
|
||||||
|
WHERE unit_name = 'Unit1'
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- Filtrare per valore di una misura
|
||||||
|
SELECT id, event_date, event_time,
|
||||||
|
measurements->'0'->>'value' as val0,
|
||||||
|
measurements->'0'->>'unit' as val0_unit
|
||||||
|
FROM rawdatacor
|
||||||
|
WHERE measurements ? '0'
|
||||||
|
AND (measurements->'0'->>'value')::NUMERIC > 10.0;
|
||||||
|
|
||||||
|
-- Aggregazione per data
|
||||||
|
SELECT event_date, COUNT(*) as record_count
|
||||||
|
FROM rawdatacor
|
||||||
|
WHERE event_date >= '2024-01-01'
|
||||||
|
GROUP BY event_date
|
||||||
|
ORDER BY event_date;
|
||||||
|
|
||||||
|
-- Statistiche per unità e strumento
|
||||||
|
SELECT unit_name, tool_name_id, COUNT(*) as total_records
|
||||||
|
FROM rawdatacor
|
||||||
|
GROUP BY unit_name, tool_name_id
|
||||||
|
ORDER BY total_records DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
### ELABDATADISP
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Dati con velocità
|
||||||
|
SELECT id_elab_data, event_date, event_time,
|
||||||
|
(measurements->'kinematics'->>'speed')::NUMERIC as speed,
|
||||||
|
(measurements->'kinematics'->>'acceleration')::NUMERIC as acceleration
|
||||||
|
FROM elabdatadisp
|
||||||
|
WHERE measurements @> '{"kinematics": {}}'
|
||||||
|
LIMIT 10;
|
||||||
|
|
||||||
|
-- Filtro su intervallo
|
||||||
|
SELECT unit_name, COUNT(*) as count
|
||||||
|
FROM elabdatadisp
|
||||||
|
WHERE (measurements->'kinematics'->>'speed')::NUMERIC > 5.0
|
||||||
|
GROUP BY unit_name;
|
||||||
|
|
||||||
|
-- Media velocità per unità
|
||||||
|
SELECT unit_name,
|
||||||
|
AVG((measurements->'kinematics'->>'speed')::NUMERIC) as avg_speed,
|
||||||
|
MAX((measurements->'kinematics'->>'speed')::NUMERIC) as max_speed
|
||||||
|
FROM elabdatadisp
|
||||||
|
WHERE event_date >= '2024-01-01'
|
||||||
|
GROUP BY unit_name;
|
||||||
|
|
||||||
|
-- Dati con errore di calcolo
|
||||||
|
SELECT * FROM elabdatadisp
|
||||||
|
WHERE calc_err > 0
|
||||||
|
AND event_date >= '2024-01-01'
|
||||||
|
ORDER BY event_date DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitorare Progress
|
||||||
|
|
||||||
|
Il tool mostra una progress bar durante la migrazione:
|
||||||
|
|
||||||
|
```
|
||||||
|
Migrating RAWDATACOR █████████████░░░░░░░░░░░░░░░░░░░░░ 45% 00:05:23
|
||||||
|
```
|
||||||
|
|
||||||
|
I log sono salvati in:
|
||||||
|
- Console: Output di default
|
||||||
|
- File: `migration.log` (configurabile)
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### "Cannot connect to MySQL"
|
||||||
|
```bash
|
||||||
|
# Verificare che MySQL sia online
|
||||||
|
mysql -h localhost -u root -p -e "SELECT 1"
|
||||||
|
```
|
||||||
|
|
||||||
|
### "Table does not exist in PostgreSQL"
|
||||||
|
```bash
|
||||||
|
# Ricreate lo schema
|
||||||
|
python main.py setup --create-schema
|
||||||
|
```
|
||||||
|
|
||||||
|
### "Migration is slow"
|
||||||
|
```bash
|
||||||
|
# Aumentare batch size in .env
|
||||||
|
BATCH_SIZE=50000
|
||||||
|
|
||||||
|
# Oppure ottimizzare MySQL
|
||||||
|
mysql> FLUSH PRIVILEGES;
|
||||||
|
```
|
||||||
|
|
||||||
|
### "Benchmark queries fail"
|
||||||
|
```bash
|
||||||
|
# Verificare che le tabelle siano state migrate
|
||||||
|
SELECT COUNT(*) FROM rawdatacor;
|
||||||
|
|
||||||
|
# Verificare JSONB è valido
|
||||||
|
SELECT measurements FROM rawdatacor LIMIT 1;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Prossimi Passi
|
||||||
|
|
||||||
|
1. **Validare i dati**
|
||||||
|
```bash
|
||||||
|
# Contare righe in entrambi i database
|
||||||
|
# MySQL
|
||||||
|
mysql> SELECT COUNT(*) FROM RAWDATACOR;
|
||||||
|
|
||||||
|
# PostgreSQL
|
||||||
|
psql> SELECT COUNT(*) FROM rawdatacor;
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Testare query critiche**
|
||||||
|
- Assicurarsi che le query dell'applicazione funzionino su PostgreSQL
|
||||||
|
|
||||||
|
3. **Benchmark performance**
|
||||||
|
```bash
|
||||||
|
python main.py benchmark --iterations 20
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Setup migrazioni periodiche**
|
||||||
|
- Schedulare `python main.py migrate incremental` con cron/systemd timer
|
||||||
|
|
||||||
|
5. **Mantenimento indici**
|
||||||
|
```sql
|
||||||
|
-- Analizzare tabelle
|
||||||
|
ANALYZE rawdatacor;
|
||||||
|
ANALYZE elabdatadisp;
|
||||||
|
|
||||||
|
-- Reindex se necessario
|
||||||
|
REINDEX TABLE rawdatacor;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
Per domande o problemi, consulta il file README.md completo.
|
||||||
55
install.sh
Executable file
55
install.sh
Executable file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/bin/bash
# Quick installation script: creates a virtual environment, installs the
# package in editable mode, and seeds a .env file from the template.

set -e

echo "MySQL to PostgreSQL Migration Tool - Installation"
echo "=================================================="
echo ""

# BUG FIX: fail early with a clear message if Python 3 is missing — with
# `set -e` alone the version probe below would abort with a confusing
# "command not found" error instead.
if ! command -v python3 > /dev/null 2>&1; then
    echo "✗ python3 not found in PATH. Install Python 3 and retry." >&2
    exit 1
fi

# Check Python version
PYTHON_VERSION=$(python3 --version 2>&1 | awk '{print $2}')
echo "✓ Python $PYTHON_VERSION detected"

# Create virtual environment
echo ""
echo "Creating virtual environment..."
python3 -m venv venv
source venv/bin/activate
echo "✓ Virtual environment created"

# Upgrade pip and build tooling inside the venv
echo ""
echo "Upgrading pip..."
pip install --upgrade pip setuptools wheel > /dev/null 2>&1
echo "✓ pip upgraded"

# Install the project and its dependencies (editable mode)
echo ""
echo "Installing dependencies..."
pip install -e . > /dev/null 2>&1
echo "✓ Dependencies installed"

# Copy .env.example to .env if not exists (never clobber an existing .env)
if [ ! -f .env ]; then
    echo ""
    echo "Creating .env file from template..."
    cp .env.example .env
    echo "✓ .env created (edit with your database credentials)"
else
    echo ""
    echo "ℹ .env already exists"
fi

echo ""
echo "=================================================="
echo "Installation complete!"
echo ""
echo "Next steps:"
echo "1. Edit .env with your database credentials"
echo "2. Activate virtual environment: source venv/bin/activate"
echo "3. Verify setup: python main.py info"
echo "4. Create schema: python main.py setup --create-schema"
echo "5. Run migration: python main.py migrate full"
echo ""
echo "For more help, see README.md or QUICKSTART.md"
|
||||||
52
scripts/incus_setup.sh
Executable file
52
scripts/incus_setup.sh
Executable file
@@ -0,0 +1,52 @@
|
|||||||
|
#!/bin/bash
# Setup PostgreSQL inside an Incus container.
# Usage: incus_setup.sh [container-name] [postgres-password]

set -e

CONTAINER_NAME=${1:-pg-server}
POSTGRES_PASSWORD=${2:-postgres}

echo "Creating Incus container: $CONTAINER_NAME"

# Create the container and wait until it is running
incus launch images:ubuntu/22.04 "$CONTAINER_NAME" --wait

echo "Installing PostgreSQL..."

# Install PostgreSQL from the distro repositories
incus exec "$CONTAINER_NAME" -- apt update
incus exec "$CONTAINER_NAME" -- apt install -y postgresql postgresql-contrib

echo "Starting PostgreSQL..."

# Start PostgreSQL now and on every boot
incus exec "$CONTAINER_NAME" -- systemctl start postgresql
incus exec "$CONTAINER_NAME" -- systemctl enable postgresql

# Set the postgres superuser password
incus exec "$CONTAINER_NAME" -- sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD '$POSTGRES_PASSWORD';"

# Allow TCP connections from outside the container.
# BUG FIX: the config path was hard-coded to /etc/postgresql/14/main; detect
# the installed major version instead so the script survives a different
# PostgreSQL release.  The pg_hba.conf append is also guarded with grep so
# re-running the script does not accumulate duplicate rules.
incus exec "$CONTAINER_NAME" -- bash -c "
PG_CONF_DIR=\$(ls -d /etc/postgresql/*/main | head -n1)
grep -q 'host all all 0.0.0.0/0 md5' \"\$PG_CONF_DIR/pg_hba.conf\" || \
    echo 'host all all 0.0.0.0/0 md5' >> \"\$PG_CONF_DIR/pg_hba.conf\"
sed -i \"s/#listen_addresses = 'localhost'/listen_addresses = '*'/\" \"\$PG_CONF_DIR/postgresql.conf\"
"

# Restart PostgreSQL so the new network configuration takes effect
incus exec "$CONTAINER_NAME" -- systemctl restart postgresql

# Get the container IPv4 address.
# CSV output avoids parsing the ASCII-art table that `incus list` prints by
# default (the old `tail -n1` grabbed the table border on some versions).
IP=$(incus list "$CONTAINER_NAME" -c 4 --format csv | awk '{print $1}' | head -n1)

echo ""
echo "✓ PostgreSQL is running!"
echo ""
echo "Connection details:"
echo "  Host: $IP"
echo "  Port: 5432"
echo "  User: postgres"
echo "  Password: $POSTGRES_PASSWORD"
echo ""
echo "Update .env file with:"
echo "  POSTGRES_HOST=$IP"
echo "  POSTGRES_PASSWORD=$POSTGRES_PASSWORD"
|
||||||
37
scripts/setup_cron.sh
Executable file
37
scripts/setup_cron.sh
Executable file
@@ -0,0 +1,37 @@
|
|||||||
|
#!/bin/bash
# Setup a cron job that runs the incremental migration every 6 hours.

PROJECT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
VENV_PYTHON="$PROJECT_DIR/venv/bin/python"

# BUG FIX: the log filename used to expand $(date ...) *now*, so every future
# cron run appended to the file stamped with the setup date.  Escape the
# command substitution — and '%', which is special in crontab(5) — so the
# date is evaluated at each cron run instead, giving one log file per day.
LOG_FILE="$PROJECT_DIR/migration_\$(date +\%Y\%m\%d).log"

# Create cron job entry (minute 0 of every 6th hour)
CRON_ENTRY="0 */6 * * * cd $PROJECT_DIR && $VENV_PYTHON main.py migrate incremental >> $LOG_FILE 2>&1"

echo "Cron job to be added:"
echo "$CRON_ENTRY"
echo ""
echo "This will run incremental migration every 6 hours."
echo ""

# Skip installation if an incremental-migration job is already scheduled
if crontab -l 2>/dev/null | grep -q "migrate incremental"; then
    echo "⚠ Cron job already exists"
    echo ""
    echo "Current cron jobs:"
    crontab -l | grep -v '^#' | grep -v '^$'
else
    echo "Add to crontab? (y/n)"
    read -r response

    if [ "$response" = "y" ]; then
        # Append while preserving any existing crontab entries
        (crontab -l 2>/dev/null; echo "$CRON_ENTRY") | crontab -
        echo "✓ Cron job added successfully"
        echo ""
        echo "Verify with: crontab -l"
        echo "View logs: tail -f migration_*.log"
    else
        echo "Cron job not added"
    fi
fi
|
||||||
90
scripts/validate_migration.sql
Normal file
90
scripts/validate_migration.sql
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
-- Validation queries for PostgreSQL after migration.
-- Run with psql; sections 1 and the MySQL counterparts must be compared by hand.

-- 1. Verify row counts match between MySQL and PostgreSQL
-- Run this on both databases and compare

-- PostgreSQL queries:
SELECT 'rawdatacor' AS table_name, COUNT(*) AS row_count FROM rawdatacor
UNION ALL
SELECT 'elabdatadisp' AS table_name, COUNT(*) AS row_count FROM elabdatadisp;

-- 2. Check for NULL values in JSONB (both counts should be 0)
SELECT 'rawdatacor with NULL measurements' AS check_name, COUNT(*) AS count
FROM rawdatacor WHERE measurements IS NULL
UNION ALL
SELECT 'elabdatadisp with NULL measurements' AS check_name, COUNT(*) AS count
FROM elabdatadisp WHERE measurements IS NULL;

-- 3. Verify date range coverage
SELECT
    'rawdatacor dates' AS table_name,
    MIN(event_date) AS min_date,
    MAX(event_date) AS max_date,
    COUNT(DISTINCT event_date) AS distinct_dates
FROM rawdatacor
UNION ALL
SELECT
    'elabdatadisp dates' AS table_name,
    MIN(event_date) AS min_date,
    MAX(event_date) AS max_date,
    COUNT(DISTINCT event_date) AS distinct_dates
FROM elabdatadisp;

-- 4. Verify partitions are in use (the plan should show partition pruning)
-- BUG FIX: the upper bound was '2024-12-31' exclusive, which silently
-- dropped December 31st from the "year 2024" range.
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2025-01-01';

-- 5. Check JSONB structure samples
-- RAWDATACOR
SELECT measurements FROM rawdatacor WHERE measurements IS NOT NULL LIMIT 1;

-- ELABDATADISP
SELECT measurements FROM elabdatadisp WHERE measurements IS NOT NULL LIMIT 1;

-- 6. Verify indexes exist
SELECT schemaname, tablename, indexname
FROM pg_indexes
WHERE tablename IN ('rawdatacor', 'elabdatadisp')
ORDER BY tablename, indexname;

-- 7. Performance: simple queries (\timing is a psql meta-command)
\timing on

-- Single row by primary key
SELECT * FROM rawdatacor WHERE id = 1000 AND event_date = '2024-01-01';

-- Date range scan (full year 2024, inclusive of Dec 31)
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2025-01-01';

-- Unit and tool filter
SELECT COUNT(*) FROM rawdatacor WHERE unit_name = 'Unit1' AND tool_name_id = 'Tool1';

-- JSONB key-existence filter
SELECT COUNT(*) FROM rawdatacor WHERE measurements ? '0';

\timing off

-- 8. Identify partitions with data
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
FROM pg_tables
WHERE tablename LIKE 'rawdatacor_%' OR tablename LIKE 'elabdatadisp_%'
ORDER BY tablename;

-- 9. Check for constraint violations: duplicate unique keys.
-- BUG FIX: the original ran GROUP BY ... HAVING COUNT(*) > 1 directly with a
-- constant label, which returned one row *per duplicate group* (each showing
-- its own group size) instead of a summary.  Wrap the grouped query in a
-- subquery so each check yields exactly one row; 0 means no duplicates.
SELECT 'rawdatacor duplicate unique key' AS check_name,
       COUNT(*) AS duplicate_groups
FROM (
    SELECT 1
    FROM rawdatacor
    GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
    HAVING COUNT(*) > 1
) dup_raw
UNION ALL
SELECT 'elabdatadisp duplicate unique key' AS check_name,
       COUNT(*) AS duplicate_groups
FROM (
    SELECT 1
    FROM elabdatadisp
    GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
    HAVING COUNT(*) > 1
) dup_elab;
||||||
153
tests/test_setup.py
Normal file
153
tests/test_setup.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
"""Test setup and basic functionality."""
|
||||||
|
import pytest
|
||||||
|
from config import get_settings, TABLE_CONFIGS, RAWDATACOR_COLUMNS, ELABDATADISP_FIELD_MAPPING
|
||||||
|
from src.transformers.data_transformer import DataTransformer
|
||||||
|
|
||||||
|
|
||||||
|
class TestConfiguration:
    """Test configuration loading."""

    def test_settings_loaded(self):
        """Settings load and expose both database hosts."""
        settings = get_settings()
        assert settings is not None
        assert settings.mysql.host is not None
        assert settings.postgres.host is not None

    def test_table_configs_exist(self):
        """Table configurations are present and include RAWDATACOR."""
        # BUG FIX: the original assertion
        #   "RAWDATACOR" in TABLE_CONFIGS or len(TABLE_CONFIGS) > 0
        # passed whenever TABLE_CONFIGS was non-empty, even if RAWDATACOR
        # was missing — it never actually checked anything useful.
        assert TABLE_CONFIGS, "TABLE_CONFIGS must not be empty"
        assert "RAWDATACOR" in TABLE_CONFIGS

    def test_migration_batch_size(self):
        """Batch size is configured within a sane range."""
        settings = get_settings()
        assert 0 < settings.migration.batch_size <= 1000000
|
||||||
|
|
||||||
|
|
||||||
|
class TestDataTransformation:
    """Test data transformation logic."""

    def test_rawdatacor_transformation(self):
        """RAWDATACOR rows collapse Val*/Val*_unitmisure pairs into a dict."""
        # Sample MySQL row: Val0 and Val2 carry data, Val1 is NULL.
        row = {
            "id": 1,
            "UnitName": "TestUnit",
            "ToolNameID": "Tool1",
            "NodeNum": 1,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "BatLevel": 3.5,
            "Temperature": 25.5,
            "Val0": "100.5",
            "Val1": None,
            "Val2": "200.3",
            "Val0_unitmisure": "°C",
            "Val1_unitmisure": "bar",
            "Val2_unitmisure": "m/s",
        }
        # The remaining hex-named value columns (Val3..ValF) stay NULL.
        for suffix in "3456789ABCDEF":
            row[f"Val{suffix}"] = None
            row[f"Val{suffix}_unitmisure"] = None

        transformed = DataTransformer.transform_rawdatacor_row(row)

        assert transformed["id"] == 1
        assert transformed["unit_name"] == "TestUnit"
        assert transformed["tool_name_id"] == "Tool1"
        measurements = transformed["measurements"]
        assert isinstance(measurements, dict)
        assert "0" in measurements
        assert measurements["0"]["value"] == "100.5"
        assert measurements["0"]["unit"] == "°C"
        assert "1" not in measurements  # NULL values excluded
        assert "2" in measurements

    def test_elabdatadisp_transformation(self):
        """ELABDATADISP rows are grouped into shifts/coordinates/kinematics."""
        # Sample MySQL row covering every measurement group.
        row = {
            "idElabData": 5000,
            "UnitName": "TestUnit",
            "ToolNameID": "Tool1",
            "NodeNum": 1,
            "EventDate": "2024-01-01",
            "EventTime": "12:00:00",
            "State": "OK",
            "calcerr": 0,
            "XShift": 1.234567,
            "YShift": 2.345678,
            "ZShift": 3.456789,
            "HShift": 4.567890,
            "HShiftDir": 5.678901,
            "HShift_local": 6.789012,
            "X": 10.123456,
            "Y": 20.234567,
            "Z": 30.345678,
            "Xstar": 40.456789,
            "Zstar": 50.567890,
            "speed": 1.111111,
            "speed_local": 2.222222,
            "acceleration": 3.333333,
            "acceleration_local": 4.444444,
            "T_node": 25.5,
            "load_value": 100.5,
            "water_level": 50.5,
            "pressure": 1.013,
            "AlfaX": 0.123456,
            "AlfaY": 0.234567,
            "Area": 100.5,
        }

        transformed = DataTransformer.transform_elabdatadisp_row(row)

        assert transformed["id_elab_data"] == 5000
        assert transformed["state"] == "OK"
        measurements = transformed["measurements"]
        assert isinstance(measurements, dict)
        for group in ("shifts", "coordinates", "kinematics"):
            assert group in measurements
        assert measurements["shifts"]["x"] == 1.234567
        assert measurements["coordinates"]["x"] == 10.123456
        assert measurements["kinematics"]["speed"] == 1.111111

    def test_column_order_rawdatacor(self):
        """Column order for rawdatacor includes the key PG columns."""
        columns = DataTransformer.get_column_order("rawdatacor")
        assert isinstance(columns, list)
        assert {"id", "measurements", "unit_name"} <= set(columns)

    def test_column_order_elabdatadisp(self):
        """Column order for elabdatadisp includes the key PG columns."""
        columns = DataTransformer.get_column_order("elabdatadisp")
        assert isinstance(columns, list)
        assert {"id_elab_data", "measurements", "state"} <= set(columns)
|
||||||
|
|
||||||
|
|
||||||
|
class TestFieldMapping:
    """Test field mapping configuration."""

    def test_all_rawdatacor_columns_mapped(self):
        """Every RAWDATACOR value column follows the Val* naming scheme."""
        assert all(
            col.startswith("Val")
            for col in RAWDATACOR_COLUMNS["val_columns"]
        )

    def test_all_elabdatadisp_fields_mapped(self):
        """ELABDATADISP exposes a substantial field mapping."""
        mapped = set(ELABDATADISP_FIELD_MAPPING)
        assert len(mapped) > 20  # should have many fields
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # BUG FIX: propagate pytest's return code — the original discarded it,
    # so `python tests/test_setup.py` exited 0 even when tests failed.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||||
Reference in New Issue
Block a user