docs: Add comprehensive documentation and helper scripts

Add:
- QUICKSTART.md: 5-minute quick start guide with examples
- scripts/incus_setup.sh: Automated PostgreSQL container setup
- scripts/validate_migration.sql: SQL validation queries
- scripts/setup_cron.sh: Cron job setup for incremental migrations
- tests/test_setup.py: Unit tests for configuration and transformation
- install.sh: Quick installation script

Documentation includes:
- Step-by-step setup instructions
- Example queries for RAWDATACOR and ELABDATADISP
- Troubleshooting guide
- Performance optimization tips

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-10 19:58:20 +01:00
parent 62577d3200
commit fccc83eb74
6 changed files with 662 additions and 0 deletions

52
scripts/incus_setup.sh Executable file
View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Provision a PostgreSQL server inside a fresh Incus container.
#
# Usage: incus_setup.sh [container_name] [postgres_password]
#   container_name     name of the container to create (default: pg-server)
#   postgres_password  password for the 'postgres' role   (default: postgres)
#
# Requires: incus CLI with permission to launch containers.
set -euo pipefail

CONTAINER_NAME=${1:-pg-server}
POSTGRES_PASSWORD=${2:-postgres}

echo "Creating Incus container: $CONTAINER_NAME"
# Create the container (--wait blocks until it is running)
incus launch images:ubuntu/22.04 "$CONTAINER_NAME" --wait

echo "Installing PostgreSQL..."
# Install PostgreSQL
incus exec "$CONTAINER_NAME" -- apt update
incus exec "$CONTAINER_NAME" -- apt install -y postgresql postgresql-contrib

echo "Starting PostgreSQL..."
# Start and enable PostgreSQL
incus exec "$CONTAINER_NAME" -- systemctl start postgresql
incus exec "$CONTAINER_NAME" -- systemctl enable postgresql

# Set the postgres superuser password.
# NOTE(review): the password is interpolated into a SQL literal — avoid
# single quotes or shell metacharacters in the second argument.
incus exec "$CONTAINER_NAME" -- sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD '$POSTGRES_PASSWORD';"

# Detect the installed PostgreSQL major version instead of hard-coding "14",
# so the script keeps working when the base image ships a newer release.
PG_VERSION=$(incus exec "$CONTAINER_NAME" -- ls /etc/postgresql | head -n1)

# Allow TCP connections from any host (setup convenience; tighten the
# pg_hba.conf rule before using this in production).
incus exec "$CONTAINER_NAME" -- bash -c "
echo \"host all all 0.0.0.0/0 md5\" >> /etc/postgresql/$PG_VERSION/main/pg_hba.conf
sed -i \"s/#listen_addresses = 'localhost'/listen_addresses = '*'/\" /etc/postgresql/$PG_VERSION/main/postgresql.conf
"
# Restart PostgreSQL to pick up the new listen/auth settings
incus exec "$CONTAINER_NAME" -- systemctl restart postgresql

# Fetch the container's IPv4 address. CSV output avoids parsing the default
# ASCII table, where 'tail -n1' would grab the border line instead of data.
IP=$(incus list "$CONTAINER_NAME" -c4 -f csv | head -n1 | awk '{print $1}')
echo ""
echo "✓ PostgreSQL is running!"
echo ""
echo "Connection details:"
echo "  Host: $IP"
echo "  Port: 5432"
echo "  User: postgres"
echo "  Password: $POSTGRES_PASSWORD"
echo ""
echo "Update .env file with:"
echo "  POSTGRES_HOST=$IP"
echo "  POSTGRES_PASSWORD=$POSTGRES_PASSWORD"

37
scripts/setup_cron.sh Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/bash
# Install a crontab entry that runs the incremental migration every 6 hours.
# Prompts before modifying the user's crontab; refuses to add a duplicate.
set -euo pipefail

PROJECT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
VENV_PYTHON="$PROJECT_DIR/venv/bin/python"

# Compute the log name *inside* the cron line so each day gets its own file
# (the original expanded $(date …) at setup time, freezing one filename
# forever). '%' must be backslash-escaped in crontab entries — cron treats a
# bare '%' as a newline/stdin separator.
LOG_FILE="$PROJECT_DIR/migration_\$(date +\\%Y\\%m\\%d).log"

# Cron job entry: every 6 hours, on the hour
CRON_ENTRY="0 */6 * * * cd $PROJECT_DIR && $VENV_PYTHON main.py migrate incremental >> $LOG_FILE 2>&1"

echo "Cron job to be added:"
echo "$CRON_ENTRY"
echo ""
echo "This will run incremental migration every 6 hours."
echo ""

# Skip installation if an incremental-migration job is already scheduled
if crontab -l 2>/dev/null | grep -q "migrate incremental"; then
echo "⚠ Cron job already exists"
echo ""
echo "Current cron jobs:"
crontab -l | grep -v '^#' | grep -v '^$'
else
echo "Add to crontab? (y/n)"
read -r response
if [ "$response" = "y" ]; then
# Append to the existing crontab (if any) and reinstall it
(crontab -l 2>/dev/null; echo "$CRON_ENTRY") | crontab -
echo "✓ Cron job added successfully"
echo ""
echo "Verify with: crontab -l"
echo "View logs: tail -f migration_*.log"
else
echo "Cron job not added"
fi
fi

View File

@@ -0,0 +1,90 @@
-- Validation queries for PostgreSQL after migration.
-- Run with psql; sections 1-3 and 9 should be compared against the
-- equivalent MySQL counts to confirm the migration is complete.

-- 1. Verify row counts match between MySQL and PostgreSQL
-- Run this on both databases and compare
-- PostgreSQL queries:
SELECT 'rawdatacor' as table_name, COUNT(*) as row_count FROM rawdatacor
UNION ALL
SELECT 'elabdatadisp' as table_name, COUNT(*) as row_count FROM elabdatadisp;

-- 2. Check for NULL values in JSONB (both counts should be 0)
SELECT 'rawdatacor with NULL measurements' as check_name, COUNT(*) as count
FROM rawdatacor WHERE measurements IS NULL
UNION ALL
SELECT 'elabdatadisp with NULL measurements' as check_name, COUNT(*) as count
FROM elabdatadisp WHERE measurements IS NULL;

-- 3. Verify date range coverage
SELECT
    'rawdatacor dates' as table_name,
    MIN(event_date) as min_date,
    MAX(event_date) as max_date,
    COUNT(DISTINCT event_date) as distinct_dates
FROM rawdatacor
UNION ALL
SELECT
    'elabdatadisp dates' as table_name,
    MIN(event_date) as min_date,
    MAX(event_date) as max_date,
    COUNT(DISTINCT event_date) as distinct_dates
FROM elabdatadisp;

-- 4. Verify partition pruning is in use: the plan should only scan the
-- 2024 partitions. Upper bound is exclusive, so '< 2025-01-01' covers the
-- whole year including Dec 31 (the previous '< 2024-12-31' silently
-- dropped the last day).
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2025-01-01';

-- 5. Check JSONB structure samples
-- RAWDATACOR
SELECT measurements FROM rawdatacor WHERE measurements IS NOT NULL LIMIT 1;
-- ELABDATADISP
SELECT measurements FROM elabdatadisp WHERE measurements IS NOT NULL LIMIT 1;

-- 6. Verify indexes exist
SELECT schemaname, tablename, indexname
FROM pg_indexes
WHERE tablename IN ('rawdatacor', 'elabdatadisp')
ORDER BY tablename, indexname;

-- 7. Performance: simple smoke-test queries
\timing on
-- Single row by primary key
SELECT * FROM rawdatacor WHERE id = 1000 AND event_date = '2024-01-01';
-- Full-year date range scan (exclusive upper bound includes Dec 31)
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2025-01-01';
-- Unit and tool filter
SELECT COUNT(*) FROM rawdatacor WHERE unit_name = 'Unit1' AND tool_name_id = 'Tool1';
-- JSONB key-existence filter (the ? operator)
SELECT COUNT(*) FROM rawdatacor WHERE measurements ? '0';
\timing off

-- 8. Identify partitions with data
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
WHERE tablename LIKE 'rawdatacor_%' OR tablename LIKE 'elabdatadisp_%'
ORDER BY tablename;

-- 9. Check for unique-constraint violations.
-- Count groups that break the logical unique key. Wrapping the grouped
-- query in a subquery yields exactly one row per table (0 when clean),
-- instead of one row per duplicate group and no rows at all on success.
SELECT
    'rawdatacor duplicate unique key' as check_name,
    COUNT(*) as duplicate_groups
FROM (
    SELECT 1
    FROM rawdatacor
    GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
    HAVING COUNT(*) > 1
) dup
UNION ALL
SELECT
    'elabdatadisp duplicate unique key' as check_name,
    COUNT(*) as duplicate_groups
FROM (
    SELECT 1
    FROM elabdatadisp
    GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
    HAVING COUNT(*) > 1
) dup;