docs: Add comprehensive documentation and helper scripts
Add:
- QUICKSTART.md: 5-minute quick start guide with examples
- scripts/incus_setup.sh: Automated PostgreSQL container setup
- scripts/validate_migration.sql: SQL validation queries
- scripts/setup_cron.sh: Cron job setup for incremental migrations
- tests/test_setup.py: Unit tests for configuration and transformation
- install.sh: Quick installation script

Documentation includes:
- Step-by-step setup instructions
- Example queries for RAWDATACOR and ELABDATADISP
- Troubleshooting guide
- Performance optimization tips

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
52
scripts/incus_setup.sh
Executable file
52
scripts/incus_setup.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# Set up a PostgreSQL server inside an Incus container.
#
# Usage: incus_setup.sh [CONTAINER_NAME] [POSTGRES_PASSWORD]
#   CONTAINER_NAME     name of the container to create (default: pg-server)
#   POSTGRES_PASSWORD  password set on the "postgres" role (default: postgres)
#
# WARNING: this script accepts password logins from every IPv4 address
# (0.0.0.0/0) and prints the password to the terminal. It is meant for
# development setups, not for hosts reachable from untrusted networks.

set -e

CONTAINER_NAME=${1:-pg-server}
POSTGRES_PASSWORD=${2:-postgres}

echo "Creating Incus container: $CONTAINER_NAME"

# Create the container.
# NOTE(review): confirm that the installed incus version accepts `--wait`
# on `incus launch`; the flag is kept exactly as in the original script.
incus launch images:ubuntu/22.04 "$CONTAINER_NAME" --wait

echo "Installing PostgreSQL..."

# Install PostgreSQL (apt-get rather than apt: apt's CLI is not meant for scripts).
incus exec "$CONTAINER_NAME" -- apt-get update
incus exec "$CONTAINER_NAME" -- apt-get install -y postgresql postgresql-contrib

echo "Starting PostgreSQL..."

# Start PostgreSQL now and enable it at boot.
incus exec "$CONTAINER_NAME" -- systemctl start postgresql
incus exec "$CONTAINER_NAME" -- systemctl enable postgresql

# Set the postgres role password.
# Single quotes are doubled so that a password containing ' cannot terminate
# the SQL string literal early (broken statement or SQL injection).
SINGLE_QUOTE="'"
SQL_PASSWORD=${POSTGRES_PASSWORD//$SINGLE_QUOTE/$SINGLE_QUOTE$SINGLE_QUOTE}
incus exec "$CONTAINER_NAME" -- sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD '$SQL_PASSWORD';"

# Allow TCP connections.
# The config file locations are read from the running server instead of being
# hard-coded to /etc/postgresql/14/main, and the pg_hba.conf rule is appended
# only when it is not already present. The quoted heredoc delimiter keeps the
# host shell from expanding anything; the text runs verbatim in the container.
incus exec "$CONTAINER_NAME" -- bash -s <<'EOF'
set -e
hba_file=$(sudo -u postgres psql -tAc 'SHOW hba_file' < /dev/null)
conf_file=$(sudo -u postgres psql -tAc 'SHOW config_file' < /dev/null)
hba_rule='host all all 0.0.0.0/0 md5'
grep -qxF "$hba_rule" "$hba_file" || echo "$hba_rule" >> "$hba_file"
sed -i "s/#listen_addresses = 'localhost'/listen_addresses = '*'/" "$conf_file"
EOF

# Restart PostgreSQL so the new listen address and pg_hba rule take effect.
incus exec "$CONTAINER_NAME" -- systemctl restart postgresql

# Get the container's IPv4 address.
# CSV output has no table borders (the last line of the default table output
# is a "+----+" border, not an address), and the anchored pattern keeps other
# containers whose names merely start with $CONTAINER_NAME out of the result.
IP=$(incus list "^${CONTAINER_NAME}\$" -c4 --format csv | tr -d '"' | awk 'NF {print $1; exit}')

if [ -z "$IP" ]; then
    echo "Warning: could not determine the container's IPv4 address; check 'incus list $CONTAINER_NAME'." >&2
fi

echo ""
echo "✓ PostgreSQL is running!"
echo ""
echo "Connection details:"
echo " Host: $IP"
echo " Port: 5432"
echo " User: postgres"
echo " Password: $POSTGRES_PASSWORD"
echo ""
echo "Update .env file with:"
echo " POSTGRES_HOST=$IP"
echo " POSTGRES_PASSWORD=$POSTGRES_PASSWORD"
|
||||
37
scripts/setup_cron.sh
Executable file
37
scripts/setup_cron.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
# Set up a cron job that runs the incremental migration every 6 hours.
#
# The project directory is resolved as the parent of the directory holding
# this script. The job runs `main.py migrate incremental` with the project's
# virtualenv interpreter and appends its output to a per-day log file in the
# project directory. Nothing is installed without interactive confirmation.

PROJECT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
VENV_PYTHON="$PROJECT_DIR/venv/bin/python"

# The date substitution is left unexpanded here so that it is evaluated each
# time the job runs, not once when this script runs (which would pin every
# run to a log file stamped with the installation date). Inside a crontab
# command '%' is special and must be written as '\%'.
LOG_FILE="$PROJECT_DIR/migration_\$(date +\\%Y\\%m\\%d).log"

if [ ! -x "$VENV_PYTHON" ]; then
    echo "Warning: $VENV_PYTHON not found or not executable; the cron job will fail until the virtualenv exists." >&2
fi

# Create cron job entry (paths are quoted so directories with spaces work).
CRON_ENTRY="0 */6 * * * cd \"$PROJECT_DIR\" && \"$VENV_PYTHON\" main.py migrate incremental >> \"$LOG_FILE\" 2>&1"

echo "Cron job to be added:"
echo "$CRON_ENTRY"
echo ""
echo "This will run incremental migration every 6 hours."
echo ""

# Check if already exists
if crontab -l 2>/dev/null | grep -qF "migrate incremental"; then
    echo "⚠ Cron job already exists"
    echo ""
    echo "Current cron jobs:"
    # Show only active entries: skip comment lines and blank lines.
    crontab -l | grep -v '^#' | grep -v '^$'
else
    echo "Add to crontab? (y/n)"
    read -r response

    case "$response" in
        y|Y|yes|YES)
            # Append the entry to the existing crontab; `crontab -l` errors
            # (e.g. no crontab yet) are silenced so a fresh one is created.
            (crontab -l 2>/dev/null; echo "$CRON_ENTRY") | crontab -
            echo "✓ Cron job added successfully"
            echo ""
            echo "Verify with: crontab -l"
            echo "View logs: tail -f $PROJECT_DIR/migration_*.log"
            ;;
        *)
            echo "Cron job not added"
            ;;
    esac
fi
|
||||
90
scripts/validate_migration.sql
Normal file
90
scripts/validate_migration.sql
Normal file
@@ -0,0 +1,90 @@
|
||||
-- Validation queries for PostgreSQL after migration.
-- Intended to be run with psql: section 7 uses the \timing meta-command.

-- 1. Verify row counts match between MySQL and PostgreSQL
-- Run the equivalent counts on both databases and compare.

-- PostgreSQL queries:
SELECT 'rawdatacor' AS table_name, COUNT(*) AS row_count FROM rawdatacor
UNION ALL
SELECT 'elabdatadisp' AS table_name, COUNT(*) AS row_count FROM elabdatadisp;

-- 2. Check for NULL values in JSONB
-- Always returns one row per table; both counts should be 0.
SELECT 'rawdatacor with NULL measurements' AS check_name, COUNT(*) AS count
FROM rawdatacor
WHERE measurements IS NULL
UNION ALL
SELECT 'elabdatadisp with NULL measurements' AS check_name, COUNT(*) AS count
FROM elabdatadisp
WHERE measurements IS NULL;

-- 3. Verify date range coverage
SELECT
    'rawdatacor dates' AS table_name,
    MIN(event_date) AS min_date,
    MAX(event_date) AS max_date,
    COUNT(DISTINCT event_date) AS distinct_dates
FROM rawdatacor
UNION ALL
SELECT
    'elabdatadisp dates' AS table_name,
    MIN(event_date) AS min_date,
    MAX(event_date) AS max_date,
    COUNT(DISTINCT event_date) AS distinct_dates
FROM elabdatadisp;

-- 4. Verify partitions are in use
-- Inspect the plan to see which relations are scanned. EXPLAIN ANALYZE
-- actually executes the query. The range is half-open so that the whole of
-- 2024, including 31 December, is covered.
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*)
FROM rawdatacor
WHERE event_date >= DATE '2024-01-01'
  AND event_date < DATE '2025-01-01';

-- 5. Check JSONB structure samples
-- RAWDATACOR
SELECT measurements FROM rawdatacor WHERE measurements IS NOT NULL LIMIT 1;

-- ELABDATADISP
SELECT measurements FROM elabdatadisp WHERE measurements IS NOT NULL LIMIT 1;

-- 6. Verify indexes exist
SELECT schemaname, tablename, indexname
FROM pg_indexes
WHERE tablename IN ('rawdatacor', 'elabdatadisp')
ORDER BY tablename, indexname;

-- 7. Performance: Simple queries
-- The literal values below (id, dates, unit/tool names, JSONB key) are sample
-- values; replace them with ones present in your data.
\timing on

-- Single row by primary key
SELECT * FROM rawdatacor WHERE id = 1000 AND event_date = DATE '2024-01-01';

-- Date range scan (half-open: all of 2024)
SELECT COUNT(*)
FROM rawdatacor
WHERE event_date >= DATE '2024-01-01'
  AND event_date < DATE '2025-01-01';

-- Unit and tool filter
SELECT COUNT(*) FROM rawdatacor WHERE unit_name = 'Unit1' AND tool_name_id = 'Tool1';

-- JSONB filter: rows whose measurements object has a top-level key '0'
SELECT COUNT(*) FROM rawdatacor WHERE measurements ? '0';

\timing off

-- 8. Identify partitions with data
-- '_' is a single-character wildcard in LIKE, so it is escaped to match a
-- literal underscore. format('%I.%I', ...) quotes the schema and table names
-- so that identifiers needing quotes still resolve to the right relation.
SELECT
    schemaname,
    tablename,
    pg_size_pretty(
        pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass)
    ) AS size
FROM pg_tables
WHERE tablename LIKE 'rawdatacor\_%'
   OR tablename LIKE 'elabdatadisp\_%'
ORDER BY tablename;

-- 9. Check for constraint violations
-- Verify unique constraints: returns one row per duplicated key, so an empty
-- result means no duplicates were found.
SELECT
    'rawdatacor duplicate unique key' AS check_name,
    COUNT(*) AS duplicate_count
FROM rawdatacor
GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
HAVING COUNT(*) > 1
UNION ALL
SELECT
    'elabdatadisp duplicate unique key' AS check_name,
    COUNT(*) AS duplicate_count
FROM elabdatadisp
GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
HAVING COUNT(*) > 1;
|
||||
Reference in New Issue
Block a user