Files
mysql2postgres/scripts/validate_migration.sql
alex 4d72d2a42e chore: Add validation queries for default timestamp records
Added queries to identify and sample records with default timestamp
(1970-01-01 00:00:00) which resulted from invalid MySQL dates during
migration. These records need date recovery from the MySQL source.

Queries:
- Count records with default timestamp in both tables
- Sample first 10 records from rawdatacor with default timestamp

These will help quantify the scope of date recovery work needed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-23 20:47:01 +01:00

111 lines
3.3 KiB
SQL

-- Validation queries for PostgreSQL after migration
-- 1. Verify row counts match between MySQL and PostgreSQL
-- Run this on both databases and compare
-- PostgreSQL queries:
SELECT 'rawdatacor' as table_name, COUNT(*) as row_count FROM rawdatacor
UNION ALL
SELECT 'elabdatadisp' as table_name, COUNT(*) as row_count FROM elabdatadisp;
-- 2. Check for NULL values in JSONB (should be empty)
SELECT 'rawdatacor with NULL measurements' as check_name, COUNT(*) as count
FROM rawdatacor WHERE measurements IS NULL
UNION ALL
SELECT 'elabdatadisp with NULL measurements' as check_name, COUNT(*) as count
FROM elabdatadisp WHERE measurements IS NULL;
-- 3. Verify date range coverage
SELECT
'rawdatacor dates' as table_name,
MIN(event_date) as min_date,
MAX(event_date) as max_date,
COUNT(DISTINCT event_date) as distinct_dates
FROM rawdatacor
UNION ALL
SELECT
'elabdatadisp dates' as table_name,
MIN(event_date) as min_date,
MAX(event_date) as max_date,
COUNT(DISTINCT event_date) as distinct_dates
FROM elabdatadisp;
-- 4. Verify partitions are in use
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2024-12-31';
-- 5. Check JSONB structure samples
-- RAWDATACOR
SELECT measurements FROM rawdatacor WHERE measurements IS NOT NULL LIMIT 1;
-- ELABDATADISP
SELECT measurements FROM elabdatadisp WHERE measurements IS NOT NULL LIMIT 1;
-- 6. Verify indexes exist
SELECT schemaname, tablename, indexname
FROM pg_indexes
WHERE tablename IN ('rawdatacor', 'elabdatadisp')
ORDER BY tablename, indexname;
-- 7. Performance: Simple queries
\timing on
-- Single row by primary key
SELECT * FROM rawdatacor WHERE id = 1000 AND event_date = '2024-01-01';
-- Date range scan
SELECT COUNT(*) FROM rawdatacor WHERE event_date >= '2024-01-01' AND event_date < '2024-12-31';
-- Unit and tool filter
SELECT COUNT(*) FROM rawdatacor WHERE unit_name = 'Unit1' AND tool_name_id = 'Tool1';
-- JSONB filter
SELECT COUNT(*) FROM rawdatacor WHERE measurements ? '0';
\timing off
-- 8. Identify partitions with data
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
WHERE tablename LIKE 'rawdatacor_%' OR tablename LIKE 'elabdatadisp_%'
ORDER BY tablename;
-- 9. Check for constraint violations
-- Verify unique constraints
SELECT
'rawdatacor duplicate unique key' as check_name,
COUNT(*) as duplicate_count
FROM rawdatacor
GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
HAVING COUNT(*) > 1
UNION ALL
SELECT
'elabdatadisp duplicate unique key' as check_name,
COUNT(*) as duplicate_count
FROM elabdatadisp
GROUP BY unit_name, tool_name_id, node_num, event_date, event_time
HAVING COUNT(*) > 1;
-- 10. Identify records with invalid/default timestamps (1970-01-01)
-- These need date recovery from MySQL
SELECT
'rawdatacor with default timestamp' as table_name,
COUNT(*) as record_count
FROM rawdatacor
WHERE event_timestamp = '1970-01-01 00:00:00'
UNION ALL
SELECT
'elabdatadisp with default timestamp' as table_name,
COUNT(*) as record_count
FROM elabdatadisp
WHERE event_timestamp = '1970-01-01 00:00:00';
-- Sample records with default timestamp (first 10 from rawdatacor)
SELECT id, unit_name, tool_name_id, event_timestamp, measurements
FROM rawdatacor
WHERE event_timestamp = '1970-01-01 00:00:00'
LIMIT 10;