fix: Change from COPY to parameterized INSERT for batch inserts

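Replace cursor.copy() with cursor.executemany() for more reliable
batch inserts in PostgreSQL. The COPY path relied on hand-built
text-format rows (manual escaping of backslashes, tabs and newlines,
"\N" for NULL) and on manually encoding the payload to bytes, which
did not work reliably with psycopg 3.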

Changes:
- Use executemany() with parameterized INSERT statements
- Let psycopg handle parameter escaping and encoding
- Serialize dict/list values (for JSONB columns) with json.dumps()
- Pass all other values (including None) through unchanged, so psycopg
  adapts them to PostgreSQL types instead of the code stringifying them

This ensures that data is actually being inserted into PostgreSQL
during migration, fixing the issue where data wasn't appearing in
the database after migration completed.

Fixes: Data not being persisted in PostgreSQL during migration
This commit is contained in:
2025-12-10 20:48:20 +01:00
parent e2377d4191
commit 821cda850e

View File

@@ -101,7 +101,7 @@ class PostgreSQLConnector:
rows: List[Dict[str, Any]],
columns: List[str]
) -> int:
"""Insert a batch of rows using COPY (fast bulk insert).
"""Insert a batch of rows using parameterized INSERT.
Args:
table: Table name
@@ -116,27 +116,25 @@ class PostgreSQLConnector:
try:
with self.connection.cursor() as cursor:
# Prepare COPY data
copy_data = []
# Prepare values for INSERT
values_list = []
for row in rows:
values = []
for col in columns:
val = row.get(col)
if val is None:
values.append("\\N") # NULL representation
elif isinstance(val, (dict, list)):
# Convert JSONB dicts to JSON strings
if isinstance(val, (dict, list)):
values.append(json.dumps(val))
elif isinstance(val, str):
# Escape special characters
val = val.replace("\\", "\\\\").replace("\n", "\\n").replace("\t", "\\t")
values.append(val)
else:
values.append(str(val))
copy_data.append("\t".join(values))
values.append(val)
values_list.append(tuple(values))
# Use COPY for fast insert
copy_sql = f"COPY {table} ({','.join(columns)}) FROM STDIN"
cursor.copy(copy_sql, "\n".join(copy_data).encode())
# Build parameterized INSERT query
placeholders = ",".join(["%s"] * len(columns))
insert_sql = f"INSERT INTO {table} ({','.join(columns)}) VALUES ({placeholders})"
# Execute batch insert
cursor.executemany(insert_sql, values_list)
self.connection.commit()
logger.debug(f"Inserted {len(rows)} rows into {table}")