fix: Change from COPY to parameterized INSERT for batch inserts
Replace cursor.copy() with cursor.executemany() for more reliable batch inserts in PostgreSQL. The COPY method has issues with format and data encoding in psycopg3. Changes: - Use executemany() with parameterized INSERT statements - Let psycopg handle parameter escaping and encoding - Convert JSONB dicts to JSON strings automatically - More compatible with various data types This ensures that data is actually being inserted into PostgreSQL during migration, fixing the issue where data wasn't appearing in the database after migration completed. Fixes: Data not being persisted in PostgreSQL during migration
This commit is contained in:
@@ -101,7 +101,7 @@ class PostgreSQLConnector:
|
|||||||
rows: List[Dict[str, Any]],
|
rows: List[Dict[str, Any]],
|
||||||
columns: List[str]
|
columns: List[str]
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Insert a batch of rows using COPY (fast bulk insert).
|
"""Insert a batch of rows using parameterized INSERT.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
table: Table name
|
table: Table name
|
||||||
@@ -116,27 +116,25 @@ class PostgreSQLConnector:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
with self.connection.cursor() as cursor:
|
with self.connection.cursor() as cursor:
|
||||||
# Prepare COPY data
|
# Prepare values for INSERT
|
||||||
copy_data = []
|
values_list = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
values = []
|
values = []
|
||||||
for col in columns:
|
for col in columns:
|
||||||
val = row.get(col)
|
val = row.get(col)
|
||||||
if val is None:
|
# Convert JSONB dicts to JSON strings
|
||||||
values.append("\\N") # NULL representation
|
if isinstance(val, (dict, list)):
|
||||||
elif isinstance(val, (dict, list)):
|
|
||||||
values.append(json.dumps(val))
|
values.append(json.dumps(val))
|
||||||
elif isinstance(val, str):
|
|
||||||
# Escape special characters
|
|
||||||
val = val.replace("\\", "\\\\").replace("\n", "\\n").replace("\t", "\\t")
|
|
||||||
values.append(val)
|
|
||||||
else:
|
else:
|
||||||
values.append(str(val))
|
values.append(val)
|
||||||
copy_data.append("\t".join(values))
|
values_list.append(tuple(values))
|
||||||
|
|
||||||
# Use COPY for fast insert
|
# Build parameterized INSERT query
|
||||||
copy_sql = f"COPY {table} ({','.join(columns)}) FROM STDIN"
|
placeholders = ",".join(["%s"] * len(columns))
|
||||||
cursor.copy(copy_sql, "\n".join(copy_data).encode())
|
insert_sql = f"INSERT INTO {table} ({','.join(columns)}) VALUES ({placeholders})"
|
||||||
|
|
||||||
|
# Execute batch insert
|
||||||
|
cursor.executemany(insert_sql, values_list)
|
||||||
self.connection.commit()
|
self.connection.commit()
|
||||||
|
|
||||||
logger.debug(f"Inserted {len(rows)} rows into {table}")
|
logger.debug(f"Inserted {len(rows)} rows into {table}")
|
||||||
|
|||||||
Reference in New Issue
Block a user