load csv async

This commit is contained in:
2025-05-11 11:20:18 +02:00
parent 1dfb1a2efa
commit cbcadbf015
4 changed files with 27 additions and 36 deletions

View File

@@ -19,7 +19,7 @@
[csvelab]
logFilename = csvElab.log
max_threads = 5
max_threads = 10
[db]
hostname = 10.211.114.173

View File

@@ -4,8 +4,7 @@
import mysql.connector
import logging
import importlib
import time
import threading
import asyncio
# Import custom modules for configuration and database connection
from utils.config import loader as setting
@@ -16,14 +15,14 @@ from utils.database.loader_action import CSV_RECEIVED
logger = logging.getLogger(__name__)
# Function to elaborate CSV data
def load_csv(cfg: object, threads: list) -> bool:
async def load_csv(cfg: object) -> bool:
try:
# Establish a database connection
with connetti_db(cfg) as conn:
cur = conn.cursor()
# Select a single record from the raw data table that is not currently locked and has a status of 0
cur.execute(f'select id, unit_name, unit_type, tool_name, tool_type, tool_data from {cfg.dbname}.{cfg.dbrectable} where locked = 0 and status = {CSV_RECEIVED} limit 1')
id, unit_name, unit_type, tool_name, tool_type, tool_data = cur.fetchone()
cur.execute(f'select id, unit_type, tool_type from {cfg.dbname}.{cfg.dbrectable} where locked = 0 and status = {CSV_RECEIVED} limit 1')
id, unit_type, tool_type = cur.fetchone()
if id:
# If a record is found, lock it by updating the 'locked' field to 1
cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 1 where id = {id}')
@@ -36,27 +35,20 @@ def load_csv(cfg: object, threads: list) -> bool:
# Get the 'main_loader' function from the imported module
funzione = getattr(modulo, "main_loader")
# Create a new thread to execute the 'main_loader' function with the configuration and record ID
thread = threading.Thread(target = funzione, args=(cfg, id))
# Add the thread to the list of active threads
threads.append(thread)
# Start the thread's execution
thread.start()
# Return True to indicate that a record was processed
funzione(cfg, id)
return True
else:
# If no record is found, wait for 20 seconds before attempting to fetch again
time.sleep(20)
await asyncio.sleep(20)
# Return False to indicate that no record was processed
return False
except mysql.connector.Error as e:
# Handle database connection errors
print(f"Error: {e}")
logger.error(f'{e}')
def main():
async def main():
# Load the configuration settings
cfg = setting.Config()
@@ -67,30 +59,25 @@ def main():
filename=cfg.logfilename,
level=logging.INFO,
)
# Initialize an empty list to keep track of active threads
threads = []
# Limit the number of concurrent executions to max_threads
semaphore = asyncio.Semaphore(cfg.max_threads)
# Enter an infinite loop to continuously process records
while True:
# Check if the number of active threads exceeds the maximum allowed
while len(threads) > cfg.max_threads:
# Iterate through the list of threads
for thread in threads:
# If a thread is no longer alive (has finished execution)
if not thread.is_alive():
# Remove it from the list of active threads
threads.remove(thread)
# Attempt to process a CSV record
if load_csv(cfg, threads):
# If a record was successfully processed, log the number of threads currently running
logger.info(f"Threads in execution: {len(threads)}")
pass
async with semaphore:
try:
await asyncio.create_task(load_csv(cfg))
except Exception as e:
logger.error(f"Error: {e}.")
except KeyboardInterrupt:
# Handle a keyboard interrupt (e.g., Ctrl+C) to gracefully shut down the program
logger.info("Info: Shutdown requested...exiting")
except Exception as e:
logger.info(f"Error: {e}.")
logger.error(f"Error: {e}.")
if __name__ == "__main__":
main()
asyncio.run(main())

View File

@@ -32,7 +32,7 @@ class Config:
self.dbuser = c.get("db", "user")
self.dbpass = c.get("db", "password")
self.dbname = c.get("db", "dbName")
self.dbschema = c.get("db", "dbSchema")
#self.dbschema = c.get("db", "dbSchema")
self.dbusertable = c.get("db", "userTableName")
self.dbrectable = c.get("db", "recTableName")
self.dbrawdata = c.get("db", "rawTableName")

View File

@@ -8,6 +8,9 @@ CSV_RECEIVED = 0
DATA_LOADED = 1
DATA_ELABORATED = 2
timestamp_cols = ['inserted_at', 'loaded_at', 'elaborated_at']
def load_data(cfg: object, matrice_valori: list) -> bool :
sql_insert_RAWDATA = f'''
INSERT IGNORE INTO {cfg.dbname}.{cfg.dbrawdata} (
@@ -42,8 +45,9 @@ def update_status(cfg: object, id: int, status: int) -> None:
with connetti_db(cfg) as conn:
cur = conn.cursor()
try:
cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 0, status = {status} where id = {id}')
cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 0, status = {status}, {timestamp_cols[status]} = now() where id = {id}')
conn.commit()
logging.info("Status updated.")
except Exception as e:
conn.rollback()
logging.error(f'Error: {e}')