load csv async

2025-05-11 11:20:18 +02:00
parent 1dfb1a2efa
commit cbcadbf015
4 changed files with 27 additions and 36 deletions

View File

@@ -19,7 +19,7 @@
 [csvelab]
 logFilename = csvElab.log
-max_threads = 5
+max_threads = 10
 
 [db]
 hostname = 10.211.114.173
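
Note: after this commit, max_threads no longer caps OS threads; it sizes the asyncio.Semaphore in the loader below. A minimal sketch of reading the raised value with configparser (the INI filename here is an assumption; the real path is whatever the Config loader uses):

# Sketch only: assumes the settings file is csvElab.ini; section/option names come from the diff above.
import configparser

c = configparser.ConfigParser()
c.read("csvElab.ini")
max_threads = c.getint("csvelab", "max_threads")  # returns 10 after this change
print(max_threads)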

View File

@@ -4,8 +4,7 @@
 import mysql.connector
 import logging
 import importlib
-import time
-import threading
+import asyncio
 
 # Import custom modules for configuration and database connection
 from utils.config import loader as setting
@@ -16,14 +15,14 @@ from utils.database.loader_action import CSV_RECEIVED
 logger = logging.getLogger(__name__)
 
 # Function to elaborate CSV data
-def load_csv(cfg: object, threads: list) -> bool:
+async def load_csv(cfg: object) -> bool:
     try:
         # Establish a database connection
         with connetti_db(cfg) as conn:
             cur = conn.cursor()
             # Select a single record from the raw data table that is not currently locked and has a status of 0
-            cur.execute(f'select id, unit_name, unit_type, tool_name, tool_type, tool_data from {cfg.dbname}.{cfg.dbrectable} where locked = 0 and status = {CSV_RECEIVED} limit 1')
-            id, unit_name, unit_type, tool_name, tool_type, tool_data = cur.fetchone()
+            cur.execute(f'select id, unit_type, tool_type from {cfg.dbname}.{cfg.dbrectable} where locked = 0 and status = {CSV_RECEIVED} limit 1')
+            id, unit_type, tool_type = cur.fetchone()
             if id:
                 # If a record is found, lock it by updating the 'locked' field to 1
                 cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 1 where id = {id}')
@@ -36,27 +35,20 @@ def load_csv(cfg: object, threads: list) -> bool:
                 # Get the 'main_loader' function from the imported module
                 funzione = getattr(modulo, "main_loader")
-                # Create a new thread to execute the 'main_loader' function with the configuration and record ID
-                thread = threading.Thread(target = funzione, args=(cfg, id))
-                # Add the thread to the list of active threads
-                threads.append(thread)
-                # Start the thread's execution
-                thread.start()
-                # Return True to indicate that a record was processed
+                funzione(cfg, id)
                 return True
             else:
                 # If no record is found, wait for 20 seconds before attempting to fetch again
-                time.sleep(20)
+                await asyncio.sleep(20)
                 # Return False to indicate that no record was processed
                 return False
     except mysql.connector.Error as e:
         # Handle database connection errors
-        print(f"Error: {e}")
         logger.error(f'{e}')
 
-def main():
+async def main():
     # Load the configuration settings
     cfg = setting.Config()
@@ -67,30 +59,25 @@ def main():
             filename=cfg.logfilename,
             level=logging.INFO,
         )
-        # Initialize an empty list to keep track of active threads
-        threads = []
+        # Limit the number of concurrent executions to max_threads
+        semaphore = asyncio.Semaphore(cfg.max_threads)
 
         # Enter an infinite loop to continuously process records
         while True:
-            # Check if the number of active threads exceeds the maximum allowed
-            while len(threads) > cfg.max_threads:
-                # Iterate through the list of threads
-                for thread in threads:
-                    # If a thread is no longer alive (has finished execution)
-                    if not thread.is_alive():
-                        # Remove it from the list of active threads
-                        threads.remove(thread)
-            # Attempt to process a CSV record
-            if load_csv(cfg, threads):
-                # If a record was successfully processed, log the number of threads currently running
-                logger.info(f"Threads in execution: {len(threads)}")
-            pass
+            async with semaphore:
+                try:
+                    await asyncio.create_task(load_csv(cfg))
+                except Exception as e:
+                    logger.error(f"Error: {e}.")
     except KeyboardInterrupt:
         # Handle a keyboard interrupt (e.g., Ctrl+C) to gracefully shut down the program
         logger.info("Info: Shutdown requested...exiting")
     except Exception as e:
-        logger.info(f"Error: {e}.")
+        logger.error(f"Error: {e}.")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
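
Note: assembled from the hunks above, here is a minimal, self-contained sketch of the async polling pattern this commit introduces. The database work is replaced by a dummy coin flip so it runs as-is, and MAX_THREADS stands in for cfg.max_threads:

# Minimal sketch of the pattern: a coroutine polls for work, sleeps via asyncio.sleep
# when idle, and main() wraps each iteration in an asyncio.Semaphore.
import asyncio
import logging
import random

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MAX_THREADS = 10  # stands in for cfg.max_threads from the [csvelab] section

async def load_csv() -> bool:
    # Placeholder for the real SELECT ... limit 1 and main_loader(cfg, id) call
    if random.random() < 0.5:
        logger.info("Record processed")
        return True
    # No record found: yield to the event loop instead of blocking with time.sleep
    await asyncio.sleep(2)  # the commit waits 20 seconds
    return False

async def main():
    # Bound concurrent iterations the way the commit does with cfg.max_threads
    semaphore = asyncio.Semaphore(MAX_THREADS)
    for _ in range(5):  # the commit loops forever (while True)
        async with semaphore:
            try:
                await load_csv()
            except Exception as e:
                logger.error(f"Error: {e}.")

if __name__ == "__main__":
    asyncio.run(main())

Because each iteration awaits load_csv before the next one starts, the semaphore only acts as a real concurrency limit if the coroutines are launched as tasks without being awaited immediately; as written, records are processed one at a time.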

View File

@@ -32,7 +32,7 @@ class Config:
         self.dbuser = c.get("db", "user")
         self.dbpass = c.get("db", "password")
         self.dbname = c.get("db", "dbName")
-        self.dbschema = c.get("db", "dbSchema")
+        #self.dbschema = c.get("db", "dbSchema")
         self.dbusertable = c.get("db", "userTableName")
         self.dbrectable = c.get("db", "recTableName")
         self.dbrawdata = c.get("db", "rawTableName")

View File

@@ -8,6 +8,9 @@ CSV_RECEIVED = 0
 DATA_LOADED = 1
 DATA_ELABORATED = 2
 
+timestamp_cols = ['inserted_at', 'loaded_at', 'elaborated_at']
+
 def load_data(cfg: object, matrice_valori: list) -> bool :
     sql_insert_RAWDATA = f'''
         INSERT IGNORE INTO {cfg.dbname}.{cfg.dbrawdata} (
@@ -42,8 +45,9 @@ def update_status(cfg: object, id: int, status: int) -> None:
     with connetti_db(cfg) as conn:
         cur = conn.cursor()
         try:
-            cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 0, status = {status} where id = {id}')
+            cur.execute(f'update {cfg.dbname}.{cfg.dbrectable} set locked = 0, status = {status}, {timestamp_cols[status]} = now() where id = {id}')
             conn.commit()
+            logging.info("Status updated.")
         except Exception as e:
             conn.rollback()
             logging.error(f'Error: {e}')
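
Since CSV_RECEIVED, DATA_LOADED and DATA_ELABORATED are 0, 1 and 2, timestamp_cols[status] selects the column that update_status stamps with now() as a record advances. A minimal sketch of how the column name lands in the SQL string (the schema, table and id values below are made up for illustration):

# Sketch: how timestamp_cols[status] picks the timestamp column used by update_status.
CSV_RECEIVED = 0
DATA_LOADED = 1
DATA_ELABORATED = 2
timestamp_cols = ['inserted_at', 'loaded_at', 'elaborated_at']

def build_update(dbname: str, table: str, id: int, status: int) -> str:
    # Same f-string shape as the diff; the real code runs it with cur.execute()
    return (f"update {dbname}.{table} set locked = 0, status = {status}, "
            f"{timestamp_cols[status]} = now() where id = {id}")

print(build_update("csvdb", "received_csv", 42, DATA_LOADED))
# -> update csvdb.received_csv set locked = 0, status = 1, loaded_at = now() where id = 42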