ASE/src/elab_orchestrator.py

#!.venv/bin/python
"""
Orchestratore dei worker che lanciano le elaborazioni
"""

# Import necessary libraries
import asyncio
import logging
import shlex

# Import custom modules for configuration and database connection
from utils.config import loader_matlab_elab as setting
from utils.database import WorkflowFlags
from utils.database.action_query import get_tool_info
from utils.csv.loaders import get_next_csv_atomic
from utils.orchestrator_utils import run_orchestrator, worker_context
from utils.database.loader_action import update_status, unlock
from utils.connect.send_email import send_error_email
from utils.general import read_error_lines_from_logs

# Logger used by this module; getLogger() without a name returns the root logger
logger = logging.getLogger()

# Delay between processing one CSV record and the next (in seconds)
ELAB_PROCESSING_DELAY = 0.2
# Time to wait when there are no records to process (in seconds)
NO_RECORD_SLEEP = 60


async def _mark_bypassed(cfg: object, pool: object, rec_id: object) -> None:
    """Flag a record as elaborated without running Matlab, then unlock it."""
    await update_status(cfg, rec_id, WorkflowFlags.DATA_ELABORATED, pool)
    await update_status(cfg, rec_id, WorkflowFlags.DUMMY_ELABORATED, pool)
    await unlock(cfg, rec_id, pool)


async def _run_matlab(cfg: object, matcall: str, unit: str, tool: str) -> tuple:
    """Run the Matlab wrapper script for one unit/tool pair.

    Args:
        cfg (object): Configuration; ``matlab_timeout``, ``matlab_runtime`` and
            ``matlab_func_path`` are read from it.
        matcall (str): Name used to build the script path ``./run_<matcall>.sh``.
        unit (str): Unit name passed to the script.
        tool (str): Tool name passed to the script.

    Returns:
        tuple: ``(returncode, stdout_text, stderr_text)`` with both texts
        decoded and stripped.
    """
    # Values that originate from database records are quoted so that shell
    # metacharacters in them cannot alter the command line. The two
    # configuration values are left as-is, exactly as before.
    script = shlex.quote(f"./run_{matcall}.sh")
    matlab_cmd = (
        f"timeout {cfg.matlab_timeout} {script} {cfg.matlab_runtime} "
        f"{shlex.quote(unit)} {shlex.quote(tool)}"
    )
    proc = await asyncio.create_subprocess_shell(
        matlab_cmd,
        cwd=cfg.matlab_func_path,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()

    # errors="replace": output that is not valid UTF-8 must not raise here,
    # otherwise the error e-mail and the unlock that follow would be skipped.
    return (
        proc.returncode,
        stdout.decode(errors="replace").strip(),
        stderr.decode(errors="replace").strip(),
    )


async def _process_record(cfg: object, pool: object, record: object) -> None:
    """Elaborate (or by-pass) a single record returned by get_next_csv_atomic.

    Args:
        cfg (object): The configuration object.
        pool (object): The database connection pool.
        record (object): Five-item record; items 0, 2, 3 and 4 are used as
            record id, tool type, unit name and tool name.
    """
    # String fields are normalized to lower case with spaces replaced by "_".
    rec_id, _, tool_type, unit_name, tool_name = [
        item.lower().replace(" ", "_") if isinstance(item, str) else item
        for item in record
    ]

    # GD tools are not elaborated (the original author marked this rule as
    # uncertain); they are only flagged as done. tool_type is already lower
    # case after the normalization above.
    if tool_type == "gd":
        await _mark_bypassed(cfg, pool, rec_id)
        return

    unit, tool = unit_name.upper(), tool_name.upper()
    tool_elab_info = await get_tool_info(WorkflowFlags.DATA_ELABORATED, unit, tool, pool)
    if not tool_elab_info:
        # Previously this case was skipped without any trace in the logs.
        # NOTE(review): the record is neither updated nor unlocked here, as
        # before; confirm whether it should be released or flagged instead.
        logger.warning("ID %s %s - %s: tool info not found, record skipped.", rec_id, unit_name, tool_name)
        return

    if tool_elab_info['statustools'].lower() not in cfg.elab_status:
        logger.info("ID %s %s - %s %s: MatLab calc by-passed.", rec_id, unit_name, tool_name, tool_elab_info['statustools'])
        await _mark_bypassed(cfg, pool, rec_id)
        return

    logger.info("Elaborazione ID %s per %s %s", rec_id, unit_name, tool_name)
    await update_status(cfg, rec_id, WorkflowFlags.START_ELAB, pool)

    returncode, stdout_text, stderr_text = await _run_matlab(cfg, tool_elab_info['matcall'], unit, tool)

    if returncode != 0:
        logger.error("Errore durante l'elaborazione")
        logger.error(stderr_text)

        # 124 is the exit status the `timeout` command uses when it had to
        # stop the wrapped command.
        if returncode == 124:
            error_type = f"Matlab elab excessive duration: killed after {cfg.matlab_timeout} seconds."
        else:
            error_type = f"Matlab elab failed: {returncode}."

        # TODO: confirm which location the Matlab error logs should be read from.
        errors, warnings = await read_error_lines_from_logs(cfg.matlab_error_path, f"_{unit_name}_{tool_name}*_*_output_error.txt")
        await send_error_email(unit, tool, tool_elab_info['matcall'], error_type, errors, warnings)
    else:
        logger.info(stdout_text)
        await update_status(cfg, rec_id, WorkflowFlags.DATA_ELABORATED, pool)

    await unlock(cfg, rec_id, pool)
    await asyncio.sleep(ELAB_PROCESSING_DELAY)


async def worker(worker_id: int, cfg: object, pool: object) -> None:
    """Run the work loop that elaborates loaded data.

    On every cycle the worker asks the database for the next record whose
    data is ready for elaboration, runs the associated Matlab command (or
    flags the record as by-passed) and then starts a new cycle. When no
    record is available it sleeps for NO_RECORD_SLEEP seconds. The loop
    never returns on its own.

    Any exception raised during a cycle is logged and the loop resumes
    after one second.
    NOTE(review): a record whose processing raised is not unlocked by this
    function; confirm whether such records are released elsewhere.

    Args:
        worker_id (int): The unique ID of the worker.
        cfg (object): The configuration object.
        pool (object): The database connection pool.
    """
    # Set the logging context for this worker
    worker_context.set(f"W{worker_id:02d}")

    # Tracebacks are attached to error logs only when running at DEBUG level
    debug_mode = logging.getLogger().getEffectiveLevel() == logging.DEBUG
    logger.info("Avviato")

    while True:
        try:
            logger.info("Inizio elaborazione")
            record = await get_next_csv_atomic(pool, cfg.dbrectable, WorkflowFlags.DATA_LOADED, WorkflowFlags.DATA_ELABORATED)
            if not record:
                logger.info("Nessun record disponibile")
                await asyncio.sleep(NO_RECORD_SLEEP)
                continue

            await _process_record(cfg, pool, record)

        except Exception as e: # pylint: disable=broad-except
            logger.error("Errore durante l'esecuzione: %s", e, exc_info=debug_mode)
            await asyncio.sleep(1)


async def main():
    """Start the elaboration orchestrator.

    Passes the Matlab-elaboration configuration class and the ``worker``
    coroutine function to ``run_orchestrator`` and awaits its completion.
    """
    config_class = setting.Config
    await run_orchestrator(config_class, worker)

# Start the orchestrator only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())