"""
Hirpinia data loader - Refactored version with async support.
This script processes Hirpinia ODS files and loads data into the database.
Replaces the legacy hirpiniaLoadScript.py with modern async/await patterns.
"""
import asyncio
import logging
import sys
from datetime import datetime
from pathlib import Path

import ezodf

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_many, execute_query, get_db_connection

logger = logging.getLogger(__name__)

class HirpiniaLoader:
    """Loads Hirpinia sensor data from ODS files into the database."""

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Hirpinia loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> tuple[str, str]:
        """
        Extract the unit name and tool name from the file path.

        Args:
            file_path: Path to the ODS file

        Returns:
            Tuple of (unit_name, tool_name)
        """
        folder_path = file_path.parent
        unit_name = folder_path.name  # Parent folder name identifies the unit
        file_name = file_path.stem  # Filename without extension
        # Tool name is the first token after the "HIRPINIA_" prefix,
        # e.g. "HIRPINIA_<tool>_<suffix>.ods" -> "<tool>"
        tool_name = file_name.replace("HIRPINIA_", "")
        tool_name = tool_name.split("_")[0]
        logger.debug(f"Extracted metadata - Unit: {unit_name}, Tool: {tool_name}")
        return unit_name, tool_name

    def _parse_ods_file(self, file_path: Path, unit_name: str, tool_name: str) -> list[tuple]:
        """
        Parse the ODS file and extract raw data.

        Args:
            file_path: Path to the ODS file
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            List of tuples ready for database insertion
        """
        data_rows = []
        doc = ezodf.opendoc(str(file_path))
        for sheet in doc.sheets:
            # Sheet names follow the "S-<node number>" convention
            node_num = sheet.name.replace("S-", "")
            logger.debug(f"Processing sheet: {sheet.name} (Node: {node_num})")
            rows_to_skip = 2  # Skip header rows
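            # Assumed per-row layout, inferred from the column indexes used below
            # (the actual export format may differ): column 0 holds an ISO
            # timestamp, columns 2/4/6/8 hold the four readings, the rest is ignored.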
            for i, row in enumerate(sheet.rows()):
                if i < rows_to_skip:
                    continue
                row_data = [cell.value for cell in row]
                # Parse datetime
                try:
                    dt = datetime.strptime(row_data[0], "%Y-%m-%dT%H:%M:%S")
                    date = dt.strftime("%Y-%m-%d")
                    time = dt.strftime("%H:%M:%S")
                except (ValueError, TypeError) as e:
                    logger.warning(f"Failed to parse datetime in row {i}: {row_data[0]} - {e}")
                    continue
                # Extract values
                val0 = row_data[2] if len(row_data) > 2 else None
                val1 = row_data[4] if len(row_data) > 4 else None
                val2 = row_data[6] if len(row_data) > 6 else None
                val3 = row_data[8] if len(row_data) > 8 else None
                # Create tuple for database insertion; -1 and -273 are placeholder
                # values for BatLevel and Temperature
                data_rows.append((unit_name, tool_name, node_num, date, time, -1, -273, val0, val1, val2, val3))
        logger.info(f"Parsed {len(data_rows)} data rows from {file_path.name}")
        return data_rows

    async def _insert_raw_data(self, data_rows: list[tuple]) -> int:
        """
        Insert raw data into the database.

        Args:
            data_rows: List of data tuples

        Returns:
            Number of rows inserted
        """
        if not data_rows:
            logger.warning("No data rows to insert")
            return 0
        query = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0, Val1, Val2, Val3)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        rows_affected = await execute_many(self.conn, query, data_rows)
        logger.info(f"Inserted {rows_affected} rows into RAWDATACOR")
        return rows_affected

    async def _get_matlab_function(self, unit_name: str, tool_name: str) -> str | None:
        """
        Get the MATLAB function name for this unit/tool combination.

        Args:
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            MATLAB function name, or None if not found
        """
        query = """
            SELECT m.matcall
            FROM tools AS t
            JOIN units AS u ON u.id = t.unit_id
            JOIN matfuncs AS m ON m.id = t.matfunc
            WHERE u.name = %s AND t.name = %s
        """
        result = await execute_query(self.conn, query, (unit_name, tool_name), fetch_one=True)
        if result and result.get("matcall"):
            matlab_func = result["matcall"]
            logger.info(f"MATLAB function found: {matlab_func}")
            return matlab_func
        logger.warning(f"No MATLAB function found for {unit_name}/{tool_name}")
        return None

    async def process_file(self, file_path: str | Path, trigger_matlab: bool = True) -> bool:
        """
        Process a Hirpinia ODS file and load its data into the database.

        Args:
            file_path: Path to the ODS file to process
            trigger_matlab: Whether to trigger MATLAB elaboration after loading

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)
        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False
        if file_path.suffix.lower() not in [".ods"]:
            logger.error(f"Invalid file type: {file_path.suffix}. Expected .ods")
            return False
        try:
            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)
            # Parse ODS file
            data_rows = self._parse_ods_file(file_path, unit_name, tool_name)
            # Insert data
            rows_inserted = await self._insert_raw_data(data_rows)
            if rows_inserted > 0:
                logger.info(f"Successfully loaded {rows_inserted} rows from {file_path.name}")
                # Optionally trigger MATLAB elaboration
                if trigger_matlab:
                    matlab_func = await self._get_matlab_function(unit_name, tool_name)
                    if matlab_func:
                        logger.warning(
                            f"MATLAB elaboration would be triggered: {matlab_func} for {unit_name}/{tool_name}"
                        )
                        logger.warning("Note: Direct MATLAB execution not implemented in refactored version")
                        # In production, this should integrate with elab_orchestrator instead
                        # of calling MATLAB directly via os.system()
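                        # A possible hand-off, assuming elab_orchestrator exposes an
                        # async queueing entry point (hypothetical name and signature):
                        #   await elab_orchestrator.enqueue(matlab_func, unit_name, tool_name)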
                return True
            else:
                logger.warning(f"No new rows inserted from {file_path.name}")
                return False
        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False

async def main(file_path: str) -> int:
    """
    Main entry point for the Hirpinia loader.

    Args:
        file_path: Path to the ODS file to process

    Returns:
        Exit code: 0 on success, 1 on failure
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logger.info("Hirpinia Loader started")
    logger.info(f"Processing file: {file_path}")
    try:
        # Load configuration
        db_config = DatabaseConfig()
        # Process file
        async with HirpiniaLoader(db_config) as loader:
            success = await loader.process_file(file_path)
            if success:
                logger.info("Processing completed successfully")
                return 0
            else:
                logger.error("Processing failed")
                return 1
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1
    finally:
        logger.info("Hirpinia Loader finished")

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python hirpinia_loader.py <path_to_ods_file>")
        sys.exit(1)
    exit_code = asyncio.run(main(sys.argv[1]))
    sys.exit(exit_code)