# ASE/src/refactory_scripts/loaders/sorotec_loader.py

"""
Sorotec Pini data loader - Refactored version with async support.

This script processes Sorotec Pini CSV files and loads multi-channel sensor data.
Handles two different file formats (_1_ and _2_) with different channel mappings.
Replaces the legacy sorotecPini.py with modern async/await patterns.
"""

import asyncio
import logging
import sys
from pathlib import Path

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_many, get_db_connection

logger = logging.getLogger(__name__)


class SorotecLoader:
    """
    Loads Sorotec Pini multi-channel sensor data from CSV files.

    The loader is an async context manager: the database connection is
    obtained on entry and closed on exit. A single file is loaded with
    ``process_file``, which parses it according to its file type and inserts
    the resulting rows into the raw and elaborated data tables.
    """

    # File type identifiers: substrings searched for in the file name to tell
    # the two CSV layouts apart
    FILE_TYPE_1 = "_1_"
    FILE_TYPE_2 = "_2_"

    # Default values
    # Written to the Temperature column of every raw row; the CSV parsers never
    # read a temperature from the file (presumably a "no reading" placeholder)
    DEFAULT_TEMPERATURE = -273
    # Unit and tool names attached to every row produced by this loader
    DEFAULT_UNIT_NAME = "ID0247"
    DEFAULT_TOOL_NAME = "DT0001"

    # Node numbers for File Type 1 (nodes 1-26)
    CHANNELS_TYPE_1 = list(range(1, 27))  # Nodes 1 to 26

    # Node numbers for File Type 2 (selective nodes, not contiguous)
    CHANNELS_TYPE_2 = [41, 42, 43, 44, 49, 50, 51, 52, 56, 57, 58, 59, 60, 61, 62]  # 15 nodes

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Sorotec loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Open the database connection and return this loader."""
        connection_params = self.db_config.as_dict()
        self.conn = await get_db_connection(connection_params)
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> tuple[str, str]:
        """
        Derive the unit and tool names for a file from its parent folder.

        Only the ``ID0247`` folder is recognised. Any other folder name is
        reported with a warning and mapped to the same default names, so the
        returned pair is currently always the class defaults.

        Args:
            file_path: Path to the CSV file

        Returns:
            Tuple of (unit_name, tool_name)
        """
        # Name of the directory that directly contains the file
        folder_name = file_path.parent.name

        # TODO: Make this configurable if more units are added
        if folder_name != "ID0247":
            logger.warning(f"Unknown folder: {folder_name}, using defaults")

        # Known and unknown folders resolve to the same defaults for now.
        unit_name, tool_name = self.DEFAULT_UNIT_NAME, self.DEFAULT_TOOL_NAME

        logger.debug(f"Metadata: Unit={unit_name}, Tool={tool_name}")
        return unit_name, tool_name

    def _determine_file_type(self, file_path: Path) -> str | None:
        """
        Determine file type based on filename pattern.

        Args:
            file_path: Path to the CSV file

        Returns:
            File type identifier ("_1_" or "_2_") or None if unknown
        """
        filename = file_path.name

        if self.FILE_TYPE_1 in filename:
            return self.FILE_TYPE_1
        elif self.FILE_TYPE_2 in filename:
            return self.FILE_TYPE_2
        else:
            logger.error(f"Unknown file type: {filename}")
            return None

    def _parse_datetime(self, timestamp_str: str) -> tuple[str, str]:
        """
        Parse datetime string and convert to database format.

        Converts from "DD-MM-YYYY HH:MM:SS" to ("YYYY-MM-DD", "HH:MM:SS")

        Args:
            timestamp_str: Timestamp string in format "DD-MM-YYYY HH:MM:SS"

        Returns:
            Tuple of (date, time) strings

        Examples:
            >>> _parse_datetime("11-10-2024 14:30:00")
            ("2024-10-11", "14:30:00")
        """
        parts = timestamp_str.split(" ")
        date_parts = parts[0].split("-")

        # Convert DD-MM-YYYY to YYYY-MM-DD
        date = f"{date_parts[2]}-{date_parts[1]}-{date_parts[0]}"
        time = parts[1]

        return date, time

    def _parse_csv_type_1(self, lines: list[str], unit_name: str, tool_name: str) -> tuple[list, list]:
        """
        Turn type 1 (_1_) data lines into raw and elaborated database rows.

        Every line is split on ";" after all double quotes are removed.
        Column 0 holds the timestamp, column 2 the battery voltage (an4), and
        the 26 channel values come from column 35 (node 1) followed by columns
        4-28 (nodes 2-26). Values are kept as the strings read from the file.

        Args:
            lines: List of CSV data lines (header already removed)
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            Tuple of (raw_data_rows, elab_data_rows); each list receives 26
            tuples per input line, in node order

        Raises:
            IndexError: If a line has fewer than 36 columns
        """
        raw_data = []
        elab_data = []

        for line in lines:
            fields = line.replace('"', "").split(";")

            date, time = self._parse_datetime(fields[0])
            battery = fields[2]  # an4

            # Node 1 (E8_181_CH1) lives in column 35; nodes 2-26 are the
            # contiguous columns 4-28 (E8_181_CH2 ... E8_184_CH2). Column 35 is
            # indexed first, so a short line fails before the slice is taken
            # and the slice is always complete.
            node_values = [fields[35], *fields[4:29]]
            numbered = list(enumerate(node_values, start=1))

            # Raw rows carry the battery level and the placeholder temperature.
            raw_data.extend(
                (unit_name, tool_name, node_num, date, time, battery, self.DEFAULT_TEMPERATURE, value)
                for node_num, value in numbered
            )
            # Elaborated rows carry only the load value.
            elab_data.extend((unit_name, tool_name, node_num, date, time, value) for node_num, value in numbered)

        logger.info(f"Parsed Type 1: {len(elab_data)} channel readings ({len(elab_data)//26} timestamps x 26 channels)")
        return raw_data, elab_data

    def _parse_csv_type_2(self, lines: list[str], unit_name: str, tool_name: str) -> tuple[list, list]:
        """
        Turn type 2 (_2_) data lines into raw and elaborated database rows.

        Every line is split on ";" after all double quotes are removed.
        Column 0 holds the timestamp, column 37 the battery voltage (an4), and
        15 selected columns are mapped to nodes in the range 41-62. Values are
        kept as the strings read from the file.

        Args:
            lines: List of CSV data lines (header already removed)
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            Tuple of (raw_data_rows, elab_data_rows); each list receives 15
            tuples per input line, in the order of the node table below

        Raises:
            IndexError: If a line has fewer than 38 columns
        """
        # (node number, column index) pairs; each node number is its column + 28.
        node_columns = (
            (41, 13),  # E8_182_CH1
            (42, 14),  # E8_182_CH2
            (43, 15),  # E8_182_CH3
            (44, 16),  # E8_182_CH4
            (49, 21),  # E8_183_CH1
            (50, 22),  # E8_183_CH2
            (51, 23),  # E8_183_CH3
            (52, 24),  # E8_183_CH4
            (56, 28),  # E8_183_CH8
            (57, 29),  # E8_184_CH1
            (58, 30),  # E8_184_CH2
            (59, 31),  # E8_184_CH3
            (60, 32),  # E8_184_CH4
            (61, 33),  # E8_184_CH5
            (62, 34),  # E8_184_CH6
        )

        raw_data = []
        elab_data = []

        for line in lines:
            fields = line.replace('"', "").split(";")

            date, time = self._parse_datetime(fields[0])
            battery = fields[37]  # an4

            readings = [(node_num, fields[column]) for node_num, column in node_columns]

            # Raw rows carry the battery level and the placeholder temperature.
            raw_data.extend(
                (unit_name, tool_name, node_num, date, time, battery, self.DEFAULT_TEMPERATURE, value)
                for node_num, value in readings
            )
            # Elaborated rows carry only the load value.
            elab_data.extend((unit_name, tool_name, node_num, date, time, value) for node_num, value in readings)

        logger.info(f"Parsed Type 2: {len(elab_data)} channel readings ({len(elab_data)//15} timestamps x 15 channels)")
        return raw_data, elab_data

    async def _insert_data(self, raw_data: list, elab_data: list) -> tuple[int, int]:
        """
        Write the parsed rows to the elaborated and raw data tables.

        Both statements use INSERT IGNORE. The elaborated rows are written
        first, then the raw rows; each step logs the count reported by
        ``execute_many``.

        Args:
            raw_data: List of raw data tuples
            elab_data: List of elaborated data tuples

        Returns:
            Tuple of (raw_rows_inserted, elab_rows_inserted)
        """
        elab_sql = """
            INSERT IGNORE INTO ELABDATADISP
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, load_value)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        raw_sql = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """

        # Order matters to callers reading the logs: elaborated first, raw second.
        elab_count = await execute_many(self.conn, elab_sql, elab_data)
        logger.info(f"Inserted {elab_count} elaborated records")

        raw_count = await execute_many(self.conn, raw_sql, raw_data)
        logger.info(f"Inserted {raw_count} raw records")

        # Note the result order: raw count first, elaborated count second.
        return raw_count, elab_count

    async def process_file(self, file_path: str | Path) -> bool:
        """
        Process a Sorotec CSV file and load data into the database.

        Steps: validate the path and extension, resolve unit/tool names and the
        file type, read the non-empty lines, drop the 4 header lines, parse the
        remaining lines with the parser for the file type, and insert the rows.

        Any exception raised while reading, parsing or inserting is logged with
        its traceback and reported as a failure instead of being propagated.

        Args:
            file_path: Path to the CSV file to process

        Returns:
            True if processing was successful, False otherwise (missing file,
            extension other than .csv/.txt, unknown file type, no data lines
            after the header, or any error during processing)
        """
        header_line_count = 4  # number of non-empty header lines preceding the data

        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        if file_path.suffix.lower() not in (".csv", ".txt"):
            logger.error(f"Invalid file type: {file_path.suffix}")
            return False

        try:
            logger.info(f"Processing file: {file_path.name}")

            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)

            # Determine file type
            file_type = self._determine_file_type(file_path)
            if not file_type:
                return False

            logger.info(f"File type detected: {file_type}")

            # Read the file, keeping only lines that are non-empty once
            # trailing whitespace is removed.
            with open(file_path, encoding="utf-8") as f:
                non_empty_lines = [stripped for line in f if (stripped := line.rstrip())]

            # Always drop the header. Skipping it only when more lines than the
            # header exist would feed a header-only file to the parser as data
            # and fail with a traceback instead of the warning below.
            lines = non_empty_lines[header_line_count:]

            if not lines:
                logger.warning(f"No data lines found in {file_path.name}")
                return False

            # Parse based on file type
            if file_type == self.FILE_TYPE_1:
                raw_data, elab_data = self._parse_csv_type_1(lines, unit_name, tool_name)
            else:  # FILE_TYPE_2
                raw_data, elab_data = self._parse_csv_type_2(lines, unit_name, tool_name)

            # Insert into database
            raw_count, elab_count = await self._insert_data(raw_data, elab_data)

            logger.info(f"Successfully processed {file_path.name}: {raw_count} raw, {elab_count} elab records")
            return True

        except Exception as e:
            # Top-level boundary for a single file: log with traceback and
            # report failure so the caller can map it to an exit status.
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False


async def main(file_path: str):
    """
    Run the Sorotec loader for a single file.

    Configures logging, builds the database configuration, opens a loader as
    an async context manager and processes the file. Unexpected exceptions are
    logged with their traceback rather than propagated.

    Args:
        file_path: Path to the CSV file to process

    Returns:
        0 if the file was processed successfully, 1 otherwise
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger.info("Sorotec Loader started")
    logger.info(f"Processing file: {file_path}")

    # Assume failure until processing reports success.
    exit_status = 1

    try:
        async with SorotecLoader(DatabaseConfig()) as loader:
            succeeded = await loader.process_file(file_path)

        if succeeded:
            logger.info("Processing completed successfully")
            exit_status = 0
        else:
            logger.error("Processing failed")

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)

    finally:
        logger.info("Sorotec Loader finished")

    return exit_status


if __name__ == "__main__":
    # The first command-line argument is the CSV file to load; any further
    # arguments are ignored. The loader's result becomes the exit status.
    if len(sys.argv) >= 2:
        sys.exit(asyncio.run(main(sys.argv[1])))

    print("Usage: python sorotec_loader.py <path_to_csv_file>")
    sys.exit(1)