""" Hirpinia data loader - Refactored version with async support. This script processes Hirpinia ODS files and loads data into the database. Replaces the legacy hirpiniaLoadScript.py with modern async/await patterns. """ import asyncio import logging import sys from datetime import datetime from pathlib import Path import ezodf from refactory_scripts.config import DatabaseConfig from refactory_scripts.utils import execute_many, execute_query, get_db_connection logger = logging.getLogger(__name__) class HirpiniaLoader: """Loads Hirpinia sensor data from ODS files into the database.""" def __init__(self, db_config: DatabaseConfig): """ Initialize the Hirpinia loader. Args: db_config: Database configuration object """ self.db_config = db_config self.conn = None async def __aenter__(self): """Async context manager entry.""" self.conn = await get_db_connection(self.db_config.as_dict()) return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit.""" if self.conn: self.conn.close() def _extract_metadata(self, file_path: Path) -> tuple[str, str]: """ Extract unit name and tool name from file path. Args: file_path: Path to the ODS file Returns: Tuple of (unit_name, tool_name) """ folder_path = file_path.parent unit_name = folder_path.name file_name = file_path.stem # Filename without extension tool_name = file_name.replace("HIRPINIA_", "") tool_name = tool_name.split("_")[0] logger.debug(f"Extracted metadata - Unit: {unit_name}, Tool: {tool_name}") return unit_name, tool_name def _parse_ods_file(self, file_path: Path, unit_name: str, tool_name: str) -> list[tuple]: """ Parse ODS file and extract raw data. Args: file_path: Path to the ODS file unit_name: Unit name tool_name: Tool name Returns: List of tuples ready for database insertion """ data_rows = [] doc = ezodf.opendoc(str(file_path)) for sheet in doc.sheets: node_num = sheet.name.replace("S-", "") logger.debug(f"Processing sheet: {sheet.name} (Node: {node_num})") rows_to_skip = 2 # Skip header rows for i, row in enumerate(sheet.rows()): if i < rows_to_skip: continue row_data = [cell.value for cell in row] # Parse datetime try: dt = datetime.strptime(row_data[0], "%Y-%m-%dT%H:%M:%S") date = dt.strftime("%Y-%m-%d") time = dt.strftime("%H:%M:%S") except (ValueError, TypeError) as e: logger.warning(f"Failed to parse datetime in row {i}: {row_data[0]} - {e}") continue # Extract values val0 = row_data[2] if len(row_data) > 2 else None val1 = row_data[4] if len(row_data) > 4 else None val2 = row_data[6] if len(row_data) > 6 else None val3 = row_data[8] if len(row_data) > 8 else None # Create tuple for database insertion data_rows.append((unit_name, tool_name, node_num, date, time, -1, -273, val0, val1, val2, val3)) logger.info(f"Parsed {len(data_rows)} data rows from {file_path.name}") return data_rows async def _insert_raw_data(self, data_rows: list[tuple]) -> int: """ Insert raw data into the database. Args: data_rows: List of data tuples Returns: Number of rows inserted """ if not data_rows: logger.warning("No data rows to insert") return 0 query = """ INSERT IGNORE INTO RAWDATACOR (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0, Val1, Val2, Val3) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """ rows_affected = await execute_many(self.conn, query, data_rows) logger.info(f"Inserted {rows_affected} rows into RAWDATACOR") return rows_affected async def _get_matlab_function(self, unit_name: str, tool_name: str) -> str | None: """ Get the MATLAB function name for this unit/tool combination. Args: unit_name: Unit name tool_name: Tool name Returns: MATLAB function name or None if not found """ query = """ SELECT m.matcall FROM tools AS t JOIN units AS u ON u.id = t.unit_id JOIN matfuncs AS m ON m.id = t.matfunc WHERE u.name = %s AND t.name = %s """ result = await execute_query(self.conn, query, (unit_name, tool_name), fetch_one=True) if result and result.get("matcall"): matlab_func = result["matcall"] logger.info(f"MATLAB function found: {matlab_func}") return matlab_func logger.warning(f"No MATLAB function found for {unit_name}/{tool_name}") return None async def process_file(self, file_path: str | Path, trigger_matlab: bool = True) -> bool: """ Process a Hirpinia ODS file and load data into the database. Args: file_path: Path to the ODS file to process trigger_matlab: Whether to trigger MATLAB elaboration after loading Returns: True if processing was successful, False otherwise """ file_path = Path(file_path) if not file_path.exists(): logger.error(f"File not found: {file_path}") return False if file_path.suffix.lower() not in [".ods"]: logger.error(f"Invalid file type: {file_path.suffix}. Expected .ods") return False try: # Extract metadata unit_name, tool_name = self._extract_metadata(file_path) # Parse ODS file data_rows = self._parse_ods_file(file_path, unit_name, tool_name) # Insert data rows_inserted = await self._insert_raw_data(data_rows) if rows_inserted > 0: logger.info(f"Successfully loaded {rows_inserted} rows from {file_path.name}") # Optionally trigger MATLAB elaboration if trigger_matlab: matlab_func = await self._get_matlab_function(unit_name, tool_name) if matlab_func: logger.warning( f"MATLAB elaboration would be triggered: {matlab_func} for {unit_name}/{tool_name}" ) logger.warning("Note: Direct MATLAB execution not implemented in refactored version") # In production, this should integrate with elab_orchestrator instead # of calling MATLAB directly via os.system() return True else: logger.warning(f"No new rows inserted from {file_path.name}") return False except Exception as e: logger.error(f"Failed to process file {file_path}: {e}", exc_info=True) return False async def main(file_path: str): """ Main entry point for the Hirpinia loader. Args: file_path: Path to the ODS file to process """ # Setup logging logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") logger.info("Hirpinia Loader started") logger.info(f"Processing file: {file_path}") try: # Load configuration db_config = DatabaseConfig() # Process file async with HirpiniaLoader(db_config) as loader: success = await loader.process_file(file_path) if success: logger.info("Processing completed successfully") return 0 else: logger.error("Processing failed") return 1 except Exception as e: logger.error(f"Unexpected error: {e}", exc_info=True) return 1 finally: logger.info("Hirpinia Loader finished") if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: python hirpinia_loader.py ") sys.exit(1) exit_code = asyncio.run(main(sys.argv[1])) sys.exit(exit_code)