"""
Hirpinia data loader - Refactored version with async support.
This script processes Hirpinia ODS files and loads data into the database.
Replaces the legacy hirpiniaLoadScript.py with modern async/await patterns.
"""
import asyncio
import logging
import sys
from datetime import datetime
from pathlib import Path

import ezodf

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_many, execute_query, get_db_connection

logger = logging.getLogger(__name__)

class HirpiniaLoader:
    """Loads Hirpinia sensor data from ODS files into the database."""

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Hirpinia loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> tuple[str, str]:
        """
        Extract the unit name and tool name from the file path.

        Args:
            file_path: Path to the ODS file

        Returns:
            Tuple of (unit_name, tool_name)
        """
        folder_path = file_path.parent
        unit_name = folder_path.name  # Parent folder name identifies the unit
        file_name = file_path.stem  # Filename without extension
        # Tool name is the first token after the "HIRPINIA_" prefix,
        # e.g. "HIRPINIA_<tool>_<suffix>.ods" -> "<tool>"
        tool_name = file_name.replace("HIRPINIA_", "")
        tool_name = tool_name.split("_")[0]
        logger.debug(f"Extracted metadata - Unit: {unit_name}, Tool: {tool_name}")
        return unit_name, tool_name

    def _parse_ods_file(self, file_path: Path, unit_name: str, tool_name: str) -> list[tuple]:
        """
        Parse the ODS file and extract raw data.

        Args:
            file_path: Path to the ODS file
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            List of tuples ready for database insertion
        """
        data_rows = []
        doc = ezodf.opendoc(str(file_path))
        for sheet in doc.sheets:
            # Sheet names follow the "S-<node number>" convention
            node_num = sheet.name.replace("S-", "")
            logger.debug(f"Processing sheet: {sheet.name} (Node: {node_num})")
            rows_to_skip = 2  # Skip header rows
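            # Assumed per-row layout, inferred from the column indexes used below
            # (the actual export format may differ): column 0 holds an ISO
            # timestamp, columns 2/4/6/8 hold the four readings, the rest is ignored.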
            for i, row in enumerate(sheet.rows()):
                if i < rows_to_skip:
                    continue
                row_data = [cell.value for cell in row]
                # Parse datetime
                try:
                    dt = datetime.strptime(row_data[0], "%Y-%m-%dT%H:%M:%S")
                    date = dt.strftime("%Y-%m-%d")
                    time = dt.strftime("%H:%M:%S")
                except (ValueError, TypeError) as e:
                    logger.warning(f"Failed to parse datetime in row {i}: {row_data[0]} - {e}")
                    continue
                # Extract values
                val0 = row_data[2] if len(row_data) > 2 else None
                val1 = row_data[4] if len(row_data) > 4 else None
                val2 = row_data[6] if len(row_data) > 6 else None
                val3 = row_data[8] if len(row_data) > 8 else None
                # Create tuple for database insertion; -1 and -273 are placeholder
                # values for BatLevel and Temperature
                data_rows.append((unit_name, tool_name, node_num, date, time, -1, -273, val0, val1, val2, val3))
        logger.info(f"Parsed {len(data_rows)} data rows from {file_path.name}")
        return data_rows

    async def _insert_raw_data(self, data_rows: list[tuple]) -> int:
        """
        Insert raw data into the database.

        Args:
            data_rows: List of data tuples

        Returns:
            Number of rows inserted
        """
        if not data_rows:
            logger.warning("No data rows to insert")
            return 0
        query = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0, Val1, Val2, Val3)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        rows_affected = await execute_many(self.conn, query, data_rows)
        logger.info(f"Inserted {rows_affected} rows into RAWDATACOR")
        return rows_affected

    async def _get_matlab_function(self, unit_name: str, tool_name: str) -> str | None:
        """
        Get the MATLAB function name for this unit/tool combination.

        Args:
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            MATLAB function name, or None if not found
        """
        query = """
            SELECT m.matcall
            FROM tools AS t
            JOIN units AS u ON u.id = t.unit_id
            JOIN matfuncs AS m ON m.id = t.matfunc
            WHERE u.name = %s AND t.name = %s
        """
        result = await execute_query(self.conn, query, (unit_name, tool_name), fetch_one=True)
        if result and result.get("matcall"):
            matlab_func = result["matcall"]
            logger.info(f"MATLAB function found: {matlab_func}")
            return matlab_func
        logger.warning(f"No MATLAB function found for {unit_name}/{tool_name}")
        return None

    async def process_file(self, file_path: str | Path, trigger_matlab: bool = True) -> bool:
        """
        Process a Hirpinia ODS file and load its data into the database.

        Args:
            file_path: Path to the ODS file to process
            trigger_matlab: Whether to trigger MATLAB elaboration after loading

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)
        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False
        if file_path.suffix.lower() not in [".ods"]:
            logger.error(f"Invalid file type: {file_path.suffix}. Expected .ods")
            return False
        try:
            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)
            # Parse ODS file
            data_rows = self._parse_ods_file(file_path, unit_name, tool_name)
            # Insert data
            rows_inserted = await self._insert_raw_data(data_rows)
            if rows_inserted > 0:
                logger.info(f"Successfully loaded {rows_inserted} rows from {file_path.name}")
                # Optionally trigger MATLAB elaboration
                if trigger_matlab:
                    matlab_func = await self._get_matlab_function(unit_name, tool_name)
                    if matlab_func:
                        logger.warning(
                            f"MATLAB elaboration would be triggered: {matlab_func} for {unit_name}/{tool_name}"
                        )
                        logger.warning("Note: Direct MATLAB execution not implemented in refactored version")
                        # In production, this should integrate with elab_orchestrator instead
                        # of calling MATLAB directly via os.system()
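                        # A possible hand-off, assuming elab_orchestrator exposes an
                        # async queueing entry point (hypothetical name and signature):
                        #   await elab_orchestrator.enqueue(matlab_func, unit_name, tool_name)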
                return True
            else:
                logger.warning(f"No new rows inserted from {file_path.name}")
                return False
        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False

async def main(file_path: str) -> int:
    """
    Main entry point for the Hirpinia loader.

    Args:
        file_path: Path to the ODS file to process

    Returns:
        Exit code: 0 on success, 1 on failure
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    logger.info("Hirpinia Loader started")
    logger.info(f"Processing file: {file_path}")
    try:
        # Load configuration
        db_config = DatabaseConfig()
        # Process file
        async with HirpiniaLoader(db_config) as loader:
            success = await loader.process_file(file_path)
            if success:
                logger.info("Processing completed successfully")
                return 0
            else:
                logger.error("Processing failed")
                return 1
    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1
    finally:
        logger.info("Hirpinia Loader finished")

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python hirpinia_loader.py <path_to_ods_file>")
        sys.exit(1)
    exit_code = asyncio.run(main(sys.argv[1]))
    sys.exit(exit_code)