"""
Hirpinia data loader - Refactored version with async support.

This script processes Hirpinia ODS files and loads data into the database.
Replaces the legacy hirpiniaLoadScript.py with modern async/await patterns.
"""

import asyncio
import logging
import sys
from datetime import datetime
from pathlib import Path

import ezodf

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_many, execute_query, get_db_connection

logger = logging.getLogger(__name__)


class HirpiniaLoader:
    """Loads Hirpinia sensor data from ODS files into the database.

    Intended to be used as an async context manager::

        async with HirpiniaLoader(db_config) as loader:
            await loader.process_file(path)
    """

    # Number of header rows at the top of each sheet that carry no data.
    _HEADER_ROWS = 2

    # Placeholder values stored when the ODS export has no battery-level /
    # temperature readings (presumably "not available" sentinels; -273 is
    # roughly absolute zero -- TODO confirm with downstream DB consumers).
    _NO_BATTERY = -1
    _NO_TEMPERATURE = -273

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Hirpinia loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        # Opened in __aenter__, closed in __aexit__.
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry: open the database connection."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: close the connection if it was opened."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> tuple[str, str]:
        """
        Extract unit name and tool name from file path.

        The unit name is the name of the containing folder; the tool name is
        the first underscore-separated token of the file stem after stripping
        the "HIRPINIA_" prefix.

        Args:
            file_path: Path to the ODS file

        Returns:
            Tuple of (unit_name, tool_name)
        """
        unit_name = file_path.parent.name

        file_name = file_path.stem  # Filename without extension
        tool_name = file_name.replace("HIRPINIA_", "").split("_")[0]

        logger.debug(f"Extracted metadata - Unit: {unit_name}, Tool: {tool_name}")
        return unit_name, tool_name

    def _parse_ods_file(self, file_path: Path, unit_name: str, tool_name: str) -> list[tuple]:
        """
        Parse ODS file and extract raw data.

        Sheets are expected to be named "S-<node>"; the first two rows of
        each sheet are headers. Rows whose timestamp cell is missing or
        malformed are skipped with a warning rather than aborting the file.

        Args:
            file_path: Path to the ODS file
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            List of tuples ready for database insertion
        """
        data_rows = []
        doc = ezodf.opendoc(str(file_path))

        for sheet in doc.sheets:
            node_num = sheet.name.replace("S-", "")
            logger.debug(f"Processing sheet: {sheet.name} (Node: {node_num})")

            for i, row in enumerate(sheet.rows()):
                if i < self._HEADER_ROWS:
                    continue  # Skip header rows

                row_data = [cell.value for cell in row]

                # Parse datetime. IndexError covers completely empty rows
                # (the original except clause missed it and would have
                # aborted the whole file); ValueError/TypeError cover
                # malformed or non-string timestamp cells.
                try:
                    dt = datetime.strptime(row_data[0], "%Y-%m-%dT%H:%M:%S")
                except (IndexError, ValueError, TypeError) as e:
                    raw_ts = row_data[0] if row_data else None
                    logger.warning(f"Failed to parse datetime in row {i}: {raw_ts} - {e}")
                    continue

                date = dt.strftime("%Y-%m-%d")
                time = dt.strftime("%H:%M:%S")

                # Extract values: measurements sit in every other column
                # starting at index 2 (intervening columns presumably hold
                # labels/units -- TODO confirm against a sample export).
                val0 = row_data[2] if len(row_data) > 2 else None
                val1 = row_data[4] if len(row_data) > 4 else None
                val2 = row_data[6] if len(row_data) > 6 else None
                val3 = row_data[8] if len(row_data) > 8 else None

                # Tuple order matches the RAWDATACOR column list in
                # _insert_raw_data.
                data_rows.append(
                    (
                        unit_name,
                        tool_name,
                        node_num,
                        date,
                        time,
                        self._NO_BATTERY,
                        self._NO_TEMPERATURE,
                        val0,
                        val1,
                        val2,
                        val3,
                    )
                )

        logger.info(f"Parsed {len(data_rows)} data rows from {file_path.name}")
        return data_rows

    async def _insert_raw_data(self, data_rows: list[tuple]) -> int:
        """
        Insert raw data into the database.

        Uses INSERT IGNORE, so rows that collide with existing keys are
        silently skipped and do not count toward the returned total.

        Args:
            data_rows: List of data tuples

        Returns:
            Number of rows inserted
        """
        if not data_rows:
            logger.warning("No data rows to insert")
            return 0

        query = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0, Val1, Val2, Val3)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        rows_affected = await execute_many(self.conn, query, data_rows)
        logger.info(f"Inserted {rows_affected} rows into RAWDATACOR")

        return rows_affected

    async def _get_matlab_function(self, unit_name: str, tool_name: str) -> str | None:
        """
        Get the MATLAB function name for this unit/tool combination.

        Args:
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            MATLAB function name or None if not found
        """
        query = """
            SELECT m.matcall
            FROM tools AS t
            JOIN units AS u ON u.id = t.unit_id
            JOIN matfuncs AS m ON m.id = t.matfunc
            WHERE u.name = %s AND t.name = %s
        """

        result = await execute_query(self.conn, query, (unit_name, tool_name), fetch_one=True)

        if result and result.get("matcall"):
            matlab_func = result["matcall"]
            logger.info(f"MATLAB function found: {matlab_func}")
            return matlab_func

        logger.warning(f"No MATLAB function found for {unit_name}/{tool_name}")
        return None

    async def process_file(self, file_path: str | Path, trigger_matlab: bool = True) -> bool:
        """
        Process a Hirpinia ODS file and load data into the database.

        Args:
            file_path: Path to the ODS file to process
            trigger_matlab: Whether to trigger MATLAB elaboration after loading

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        if file_path.suffix.lower() != ".ods":
            logger.error(f"Invalid file type: {file_path.suffix}. Expected .ods")
            return False

        try:
            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)

            # Parse ODS file
            data_rows = self._parse_ods_file(file_path, unit_name, tool_name)

            # Insert data
            rows_inserted = await self._insert_raw_data(data_rows)

            if rows_inserted > 0:
                logger.info(f"Successfully loaded {rows_inserted} rows from {file_path.name}")

                # Optionally trigger MATLAB elaboration
                if trigger_matlab:
                    matlab_func = await self._get_matlab_function(unit_name, tool_name)
                    if matlab_func:
                        logger.warning(
                            f"MATLAB elaboration would be triggered: {matlab_func} for {unit_name}/{tool_name}"
                        )
                        logger.warning("Note: Direct MATLAB execution not implemented in refactored version")
                        # In production, this should integrate with elab_orchestrator instead
                        # of calling MATLAB directly via os.system()

                return True
            else:
                logger.warning(f"No new rows inserted from {file_path.name}")
                return False

        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False
async def main(file_path: str):
    """
    Main entry point for the Hirpinia loader.

    Args:
        file_path: Path to the ODS file to process

    Returns:
        Process exit code: 0 on success, 1 on failure or unexpected error.
    """
    # Configure root logging once for the whole run.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    logger.info("Hirpinia Loader started")
    logger.info(f"Processing file: {file_path}")

    try:
        # Load configuration and process the file inside a managed connection.
        config = DatabaseConfig()
        async with HirpiniaLoader(config) as hirpinia_loader:
            ok = await hirpinia_loader.process_file(file_path)

        if not ok:
            logger.error("Processing failed")
            return 1

        logger.info("Processing completed successfully")
        return 0

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1

    finally:
        # Always emitted, whatever the outcome above.
        logger.info("Hirpinia Loader finished")
if __name__ == "__main__":
    # Require exactly one CLI argument: the path to the ODS file to load.
    if len(sys.argv) < 2:
        print("Usage: python hirpinia_loader.py <path_to_ods_file>")
        sys.exit(1)

    # Run the async entry point and propagate its exit code to the shell.
    sys.exit(asyncio.run(main(sys.argv[1])))