""" Sorotec Pini data loader - Refactored version with async support. This script processes Sorotec Pini CSV files and loads multi-channel sensor data. Handles two different file formats (_1_ and _2_) with different channel mappings. Replaces the legacy sorotecPini.py with modern async/await patterns. """ import asyncio import logging import sys from pathlib import Path from refactory_scripts.config import DatabaseConfig from refactory_scripts.utils import execute_many, get_db_connection logger = logging.getLogger(__name__) class SorotecLoader: """Loads Sorotec Pini multi-channel sensor data from CSV files.""" # File type identifiers FILE_TYPE_1 = "_1_" FILE_TYPE_2 = "_2_" # Default values DEFAULT_TEMPERATURE = -273 DEFAULT_UNIT_NAME = "ID0247" DEFAULT_TOOL_NAME = "DT0001" # Channel mappings for File Type 1 (nodes 1-26) CHANNELS_TYPE_1 = list(range(1, 27)) # Nodes 1 to 26 # Channel mappings for File Type 2 (selective nodes) CHANNELS_TYPE_2 = [41, 42, 43, 44, 49, 50, 51, 52, 56, 57, 58, 59, 60, 61, 62] # 15 nodes def __init__(self, db_config: DatabaseConfig): """ Initialize the Sorotec loader. Args: db_config: Database configuration object """ self.db_config = db_config self.conn = None async def __aenter__(self): """Async context manager entry.""" self.conn = await get_db_connection(self.db_config.as_dict()) return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit.""" if self.conn: self.conn.close() def _extract_metadata(self, file_path: Path) -> tuple[str, str]: """ Extract unit name and tool name from file path. For Sorotec, metadata is determined by folder name. Args: file_path: Path to the CSV file Returns: Tuple of (unit_name, tool_name) """ # Get folder name (second to last part of path) folder_name = file_path.parent.name # Currently hardcoded for ID0247 # TODO: Make this configurable if more units are added if folder_name == "ID0247": unit_name = self.DEFAULT_UNIT_NAME tool_name = self.DEFAULT_TOOL_NAME else: logger.warning(f"Unknown folder: {folder_name}, using defaults") unit_name = self.DEFAULT_UNIT_NAME tool_name = self.DEFAULT_TOOL_NAME logger.debug(f"Metadata: Unit={unit_name}, Tool={tool_name}") return unit_name, tool_name def _determine_file_type(self, file_path: Path) -> str | None: """ Determine file type based on filename pattern. Args: file_path: Path to the CSV file Returns: File type identifier ("_1_" or "_2_") or None if unknown """ filename = file_path.name if self.FILE_TYPE_1 in filename: return self.FILE_TYPE_1 elif self.FILE_TYPE_2 in filename: return self.FILE_TYPE_2 else: logger.error(f"Unknown file type: {filename}") return None def _parse_datetime(self, timestamp_str: str) -> tuple[str, str]: """ Parse datetime string and convert to database format. Converts from "DD-MM-YYYY HH:MM:SS" to ("YYYY-MM-DD", "HH:MM:SS") Args: timestamp_str: Timestamp string in format "DD-MM-YYYY HH:MM:SS" Returns: Tuple of (date, time) strings Examples: >>> _parse_datetime("11-10-2024 14:30:00") ("2024-10-11", "14:30:00") """ parts = timestamp_str.split(" ") date_parts = parts[0].split("-") # Convert DD-MM-YYYY to YYYY-MM-DD date = f"{date_parts[2]}-{date_parts[1]}-{date_parts[0]}" time = parts[1] return date, time def _parse_csv_type_1(self, lines: list[str], unit_name: str, tool_name: str) -> tuple[list, list]: """ Parse CSV file of type 1 (_1_). File Type 1 has 38 columns and maps to nodes 1-26. 
    def _parse_csv_type_1(self, lines: list[str], unit_name: str, tool_name: str) -> tuple[list, list]:
        """
        Parse CSV file of type 1 (_1_).

        File Type 1 has 38 columns and maps to nodes 1-26.

        Args:
            lines: List of CSV lines
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            Tuple of (raw_data_rows, elab_data_rows)
        """
        raw_data = []
        elab_data = []

        for line in lines:
            # Parse CSV row
            row = line.replace('"', "").split(";")

            # Extract timestamp
            date, time = self._parse_datetime(row[0])

            # Extract battery voltage (an4 = column 2)
            battery = row[2]

            # Extract channel values (E8_xxx_CHx)
            # Type 1 mapping: columns 4-35 map to channels
            ch_values = [
                row[35],  # E8_181_CH1 (node 1)
                row[4],   # E8_181_CH2 (node 2)
                row[5],   # E8_181_CH3 (node 3)
                row[6],   # E8_181_CH4 (node 4)
                row[7],   # E8_181_CH5 (node 5)
                row[8],   # E8_181_CH6 (node 6)
                row[9],   # E8_181_CH7 (node 7)
                row[10],  # E8_181_CH8 (node 8)
                row[11],  # E8_182_CH1 (node 9)
                row[12],  # E8_182_CH2 (node 10)
                row[13],  # E8_182_CH3 (node 11)
                row[14],  # E8_182_CH4 (node 12)
                row[15],  # E8_182_CH5 (node 13)
                row[16],  # E8_182_CH6 (node 14)
                row[17],  # E8_182_CH7 (node 15)
                row[18],  # E8_182_CH8 (node 16)
                row[19],  # E8_183_CH1 (node 17)
                row[20],  # E8_183_CH2 (node 18)
                row[21],  # E8_183_CH3 (node 19)
                row[22],  # E8_183_CH4 (node 20)
                row[23],  # E8_183_CH5 (node 21)
                row[24],  # E8_183_CH6 (node 22)
                row[25],  # E8_183_CH7 (node 23)
                row[26],  # E8_183_CH8 (node 24)
                row[27],  # E8_184_CH1 (node 25)
                row[28],  # E8_184_CH2 (node 26)
            ]

            # Create data rows for each channel
            for node_num, value in enumerate(ch_values, start=1):
                # Raw data (with battery info)
                raw_data.append((unit_name, tool_name, node_num, date, time, battery, self.DEFAULT_TEMPERATURE, value))
                # Elaborated data (just the load value)
                elab_data.append((unit_name, tool_name, node_num, date, time, value))

        logger.info(f"Parsed Type 1: {len(elab_data)} channel readings ({len(elab_data)//26} timestamps x 26 channels)")
        return raw_data, elab_data

    def _parse_csv_type_2(self, lines: list[str], unit_name: str, tool_name: str) -> tuple[list, list]:
        """
        Parse CSV file of type 2 (_2_).

        File Type 2 has 38 columns and maps to selective nodes (41-62).

        Args:
            lines: List of CSV lines
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            Tuple of (raw_data_rows, elab_data_rows)
        """
        raw_data = []
        elab_data = []

        for line in lines:
            # Parse CSV row
            row = line.replace('"', "").split(";")

            # Extract timestamp
            date, time = self._parse_datetime(row[0])

            # Extract battery voltage (an4 = column 37)
            battery = row[37]

            # Extract channel values for Type 2
            # Type 2 mapping: specific columns to specific nodes
            channel_mapping = [
                (41, row[13]),  # E8_182_CH1
                (42, row[14]),  # E8_182_CH2
                (43, row[15]),  # E8_182_CH3
                (44, row[16]),  # E8_182_CH4
                (49, row[21]),  # E8_183_CH1
                (50, row[22]),  # E8_183_CH2
                (51, row[23]),  # E8_183_CH3
                (52, row[24]),  # E8_183_CH4
                (56, row[28]),  # E8_183_CH8
                (57, row[29]),  # E8_184_CH1
                (58, row[30]),  # E8_184_CH2
                (59, row[31]),  # E8_184_CH3
                (60, row[32]),  # E8_184_CH4
                (61, row[33]),  # E8_184_CH5
                (62, row[34]),  # E8_184_CH6
            ]

            # Create data rows for each channel
            for node_num, value in channel_mapping:
                # Raw data (with battery info)
                raw_data.append((unit_name, tool_name, node_num, date, time, battery, self.DEFAULT_TEMPERATURE, value))
                # Elaborated data (just the load value)
                elab_data.append((unit_name, tool_name, node_num, date, time, value))

        logger.info(f"Parsed Type 2: {len(elab_data)} channel readings ({len(elab_data)//15} timestamps x 15 channels)")
        return raw_data, elab_data

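    # NOTE: both inserts below use INSERT IGNORE, so re-running the loader on an
    # already-processed file silently skips duplicate rows. This assumes RAWDATACOR
    # and ELABDATADISP carry a unique key covering
    # (UnitName, ToolNameID, NodeNum, EventDate, EventTime); if they do not,
    # re-processing a file would duplicate its readings.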
    async def _insert_data(self, raw_data: list, elab_data: list) -> tuple[int, int]:
        """
        Insert raw and elaborated data into the database.

        Args:
            raw_data: List of raw data tuples
            elab_data: List of elaborated data tuples

        Returns:
            Tuple of (raw_rows_inserted, elab_rows_inserted)
        """
        raw_query = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """

        elab_query = """
            INSERT IGNORE INTO ELABDATADISP
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, load_value)
            VALUES (%s, %s, %s, %s, %s, %s)
        """

        # Insert elaborated data first
        elab_count = await execute_many(self.conn, elab_query, elab_data)
        logger.info(f"Inserted {elab_count} elaborated records")

        # Insert raw data
        raw_count = await execute_many(self.conn, raw_query, raw_data)
        logger.info(f"Inserted {raw_count} raw records")

        return raw_count, elab_count

    async def process_file(self, file_path: str | Path) -> bool:
        """
        Process a Sorotec CSV file and load data into the database.

        Args:
            file_path: Path to the CSV file to process

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        if file_path.suffix.lower() not in [".csv", ".txt"]:
            logger.error(f"Invalid file type: {file_path.suffix}")
            return False

        try:
            logger.info(f"Processing file: {file_path.name}")

            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)

            # Determine file type
            file_type = self._determine_file_type(file_path)
            if not file_type:
                return False

            logger.info(f"File type detected: {file_type}")

            # Read file
            with open(file_path, encoding="utf-8") as f:
                lines = [line.rstrip() for line in f.readlines()]

            # Remove empty lines and header rows
            lines = [line for line in lines if line]
            if len(lines) > 4:
                lines = lines[4:]  # Skip first 4 header lines

            if not lines:
                logger.warning(f"No data lines found in {file_path.name}")
                return False

            # Parse based on file type
            if file_type == self.FILE_TYPE_1:
                raw_data, elab_data = self._parse_csv_type_1(lines, unit_name, tool_name)
            else:  # FILE_TYPE_2
                raw_data, elab_data = self._parse_csv_type_2(lines, unit_name, tool_name)

            # Insert into database
            raw_count, elab_count = await self._insert_data(raw_data, elab_data)

            logger.info(f"Successfully processed {file_path.name}: {raw_count} raw, {elab_count} elab records")
            return True

        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False


async def main(file_path: str):
    """
    Main entry point for the Sorotec loader.

    Args:
        file_path: Path to the CSV file to process
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger.info("Sorotec Loader started")
    logger.info(f"Processing file: {file_path}")

    try:
        # Load configuration
        db_config = DatabaseConfig()

        # Process file
        async with SorotecLoader(db_config) as loader:
            success = await loader.process_file(file_path)

        if success:
            logger.info("Processing completed successfully")
            return 0
        else:
            logger.error("Processing failed")
            return 1

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1
    finally:
        logger.info("Sorotec Loader finished")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python sorotec_loader.py <file_path>")
        sys.exit(1)

    exit_code = asyncio.run(main(sys.argv[1]))
    sys.exit(exit_code)
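
# Example invocation (hypothetical path and filename): the "_1_" / "_2_" marker in the
# filename selects the channel mapping, and the parent folder selects the unit.
#
#   python sorotec_loader.py /data/sorotec/ID0247/ID0247_1_20241011.csv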