Refactor old scripts
7
src/refactory_scripts/loaders/__init__.py
Normal file
@@ -0,0 +1,7 @@
"""Data loaders for various sensor types."""

from refactory_scripts.loaders.hirpinia_loader import HirpiniaLoader
from refactory_scripts.loaders.sisgeo_loader import SisgeoLoader
from refactory_scripts.loaders.vulink_loader import VulinkLoader

__all__ = ["HirpiniaLoader", "SisgeoLoader", "VulinkLoader"]
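Note: all three loaders import get_db_connection, execute_query, and execute_many from refactory_scripts.utils, which is not part of this commit. A minimal sketch of what those helpers might look like, assuming an aiomysql backend; the config keys, signatures, and autocommit choice are assumptions, not the actual module:

import aiomysql


async def get_db_connection(config: dict):
    """Open an async MySQL connection from a config dict (assumed keys shown)."""
    return await aiomysql.connect(
        host=config["host"],
        port=config.get("port", 3306),
        user=config["user"],
        password=config["password"],
        db=config["database"],
        autocommit=True,  # assumed: the loaders never call commit() explicitly
    )


async def execute_query(conn, query: str, params: tuple = (), fetch_one: bool = False, fetch_all: bool = False):
    """Run a single statement; optionally fetch one row or all rows as dicts."""
    async with conn.cursor(aiomysql.DictCursor) as cur:
        await cur.execute(query, params)
        if fetch_one:
            return await cur.fetchone()
        if fetch_all:
            return await cur.fetchall()
        return cur.rowcount


async def execute_many(conn, query: str, rows: list[tuple]) -> int:
    """Run the same statement for many parameter tuples; return affected row count."""
    async with conn.cursor() as cur:
        await cur.executemany(query, rows)
        return cur.rowcount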
264
src/refactory_scripts/loaders/hirpinia_loader.py
Normal file
@@ -0,0 +1,264 @@
"""
Hirpinia data loader - Refactored version with async support.

This script processes Hirpinia ODS files and loads data into the database.
Replaces the legacy hirpiniaLoadScript.py with modern async/await patterns.
"""

import asyncio
import logging
import sys
from datetime import datetime
from pathlib import Path

import ezodf

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_many, execute_query, get_db_connection

logger = logging.getLogger(__name__)


class HirpiniaLoader:
    """Loads Hirpinia sensor data from ODS files into the database."""

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Hirpinia loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> tuple[str, str]:
        """
        Extract unit name and tool name from file path.

        Args:
            file_path: Path to the ODS file

        Returns:
            Tuple of (unit_name, tool_name)
        """
        folder_path = file_path.parent
        unit_name = folder_path.name

        file_name = file_path.stem  # Filename without extension
        tool_name = file_name.replace("HIRPINIA_", "")
        tool_name = tool_name.split("_")[0]

        logger.debug(f"Extracted metadata - Unit: {unit_name}, Tool: {tool_name}")
        return unit_name, tool_name

    def _parse_ods_file(self, file_path: Path, unit_name: str, tool_name: str) -> list[tuple]:
        """
        Parse ODS file and extract raw data.

        Args:
            file_path: Path to the ODS file
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            List of tuples ready for database insertion
        """
        data_rows = []
        doc = ezodf.opendoc(str(file_path))

        for sheet in doc.sheets:
            node_num = sheet.name.replace("S-", "")
            logger.debug(f"Processing sheet: {sheet.name} (Node: {node_num})")

            rows_to_skip = 2  # Skip header rows

            for i, row in enumerate(sheet.rows()):
                if i < rows_to_skip:
                    continue

                row_data = [cell.value for cell in row]

                # Parse datetime
                try:
                    dt = datetime.strptime(row_data[0], "%Y-%m-%dT%H:%M:%S")
                    date = dt.strftime("%Y-%m-%d")
                    time = dt.strftime("%H:%M:%S")
                except (ValueError, TypeError) as e:
                    logger.warning(f"Failed to parse datetime in row {i}: {row_data[0]} - {e}")
                    continue

                # Extract values
                val0 = row_data[2] if len(row_data) > 2 else None
                val1 = row_data[4] if len(row_data) > 4 else None
                val2 = row_data[6] if len(row_data) > 6 else None
                val3 = row_data[8] if len(row_data) > 8 else None

                # Create tuple for database insertion
                data_rows.append((unit_name, tool_name, node_num, date, time, -1, -273, val0, val1, val2, val3))

        logger.info(f"Parsed {len(data_rows)} data rows from {file_path.name}")
        return data_rows

    async def _insert_raw_data(self, data_rows: list[tuple]) -> int:
        """
        Insert raw data into the database.

        Args:
            data_rows: List of data tuples

        Returns:
            Number of rows inserted
        """
        if not data_rows:
            logger.warning("No data rows to insert")
            return 0

        query = """
            INSERT IGNORE INTO RAWDATACOR
            (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, Val0, Val1, Val2, Val3)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        rows_affected = await execute_many(self.conn, query, data_rows)
        logger.info(f"Inserted {rows_affected} rows into RAWDATACOR")

        return rows_affected

    async def _get_matlab_function(self, unit_name: str, tool_name: str) -> str | None:
        """
        Get the MATLAB function name for this unit/tool combination.

        Args:
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            MATLAB function name or None if not found
        """
        query = """
            SELECT m.matcall
            FROM tools AS t
            JOIN units AS u ON u.id = t.unit_id
            JOIN matfuncs AS m ON m.id = t.matfunc
            WHERE u.name = %s AND t.name = %s
        """

        result = await execute_query(self.conn, query, (unit_name, tool_name), fetch_one=True)

        if result and result.get("matcall"):
            matlab_func = result["matcall"]
            logger.info(f"MATLAB function found: {matlab_func}")
            return matlab_func

        logger.warning(f"No MATLAB function found for {unit_name}/{tool_name}")
        return None

    async def process_file(self, file_path: str | Path, trigger_matlab: bool = True) -> bool:
        """
        Process a Hirpinia ODS file and load data into the database.

        Args:
            file_path: Path to the ODS file to process
            trigger_matlab: Whether to trigger MATLAB elaboration after loading

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        if file_path.suffix.lower() != ".ods":
            logger.error(f"Invalid file type: {file_path.suffix}. Expected .ods")
            return False

        try:
            # Extract metadata
            unit_name, tool_name = self._extract_metadata(file_path)

            # Parse ODS file
            data_rows = self._parse_ods_file(file_path, unit_name, tool_name)

            # Insert data
            rows_inserted = await self._insert_raw_data(data_rows)

            if rows_inserted > 0:
                logger.info(f"Successfully loaded {rows_inserted} rows from {file_path.name}")

                # Optionally trigger MATLAB elaboration
                if trigger_matlab:
                    matlab_func = await self._get_matlab_function(unit_name, tool_name)
                    if matlab_func:
                        logger.warning(
                            f"MATLAB elaboration would be triggered: {matlab_func} for {unit_name}/{tool_name}"
                        )
                        logger.warning("Note: Direct MATLAB execution not implemented in refactored version")
                        # In production, this should integrate with elab_orchestrator instead
                        # of calling MATLAB directly via os.system()

                return True
            else:
                logger.warning(f"No new rows inserted from {file_path.name}")
                return False

        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False


async def main(file_path: str):
    """
    Main entry point for the Hirpinia loader.

    Args:
        file_path: Path to the ODS file to process
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger.info("Hirpinia Loader started")
    logger.info(f"Processing file: {file_path}")

    try:
        # Load configuration
        db_config = DatabaseConfig()

        # Process file
        async with HirpiniaLoader(db_config) as loader:
            success = await loader.process_file(file_path)

            if success:
                logger.info("Processing completed successfully")
                return 0
            else:
                logger.error("Processing failed")
                return 1

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1

    finally:
        logger.info("Hirpinia Loader finished")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python hirpinia_loader.py <path_to_ods_file>")
        sys.exit(1)

    exit_code = asyncio.run(main(sys.argv[1]))
    sys.exit(exit_code)
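For reference, a hypothetical caller that batch-loads a folder of Hirpinia ODS files with the class above, reusing one connection; the load_folder helper and the folder path are invented for illustration:

import asyncio
from pathlib import Path

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.loaders import HirpiniaLoader


async def load_folder(folder: str) -> None:
    # One loader (and DB connection) reused for every file in the folder
    db_config = DatabaseConfig()
    async with HirpiniaLoader(db_config) as loader:
        for ods_file in sorted(Path(folder).glob("*.ods")):
            # Skip the MATLAB trigger; elaboration is deferred to the orchestrator
            await loader.process_file(ods_file, trigger_matlab=False)


if __name__ == "__main__":
    asyncio.run(load_folder("/data/hirpinia/UNIT01"))  # hypothetical path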
413
src/refactory_scripts/loaders/sisgeo_loader.py
Normal file
@@ -0,0 +1,413 @@
"""
Sisgeo data loader - Refactored version with async support.

This script processes Sisgeo sensor data and loads it into the database.
Handles different node types with different data formats.
Replaces the legacy sisgeoLoadScript.py with modern async/await patterns.
"""

import asyncio
import logging
from datetime import datetime, timedelta
from decimal import Decimal

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_query, get_db_connection

logger = logging.getLogger(__name__)


class SisgeoLoader:
    """Loads Sisgeo sensor data into the database with smart duplicate handling."""

    # Node configuration constants
    NODE_TYPE_PRESSURE = 1  # Node type 1: pressure sensor (single value)
    NODE_TYPE_VIBRATING_WIRE = 2  # Node types 2-5: vibrating wire sensors (three values)

    # Time threshold for duplicate detection (hours)
    DUPLICATE_TIME_THRESHOLD_HOURS = 5

    # Default values for missing data
    DEFAULT_BAT_LEVEL = -1
    DEFAULT_TEMPERATURE = -273

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Sisgeo loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    async def _get_latest_record(self, unit_name: str, tool_name: str, node_num: int) -> dict | None:
        """
        Get the latest record for a specific node.

        Args:
            unit_name: Unit name
            tool_name: Tool name
            node_num: Node number

        Returns:
            Latest record dict or None if not found
        """
        query = """
            SELECT *
            FROM RAWDATACOR
            WHERE UnitName = %s AND ToolNameID = %s AND NodeNum = %s
            ORDER BY EventDate DESC, EventTime DESC
            LIMIT 1
        """

        return await execute_query(self.conn, query, (unit_name, tool_name, node_num), fetch_one=True)

    async def _insert_pressure_data(
        self,
        unit_name: str,
        tool_name: str,
        node_num: int,
        date: str,
        time: str,
        pressure: Decimal,
    ) -> bool:
        """
        Insert or update pressure sensor data (node type 1).

        Logic:
        - If no previous record exists, insert a new record
        - If the previous record has a NULL BatLevelModule:
            - Check the time difference
            - If >= 5 hours: insert a new record
            - If < 5 hours: update the existing record
        - If the previous record has a non-NULL BatLevelModule: insert a new record

        Args:
            unit_name: Unit name
            tool_name: Tool name
            node_num: Node number
            date: Date string (YYYY-MM-DD)
            time: Time string (HH:MM:SS)
            pressure: Pressure value (stored multiplied by 100, following the legacy script)

        Returns:
            True if the operation was successful
        """
        # Get latest record
        latest = await self._get_latest_record(unit_name, tool_name, node_num)

        # Scale pressure by 100 for storage. This factor is inherited from the
        # legacy script; note that a true Pa -> hPa conversion would divide by 100.
        pressure_hpa = pressure * 100

        if not latest:
            # No previous record, insert new
            query = """
                INSERT INTO RAWDATACOR
                (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, val0, BatLevelModule, TemperatureModule)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                unit_name,
                tool_name,
                node_num,
                date,
                time,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
                pressure_hpa,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
            )

            await execute_query(self.conn, query, params)
            logger.debug(f"Inserted new pressure record: {unit_name}/{tool_name}/node{node_num}")
            return True

        # Check BatLevelModule status
        if latest["BatLevelModule"] is None:
            # Calculate time difference
            old_datetime = datetime.strptime(f"{latest['EventDate']} {latest['EventTime']}", "%Y-%m-%d %H:%M:%S")
            new_datetime = datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M:%S")
            time_diff = new_datetime - old_datetime

            if time_diff >= timedelta(hours=self.DUPLICATE_TIME_THRESHOLD_HOURS):
                # Time difference >= 5 hours, insert new record
                query = """
                    INSERT INTO RAWDATACOR
                    (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, val0, BatLevelModule, TemperatureModule)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
                params = (
                    unit_name,
                    tool_name,
                    node_num,
                    date,
                    time,
                    self.DEFAULT_BAT_LEVEL,
                    self.DEFAULT_TEMPERATURE,
                    pressure_hpa,
                    self.DEFAULT_BAT_LEVEL,
                    self.DEFAULT_TEMPERATURE,
                )

                await execute_query(self.conn, query, params)
                logger.debug(
                    f"Inserted new pressure record (time diff: {time_diff}): {unit_name}/{tool_name}/node{node_num}"
                )
            else:
                # Time difference < 5 hours, update existing record
                query = """
                    UPDATE RAWDATACOR
                    SET val0 = %s, EventDate = %s, EventTime = %s
                    WHERE UnitName = %s AND ToolNameID = %s AND NodeNum = %s AND val0 IS NULL
                    ORDER BY EventDate DESC, EventTime DESC
                    LIMIT 1
                """
                params = (pressure_hpa, date, time, unit_name, tool_name, node_num)

                await execute_query(self.conn, query, params)
                logger.debug(
                    f"Updated existing pressure record (time diff: {time_diff}): {unit_name}/{tool_name}/node{node_num}"
                )

        else:
            # BatLevelModule is not NULL, insert new record
            query = """
                INSERT INTO RAWDATACOR
                (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, val0, BatLevelModule, TemperatureModule)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                unit_name,
                tool_name,
                node_num,
                date,
                time,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
                pressure_hpa,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
            )

            await execute_query(self.conn, query, params)
            logger.debug(
                f"Inserted new pressure record (BatLevelModule not NULL): {unit_name}/{tool_name}/node{node_num}"
            )

        return True

    async def _insert_vibrating_wire_data(
        self,
        unit_name: str,
        tool_name: str,
        node_num: int,
        date: str,
        time: str,
        freq_hz: float,
        therm_ohms: float,
        freq_digit: float,
    ) -> bool:
        """
        Insert or update vibrating wire sensor data (node types 2-5).

        Logic:
        - If no previous record exists, insert a new record
        - If the previous record has a NULL BatLevelModule: update the existing record
        - If the previous record has a non-NULL BatLevelModule: insert a new record

        Args:
            unit_name: Unit name
            tool_name: Tool name
            node_num: Node number
            date: Date string (YYYY-MM-DD)
            time: Time string (HH:MM:SS)
            freq_hz: Frequency in Hz
            therm_ohms: Thermistor resistance in Ohms
            freq_digit: Frequency in digits

        Returns:
            True if the operation was successful
        """
        # Get latest record
        latest = await self._get_latest_record(unit_name, tool_name, node_num)

        if not latest:
            # No previous record, insert new
            query = """
                INSERT INTO RAWDATACOR
                (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, val0, val1, val2, BatLevelModule, TemperatureModule)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                unit_name,
                tool_name,
                node_num,
                date,
                time,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
                freq_hz,
                therm_ohms,
                freq_digit,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
            )

            await execute_query(self.conn, query, params)
            logger.debug(f"Inserted new vibrating wire record: {unit_name}/{tool_name}/node{node_num}")
            return True

        # Check BatLevelModule status
        if latest["BatLevelModule"] is None:
            # Update existing record
            query = """
                UPDATE RAWDATACOR
                SET val0 = %s, val1 = %s, val2 = %s, EventDate = %s, EventTime = %s
                WHERE UnitName = %s AND ToolNameID = %s AND NodeNum = %s AND val0 IS NULL
                ORDER BY EventDate DESC, EventTime DESC
                LIMIT 1
            """
            params = (freq_hz, therm_ohms, freq_digit, date, time, unit_name, tool_name, node_num)

            await execute_query(self.conn, query, params)
            logger.debug(f"Updated existing vibrating wire record: {unit_name}/{tool_name}/node{node_num}")

        else:
            # BatLevelModule is not NULL, insert new record
            query = """
                INSERT INTO RAWDATACOR
                (UnitName, ToolNameID, NodeNum, EventDate, EventTime, BatLevel, Temperature, val0, val1, val2, BatLevelModule, TemperatureModule)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            params = (
                unit_name,
                tool_name,
                node_num,
                date,
                time,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
                freq_hz,
                therm_ohms,
                freq_digit,
                self.DEFAULT_BAT_LEVEL,
                self.DEFAULT_TEMPERATURE,
            )

            await execute_query(self.conn, query, params)
            logger.debug(
                f"Inserted new vibrating wire record (BatLevelModule not NULL): {unit_name}/{tool_name}/node{node_num}"
            )

        return True

    async def process_data(self, raw_data: list[tuple], elab_data: list[tuple]) -> tuple[int, int]:
        """
        Process raw and elaborated data from Sisgeo sensors.

        Args:
            raw_data: List of raw data tuples
            elab_data: List of elaborated data tuples

        Returns:
            Tuple of (raw_records_processed, elab_records_processed)
        """
        raw_count = 0
        elab_count = 0

        # Process raw data
        for record in raw_data:
            try:
                if len(record) == 6:
                    # Pressure sensor data (node type 1)
                    unit_name, tool_name, node_num, pressure, date, time = record
                    success = await self._insert_pressure_data(
                        unit_name, tool_name, node_num, date, time, Decimal(pressure)
                    )
                    if success:
                        raw_count += 1

                elif len(record) == 8:
                    # Vibrating wire sensor data (node types 2-5)
                    (
                        unit_name,
                        tool_name,
                        node_num,
                        freq_hz,
                        therm_ohms,
                        freq_digit,
                        date,
                        time,
                    ) = record
                    success = await self._insert_vibrating_wire_data(
                        unit_name,
                        tool_name,
                        node_num,
                        date,
                        time,
                        freq_hz,
                        therm_ohms,
                        freq_digit,
                    )
                    if success:
                        raw_count += 1
                else:
                    logger.warning(f"Unknown record format: {len(record)} fields")

            except Exception as e:
                logger.error(f"Failed to process raw record: {e}", exc_info=True)
                logger.debug(f"Record: {record}")

        # Elaborated data is currently not processed: the legacy script accepted
        # an elab_data parameter but never used it. Implement here if needed.

        logger.info(f"Processed {raw_count} raw records, {elab_count} elaborated records")
        return raw_count, elab_count


async def main():
    """
    Main entry point for the Sisgeo loader.

    Note: This is a library module, typically called by other scripts.
    Direct execution is provided for testing purposes.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger.info("Sisgeo Loader module loaded")
    logger.info("This is a library module. Use the SisgeoLoader class in your scripts.")


if __name__ == "__main__":
    asyncio.run(main())
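A usage sketch for the class above, showing the two tuple shapes process_data dispatches on: 6 fields for a pressure node, 8 for a vibrating wire node. The unit/tool names and sensor values are invented:

import asyncio

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.loaders import SisgeoLoader


async def demo() -> None:
    raw_data = [
        # 6-field tuple -> pressure sensor (node type 1)
        ("UNIT01", "SIS01", 1, "1013.25", "2024-05-01", "12:00:00"),
        # 8-field tuple -> vibrating wire sensor (node types 2-5):
        # frequency (Hz), thermistor (Ohms), frequency (digits)
        ("UNIT01", "SIS01", 2, 812.4, 2951.0, 6592.1, "2024-05-01", "12:00:00"),
    ]

    db_config = DatabaseConfig()
    async with SisgeoLoader(db_config) as loader:
        raw_count, elab_count = await loader.process_data(raw_data, elab_data=[])


asyncio.run(demo())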
392
src/refactory_scripts/loaders/vulink_loader.py
Normal file
@@ -0,0 +1,392 @@
"""
Vulink data loader - Refactored version with async support.

This script processes Vulink CSV files and loads data into the database.
Handles battery level monitoring and pH threshold alarms.
Replaces the legacy vulinkScript.py with modern async/await patterns.
"""

import asyncio
import json
import logging
import sys
from datetime import datetime, timedelta
from pathlib import Path

from refactory_scripts.config import DatabaseConfig
from refactory_scripts.utils import execute_query, get_db_connection

logger = logging.getLogger(__name__)


class VulinkLoader:
    """Loads Vulink sensor data from CSV files into the database with alarm management."""

    # Node type constants
    NODE_TYPE_PIEZO = 2
    NODE_TYPE_BARO = 3
    NODE_TYPE_CONDUCTIVITY = 4
    NODE_TYPE_PH = 5

    # Battery thresholds
    BATTERY_LOW_THRESHOLD = 25.0
    BATTERY_ALARM_INTERVAL_HOURS = 24

    def __init__(self, db_config: DatabaseConfig):
        """
        Initialize the Vulink loader.

        Args:
            db_config: Database configuration object
        """
        self.db_config = db_config
        self.conn = None

    async def __aenter__(self):
        """Async context manager entry."""
        self.conn = await get_db_connection(self.db_config.as_dict())
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        if self.conn:
            self.conn.close()

    def _extract_metadata(self, file_path: Path) -> str:
        """
        Extract serial number from filename.

        Args:
            file_path: Path to the CSV file

        Returns:
            Serial number string
        """
        file_name = file_path.stem
        serial_number = file_name.split("_")[0]
        logger.debug(f"Extracted serial number: {serial_number}")
        return serial_number

    async def _get_unit_and_tool(self, serial_number: str) -> tuple[str, str] | None:
        """
        Get unit name and tool name from serial number.

        Args:
            serial_number: Device serial number

        Returns:
            Tuple of (unit_name, tool_name) or None if not found
        """
        query = "SELECT unit_name, tool_name FROM vulink_tools WHERE serial_number = %s"
        result = await execute_query(self.conn, query, (serial_number,), fetch_one=True)

        if result:
            unit_name = result["unit_name"]
            tool_name = result["tool_name"]
            logger.info(f"Serial {serial_number} -> Unit: {unit_name}, Tool: {tool_name}")
            return unit_name, tool_name

        logger.error(f"Serial number {serial_number} not found in vulink_tools table")
        return None

    async def _get_node_configuration(self, unit_name: str, tool_name: str) -> dict[int, dict]:
        """
        Get node configuration including depth and thresholds.

        Args:
            unit_name: Unit name
            tool_name: Tool name

        Returns:
            Dictionary mapping node numbers to their configuration
        """
        query = """
            SELECT t.soglie, n.num as node_num, n.nodetype_id, n.depth
            FROM nodes AS n
            LEFT JOIN tools AS t ON n.tool_id = t.id
            LEFT JOIN units AS u ON u.id = t.unit_id
            WHERE u.name = %s AND t.name = %s
        """

        results = await execute_query(self.conn, query, (unit_name, tool_name), fetch_all=True)

        node_config = {}
        for row in results:
            node_num = row["node_num"]
            node_config[node_num] = {
                "nodetype_id": row["nodetype_id"],
                "depth": row.get("depth"),
                "thresholds": row.get("soglie"),
            }

        logger.debug(f"Loaded configuration for {len(node_config)} nodes")
        return node_config

    async def _check_battery_alarm(self, unit_name: str, date_time: str, battery_perc: float) -> None:
        """
        Check battery level and create an alarm if necessary.

        Args:
            unit_name: Unit name
            date_time: Current datetime string
            battery_perc: Battery percentage
        """
        if battery_perc >= self.BATTERY_LOW_THRESHOLD:
            return  # Battery level is fine

        logger.warning(f"Low battery detected for {unit_name}: {battery_perc}%")

        # Check if we already have a recent battery alarm
        query = """
            SELECT unit_name, date_time
            FROM alarms
            WHERE unit_name = %s AND date_time < %s AND type_id = 2
            ORDER BY date_time DESC
            LIMIT 1
        """

        result = await execute_query(self.conn, query, (unit_name, date_time), fetch_one=True)

        should_create_alarm = False

        if result:
            alarm_date_time = result["date_time"]
            dt1 = datetime.strptime(date_time, "%Y-%m-%d %H:%M")

            time_difference = abs(dt1 - alarm_date_time)

            if time_difference > timedelta(hours=self.BATTERY_ALARM_INTERVAL_HOURS):
                logger.info(f"Previous alarm was more than {self.BATTERY_ALARM_INTERVAL_HOURS}h ago, creating new alarm")
                should_create_alarm = True
        else:
            logger.info("No previous battery alarm found, creating new alarm")
            should_create_alarm = True

        if should_create_alarm:
            await self._create_battery_alarm(unit_name, date_time, battery_perc)

    async def _create_battery_alarm(self, unit_name: str, date_time: str, battery_perc: float) -> None:
        """
        Create a battery level alarm.

        Args:
            unit_name: Unit name
            date_time: Datetime string
            battery_perc: Battery percentage
        """
        query = """
            INSERT IGNORE INTO alarms
            (type_id, unit_name, date_time, battery_level, description, send_email, send_sms)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
        """

        params = (2, unit_name, date_time, battery_perc, "Low battery <25%", 1, 0)

        await execute_query(self.conn, query, params)
        logger.warning(f"Battery alarm created for {unit_name} at {date_time}: {battery_perc}%")

    async def _check_ph_threshold(
        self,
        unit_name: str,
        tool_name: str,
        node_num: int,
        date_time: str,
        ph_value: float,
        thresholds_json: str,
    ) -> None:
        """
        Check pH value against thresholds and create an alarm if necessary.

        Args:
            unit_name: Unit name
            tool_name: Tool name
            node_num: Node number
            date_time: Datetime string
            ph_value: Current pH value
            thresholds_json: JSON string with threshold configuration
        """
        if not thresholds_json:
            return

        try:
            thresholds = json.loads(thresholds_json)
            ph_config = next((item for item in thresholds if item.get("type") == "PH Link"), None)

            if not ph_config or not ph_config["data"].get("ph"):
                return  # pH monitoring not enabled

            data = ph_config["data"]

            # Get previous pH value
            query = """
                SELECT XShift, EventDate, EventTime
                FROM ELABDATADISP
                WHERE UnitName = %s AND ToolNameID = %s AND NodeNum = %s
                AND CONCAT(EventDate, ' ', EventTime) < %s
                ORDER BY CONCAT(EventDate, ' ', EventTime) DESC
                LIMIT 1
            """

            result = await execute_query(self.conn, query, (unit_name, tool_name, node_num, date_time), fetch_one=True)

            ph_value_prev = float(result["XShift"]) if result else 0.0

            # Check each threshold level (3 = highest, 1 = lowest)
            for level, level_name in [(3, "tre"), (2, "due"), (1, "uno")]:
                enabled_key = f"ph_{level_name}"
                value_key = f"ph_{level_name}_value"
                email_key = f"ph_{level_name}_email"
                sms_key = f"ph_{level_name}_sms"

                if (
                    data.get(enabled_key)
                    and data.get(value_key)
                    and float(ph_value) > float(data[value_key])
                    and ph_value_prev <= float(data[value_key])
                ):
                    # Threshold crossed, create alarm
                    await self._create_ph_alarm(
                        tool_name,
                        unit_name,
                        node_num,
                        date_time,
                        ph_value,
                        level,
                        data[email_key],
                        data[sms_key],
                    )
                    logger.info(f"pH alarm level {level} triggered for {unit_name}/{tool_name}/node{node_num}")
                    break  # Only trigger the highest crossed level

        except (json.JSONDecodeError, KeyError, TypeError) as e:
            logger.error(f"Failed to parse pH thresholds: {e}")

    async def _create_ph_alarm(
        self,
        tool_name: str,
        unit_name: str,
        node_num: int,
        date_time: str,
        ph_value: float,
        level: int,
        send_email: bool,
        send_sms: bool,
    ) -> None:
        """
        Create a pH threshold alarm.

        Args:
            tool_name: Tool name
            unit_name: Unit name
            node_num: Node number
            date_time: Datetime string
            ph_value: pH value
            level: Alarm level (1-3)
            send_email: Whether to send email
            send_sms: Whether to send SMS
        """
        query = """
            INSERT IGNORE INTO alarms
            (type_id, tool_name, unit_name, date_time, registered_value, node_num, alarm_level, description, send_email, send_sms)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """

        params = (3, tool_name, unit_name, date_time, ph_value, node_num, level, "pH", send_email, send_sms)

        await execute_query(self.conn, query, params)
        logger.warning(
            f"pH alarm level {level} created for {unit_name}/{tool_name}/node{node_num}: {ph_value} at {date_time}"
        )

    async def process_file(self, file_path: str | Path) -> bool:
        """
        Process a Vulink CSV file and load data into the database.

        Args:
            file_path: Path to the CSV file to process

        Returns:
            True if processing was successful, False otherwise
        """
        file_path = Path(file_path)

        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        try:
            # Extract serial number
            serial_number = self._extract_metadata(file_path)

            # Get unit and tool names
            unit_tool = await self._get_unit_and_tool(serial_number)
            if not unit_tool:
                return False

            unit_name, tool_name = unit_tool

            # Get node configuration
            node_config = await self._get_node_configuration(unit_name, tool_name)

            if not node_config:
                logger.error(f"No node configuration found for {unit_name}/{tool_name}")
                return False

            # Parse CSV file (implementation depends on CSV format)
            logger.info(f"Processing Vulink file: {file_path.name}")
            logger.info(f"Unit: {unit_name}, Tool: {tool_name}")
            logger.info(f"Nodes configured: {len(node_config)}")

            # Note: Actual CSV parsing and data insertion logic would go here.
            # This requires knowledge of the specific Vulink CSV format.
            logger.warning("CSV parsing not fully implemented - requires Vulink CSV format specification")

            return True

        except Exception as e:
            logger.error(f"Failed to process file {file_path}: {e}", exc_info=True)
            return False


async def main(file_path: str):
    """
    Main entry point for the Vulink loader.

    Args:
        file_path: Path to the CSV file to process
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    logger.info("Vulink Loader started")
    logger.info(f"Processing file: {file_path}")

    try:
        db_config = DatabaseConfig()

        async with VulinkLoader(db_config) as loader:
            success = await loader.process_file(file_path)

            if success:
                logger.info("Processing completed successfully")
                return 0
            else:
                logger.error("Processing failed")
                return 1

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        return 1

    finally:
        logger.info("Vulink Loader finished")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python vulink_loader.py <path_to_csv_file>")
        sys.exit(1)

    exit_code = asyncio.run(main(sys.argv[1]))
    sys.exit(exit_code)
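The pH check above reads its threshold configuration from the tools.soglie JSON column, whose schema is not included in this commit. Reconstructing it from the keys _check_ph_threshold actually reads, a plausible payload looks like the sketch below; every value is invented for illustration:

import json

# Hypothetical 'soglie' payload: one entry per sensor type, with a master
# enable flag ("ph"), and per-level threshold values plus email/SMS switches
# for the three levels uno/due/tre (1/2/3, low to high).
soglie = json.dumps(
    [
        {
            "type": "PH Link",
            "data": {
                "ph": True,  # master switch for pH monitoring
                "ph_uno": True, "ph_uno_value": 8.5, "ph_uno_email": 1, "ph_uno_sms": 0,
                "ph_due": True, "ph_due_value": 9.0, "ph_due_email": 1, "ph_due_sms": 0,
                "ph_tre": True, "ph_tre_value": 9.5, "ph_tre_email": 1, "ph_tre_sms": 1,
            },
        }
    ]
)

Because the level loop iterates from 3 down to 1 and breaks on the first crossing, only the highest crossed threshold raises an alarm for a given reading.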