Files
ASE/utils/csv/data_preparation.py
2025-07-06 21:52:41 +02:00

210 lines
9.5 KiB
Python

#!.venv/bin/python
from utils.database.nodes_query import get_nodes_type
import utils.timestamp.date_check as date_check
import logging
import re
from itertools import islice
logger = logging.getLogger(__name__)
async def get_data(cfg: object, id: int, pool) -> tuple:
    """
    Retrieves unit name, tool name, and tool data for a given record ID from the database.

    The record ID is handed to the driver as a bound parameter instead of being
    formatted into the SQL text, so a malformed or hostile ``id`` value cannot
    alter the statement.

    Args:
        cfg (object): Configuration object; its ``dbrectable`` attribute is the
            name of the table that is queried.
        id (int): The ID of the record to retrieve.
        pool: The database connection pool (must support ``acquire()`` as an
            async context manager yielding a connection with ``cursor()``).

    Returns:
        tuple: A tuple containing unit_name, tool_name, and tool_data.

    Raises:
        TypeError: If ``fetchone()`` yields ``None`` (presumably when no row
            matches ``id``), because the result cannot be unpacked.
    """
    # Identifiers cannot be bound as parameters, so the table name (taken from
    # configuration) is still interpolated; only the value is bound.
    # NOTE(review): '%s' is the placeholder of 'format'-paramstyle drivers such
    # as aiomysql/PyMySQL -- confirm this matches the pool's driver.
    query = f'select unit_name, tool_name, tool_data from {cfg.dbrectable} where id = %s'
    async with pool.acquire() as conn:
        async with conn.cursor() as cur:
            await cur.execute(query, (id,))
            unit_name, tool_name, tool_data = await cur.fetchone()
    return unit_name, tool_name, tool_data
async def make_pipe_sep_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a record whose node readings are separated by ';|;'.

    Only lines of the record's tool data that contain ';|;' are parsed. Every
    parsed line produces one output row per node, laid out as: unit name, tool
    name, 1-based node number, conformed event date, event time, battery
    level, temperature, then the node's values padded with ``None`` up to 19
    entries (longer value lists are not truncated).

    Args:
        cfg (object): Configuration object, forwarded to ``get_data``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the matrix.
    """
    unit_name, tool_name, tool_data = await get_data(cfg, id, pool)
    rows = []
    for line in tool_data.splitlines():
        if ';|;' not in line:
            continue
        timestamp, battery, temp, readings = line.split(';', 3)
        event_date, event_time = timestamp.split(' ')
        if battery == '|':
            # A '|' in the battery slot means the fields are shifted by one:
            # the next field is the battery level and the temperature is the
            # first element of the remaining readings.
            battery = temp
            temp, readings = readings.split(';', 1)
        # Some readings lack temperature and battery level: store 0 instead.
        temp = 0 if temp == '' else temp
        battery = 0 if battery == '' else battery
        # Drop leading '|'/';' characters and trailing ';', then split the
        # remainder into one chunk per node on ';|;'.
        node_chunks = readings.lstrip('|;').rstrip(';').split(';|;')
        for node_number, chunk in enumerate(node_chunks, start=1):
            node_values = chunk.split(';')
            padding = [None] * (19 - len(node_values))
            rows.append(
                [unit_name, tool_name, node_number, date_check.conforma_data(event_date), event_time, battery, temp]
                + node_values
                + padding
            )
    return rows
async def make_ain_din_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a record carrying two analog and two digital inputs.

    A line is parsed only when it matches the expected layout: a timestamp
    (``YYYY/MM/DD`` or ``DD/MM/YYYY`` followed by ``HH:MM:SS``), two decimal
    fields and four integer fields, all separated by ';'. When analog inputs
    are configured they become nodes 1 and 2; when digital inputs are
    configured they become nodes 3 and 4 if analog inputs are also present,
    otherwise nodes 1 and 2. Each output row holds a single value followed by
    18 ``None`` placeholders.

    Args:
        cfg (object): Configuration object, forwarded to ``get_data`` and ``get_nodes_type``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the
        matrix; empty when neither analog nor digital inputs are configured.
    """
    UnitName, ToolNameID, ToolData = await get_data(cfg, id, pool)
    _node_channels, _node_types, node_ains, node_dins = get_nodes_type(cfg, ToolNameID, UnitName)
    lines = ToolData.splitlines()
    line_format = r'^(?:\d{4}\/\d{2}\/\d{2}|\d{2}\/\d{2}\/\d{4}) \d{2}:\d{2}:\d{2}(?:;\d+\.\d+){2}(?:;\d+){4}$'
    rows = []

    if not (node_ains or node_dins):
        return rows

    for line in lines:
        if not re.match(line_format, line):
            continue
        timestamp, battery, temp, ain_1, ain_2, din_1, din_2 = line.split(';')
        event_date, event_time = timestamp.split(' ')

        def build_row(node_number, value):
            # One measured value per row, padded to 19 value slots.
            return [UnitName, ToolNameID, node_number, date_check.conforma_data(event_date), event_time, battery, temp] + [value] + ([None] * (19 - 1))

        has_analog = any(node_ains)
        if has_analog:
            for node_number, value in enumerate((ain_1, ain_2), start=1):
                rows.append(build_row(node_number, value))
        else:
            logger.info(f"Nessun Ingresso analogico per {UnitName} {ToolNameID}")

        if any(node_dins):
            # Digital nodes are numbered after the two analog nodes when those exist.
            first_digital_node = 3 if has_analog else 1
            for node_number, value in enumerate((din_1, din_2), start=first_digital_node):
                rows.append(build_row(node_number, value))
        else:
            logger.info(f"Nessun Ingresso digitale per {UnitName} {ToolNameID}")

    return rows
async def make_channels_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a record whose values are distributed to nodes by channel count.

    Only lines containing ';|;' are parsed. After the timestamp, battery level
    and temperature, the remaining values (ignoring bare '|' tokens) are dealt
    out to the nodes in order: each node takes as many values as its entry in
    the channel list returned by ``get_nodes_type``. Each output row is padded
    with ``None`` up to 19 value slots.

    Args:
        cfg (object): Configuration object, forwarded to ``get_data`` and ``get_nodes_type``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the matrix.
    """
    unit_name, tool_name, tool_data = await get_data(cfg, id, pool)
    node_channels, _node_types, _node_ains, _node_dins = get_nodes_type(cfg, tool_name, unit_name)
    rows = []
    for line in tool_data.splitlines():
        if ';|;' not in line:
            continue
        # Collapse the node separator so the whole line is a flat ';' list.
        flat_line = line.replace(';|;', ';')
        timestamp, battery, temp, readings = flat_line.split(';', 3)
        event_date, event_time = timestamp.split(' ')
        # Single shared iterator: each node consumes the next slice of values.
        remaining = iter([value for value in readings.split(';') if value != '|'])
        per_node_values = [list(islice(remaining, channel_count)) for channel_count in node_channels]
        for node_number, node_values in enumerate(per_node_values, start=1):
            padding = [None] * (19 - len(node_values))
            rows.append(
                [unit_name, tool_name, node_number, date_check.conforma_data(event_date), event_time, battery, temp]
                + node_values
                + padding
            )
    return rows
async def make_musa_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a 'Musa' record.

    Only lines containing ';|;' are parsed, and lines with an empty timestamp
    field are skipped. The temperature is the first of the values that follow
    the battery level; that same value also stays in the list that is dealt
    out to the nodes. Values (ignoring bare '|' tokens) are assigned to nodes
    in order, each node taking as many values as its entry in the channel list
    returned by ``get_nodes_type``. Each output row is padded with ``None`` up
    to 19 value slots.

    Args:
        cfg (object): Configuration object, forwarded to ``get_data`` and ``get_nodes_type``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the matrix.
    """
    unit_name, tool_name, tool_data = await get_data(cfg, id, pool)
    node_channels, _node_types, _node_ains, _node_dins = get_nodes_type(cfg, tool_name, unit_name)
    rows = []
    for line in tool_data.splitlines():
        if ';|;' not in line:
            continue
        # Collapse the node separator so the whole line is a flat ';' list.
        timestamp, battery, rilevazioni = line.replace(';|;', ';').split(';', 2)
        if timestamp == '':
            continue
        event_date, event_time = timestamp.split(' ')
        tokens = rilevazioni.split(';')
        temperature = tokens[0]
        logger.info(f'{temperature}, {rilevazioni}')
        # Single shared iterator: each node consumes the next slice of values.
        remaining = iter([token for token in tokens if token != '|'])
        per_node_values = [list(islice(remaining, channel_count)) for channel_count in node_channels]
        for node_number, node_values in enumerate(per_node_values, start=1):
            padding = [None] * (19 - len(node_values))
            rows.append(
                [unit_name, tool_name, node_number, date_check.conforma_data(event_date), event_time, battery, temperature]
                + node_values
                + padding
            )
    return rows
async def make_tlp_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a 'TLP' record.

    Every line of the tool data is parsed (no filtering is applied). A line
    consists of timestamp, battery level, temperature, barometer and the
    remaining readings, separated by ';'. The barometer value is appended
    after the readings, and the resulting list is cut into consecutive groups
    of two values, one group per node. Each output row is padded with ``None``
    up to 19 value slots.

    Args:
        cfg (object): Configuration object, forwarded to ``get_data``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the matrix.
    """
    unit_name, tool_name, tool_data = await get_data(cfg, id, pool)
    values_per_node = 2
    rows = []
    for line in tool_data.splitlines():
        timestamp, battery, temp, barometer, readings = line.split(';', 4)
        event_date, event_time = timestamp.split(' ')
        all_values = readings.strip(';').split(';')
        # The barometer goes last, so it lands in the final node group.
        all_values.append(barometer)
        group_starts = range(0, len(all_values), values_per_node)
        node_groups = [all_values[start:start + values_per_node] for start in group_starts]
        for node_number, node_values in enumerate(node_groups, start=1):
            padding = [None] * (19 - len(node_values))
            rows.append(
                [unit_name, tool_name, node_number, date_check.conforma_data(event_date), event_time, battery, temp]
                + node_values
                + padding
            )
    return rows
async def make_gd_matrix(cfg: object, id: int, pool) -> list:
    """
    Builds the value matrix for a 'GD' record.

    Only lines containing ';|;' are parsed. When the battery slot holds '|',
    the fields are shifted by one: the next field is used as battery level and
    the temperature is taken from the head of the remaining readings. A line
    contributes rows only when its remaining readings string matches
    ``^-\\d*dB$`` (a minus sign, optional digits, then ``dB``); other lines
    are ignored. Each output row is padded with ``None`` up to 19 value slots.

    Args:
        cfg (object): Configuration object, forwarded to ``get_data``.
        id (int): The ID of the CSV record.
        pool: The database connection pool, forwarded to ``get_data``.

    Returns:
        list: A list of lists, where each inner list represents a row in the matrix.
    """
    unit_name, tool_name, tool_data = await get_data(cfg, id, pool)
    lines = tool_data.splitlines()
    db_reading = r'^-\d*dB$'
    rows = []
    for line in lines:
        if ';|;' not in line:
            continue
        timestamp, battery, temp, readings = line.split(';', 3)
        event_date, event_time = timestamp.split(' ')
        if battery == '|':
            battery = temp
            temp, readings = readings.split(';', 1)
        if not re.match(db_reading, readings):
            continue
        # Drop leading '|'/';' characters and trailing ';', then split the
        # remainder into one chunk per node on ';|;'.
        node_chunks = readings.lstrip('|;').rstrip(';').split(';|;')
        for node_number, chunk in enumerate(node_chunks, start=1):
            node_values = chunk.split(';')
            padding = [None] * (19 - len(node_values))
            rows.append(
                [unit_name, tool_name, node_number, date_check.conforma_data(event_date), event_time, battery, temp]
                + node_values
                + padding
            )
    return rows