commit 97abdc8dfa
2024-11-16 15:20:50 +01:00
5 changed files with 935 additions and 0 deletions

csvLoader/CsvLoader.py Executable file (+197)

@@ -0,0 +1,197 @@
#!/usr/bin/python3
import sys
import os
import pika
import logging
import csv
import re
import mariadb
import shutil
from utils.timefmt import timestamp_fmt as ts
from utils.timefmt import date_refmt as df
from utils.config import set_config as setting
class sqlraw:
    def __init__(self, cfg):
        self.config = {"host": cfg.dbhost, "user": cfg.dbuser, "password": cfg.dbpass}
        self.dbname = cfg.dbname
        self.table = cfg.table
        self.sql_head = (
            "INSERT IGNORE INTO "
            + self.dbname
            + "."
            + self.table
            + " (`UnitName`,`ToolName`,`eventDT`,`BatteryLevel`,`Temperature`,`NodeNum`,"
            + "`Val0`,`Val1`,`Val2`,`Val3`,`Val4`,`Val5`,`Val6`,`Val7`,"
            + "`Val8`,`Val9`,`ValA`,`ValB`,`ValC`,`ValD`,`ValE`,`ValF`) VALUES "
        )

    def add_data(self, values):
        # Build a single multi-row INSERT statement from the prepared value tuples.
        self.sql = self.sql_head + "(" + "),(".join(values) + ");"

    def write_db(self):
        try:
            conn = mariadb.connect(**self.config, database=self.dbname)
        except mariadb.Error as err:
            logging.error(
                "PID {:>5} >> Failed to connect to DB {} - system error {}.".format(
                    os.getpid(), self.dbname, err
                )
            )
            sys.exit(1)
        cur = conn.cursor()
        try:
            cur.execute(self.sql)
            conn.commit()  # commit explicitly: MariaDB Connector/Python does not autocommit by default
        except mariadb.ProgrammingError as err:
            logging.error(
                "PID {:>5} >> Failed to write to DB {} - system error {}.".format(
                    os.getpid(), self.dbname, err
                )
            )
            print(err)
            sys.exit(1)
        finally:
            conn.close()
def callback_ase(ch, method, properties, body, config):  # body is a bytes object
    logging.info(
        "PID {0:>5} >> Read message {1}".format(os.getpid(), body.decode("utf-8"))
    )
    msg = body.decode("utf-8").split(";")
    sql = sqlraw(config)
    stmlst = []
    commonData = '"{0}","{1}"'.format(msg[1], msg[2])
    tooltype = msg[3]
    with open(msg[6], "r") as csvfile:
        lines = csvfile.read().splitlines()
    for line in lines:
        fields = line.split(";|;")
        # G501 rows start with "YYYY/MM/DD hh:mm:ss;battery;temperature" in a single field.
        if mG501 := re.match(
            r"^(\d\d\d\d\/\d\d\/\d\d\s\d\d:\d\d:\d\d);(.+);(.+)$", fields[0]
        ):
            rowData = ',"{0}",{1},{2}'.format(
                mG501.group(1), mG501.group(2), mG501.group(3)
            )
            fields.pop(0)
        # G201 rows carry "DD/MM/YYYY hh:mm:ss" alone; battery and temperature follow in the next field.
        elif mG201 := re.match(
            r"^(\d\d\/\d\d\/\d\d\d\d\s\d\d:\d\d:\d\d)$", fields[0]
        ):
            mbtG201 = re.match(r"^(.+);(.+)$", fields[1])
            rowData = ',"{0}",{1},{2}'.format(
                df.dateTimeFmt(mG201.group(1)), mbtG201.group(1), mbtG201.group(2)
            )
            fields.pop(0)
            fields.pop(0)
        else:
            continue
        nodeNum = 0
        for field in fields:
            nodeNum += 1
            vals = field.split(";")
            stmlst.append(
                commonData
                + rowData
                + ",{0},".format(nodeNum)
                + ",".join('"{0}"'.format(d) for d in vals)
                + ","
                + ",".join(["null"] * (config.valueNum - len(vals)))
            )
        if config.maxInsertRow < len(stmlst):
            sql.add_data(stmlst)
            try:
                sql.write_db()
                stmlst.clear()
            except Exception:
                print("error while inserting data")
                sys.exit(1)
    if len(stmlst) > 0:
        sql.add_data(stmlst)
    try:
        sql.write_db()
        ch.basic_ack(delivery_tag=method.delivery_tag)
    except Exception:
        print("error while inserting data")
        sys.exit(1)
    newFilename = msg[6].replace("received", "loaded")
    newPath, filenameExt = os.path.split(newFilename)
    try:
        os.makedirs(newPath)
        logging.info("PID {:>5} >> path {} created.".format(os.getpid(), newPath))
    except FileExistsError:
        logging.info(
            "PID {:>5} >> path {} already exists.".format(os.getpid(), newPath)
        )
    try:
        shutil.move(msg[6], newFilename)
        logging.info(
            "PID {:>5} >> {} moved into {}.".format(
                os.getpid(), filenameExt, newFilename
            )
        )
    except OSError:
        logging.error(
            "PID {:>5} >> Failed to move {} into {}.".format(
                os.getpid(), filenameExt, newFilename
            )
        )
def main():
    cfg = setting.config()
    logging.basicConfig(
        format="%(asctime)s %(message)s",
        filename="/var/log/" + cfg.elablog,
        level=logging.INFO,
    )
    parameters = pika.URLParameters(
        "amqp://"
        + cfg.mquser
        + ":"
        + cfg.mqpass
        + "@"
        + cfg.mqhost
        + ":"
        + cfg.mqport
        + "/%2F"
    )
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()
    channel.queue_declare(queue=cfg.csv_queue, durable=True)
    channel.basic_qos(prefetch_count=1)
    channel.basic_consume(
        queue=cfg.csv_queue,
        on_message_callback=lambda ch, method, properties, body: callback_ase(
            ch, method, properties, body, config=cfg
        ),
    )
    # channel.basic_consume(queue=cfg.csv_queue, on_message_callback=callback, arguments=cfg)
    try:
        channel.start_consuming()
    except KeyboardInterrupt:
        logging.info(
            "PID {0:>5} >> Info: {1}.".format(
                os.getpid(), "Shutdown requested...exiting"
            )
        )


if __name__ == "__main__":
    main()
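
For reference, a minimal sketch of a test publisher for the queue this loader consumes. The semicolon-separated message layout (unit name in field 1, tool name in field 2, tool type in field 3, path of the received CSV in field 6) is inferred from callback_ase; the broker URL, queue name, and sample values below are placeholder assumptions, not part of the commit.

#!/usr/bin/python3
# Hypothetical test publisher - message layout inferred from callback_ase
# (msg[1] = unit name, msg[2] = tool name, msg[3] = tool type, msg[6] = CSV path);
# broker URL, queue name and field values are placeholder assumptions.
import pika

parameters = pika.URLParameters("amqp://guest:guest@localhost:5672/%2F")
connection = pika.BlockingConnection(parameters)
channel = connection.channel()
channel.queue_declare(queue="csv_queue", durable=True)

message = ";UNIT01;TOOL01;G501;;;/data/received/UNIT01/TOOL01.csv"
channel.basic_publish(
    exchange="",
    routing_key="csv_queue",
    body=message.encode("utf-8"),
    properties=pika.BasicProperties(delivery_mode=2),  # persistent message
)
connection.close()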

csvLoader/transform_file.py Normal file (+102)

@@ -0,0 +1,102 @@
import json
import psycopg2  # PostgreSQL driver used by SQLAlchemy under the hood
from sqlalchemy import create_engine, text

# Configure the connection to the PostgreSQL database
engine = create_engine('postgresql://asepg:batt1l0@10.211.114.101:5432/asedb')
def write_db(engine, records):
    # Batch-upsert the records into dataraw; each record gets its own numbered bind parameters.
    with engine.connect() as conn:
        conn.execute(text("""
            INSERT INTO dataraw (nome_unit, tipo_centralina, nome_tool, tipo_tool, ip_centralina, ip_gateway, event_timestamp, battery_level, temperature, nodes_jsonb)
            VALUES
        """ + ",".join([
            f"(:{i}_nome_unit, :{i}_tipo_centralina, :{i}_nome_tool, :{i}_tipo_tool, :{i}_ip_centralina, :{i}_ip_gateway, :{i}_event_timestamp, :{i}_battery_level, :{i}_temperature, :{i}_nodes_jsonb)"
            for i in range(len(records))
        ]) + """
            ON CONFLICT ON CONSTRAINT dataraw_unique
            DO UPDATE SET
                tipo_centralina = EXCLUDED.tipo_centralina,
                tipo_tool = EXCLUDED.tipo_tool,
                ip_centralina = EXCLUDED.ip_centralina,
                ip_gateway = EXCLUDED.ip_gateway,
                battery_level = EXCLUDED.battery_level,
                temperature = EXCLUDED.temperature,
                nodes_jsonb = EXCLUDED.nodes_jsonb;
        """), {f"{i}_{key}": value for i, record in enumerate(records) for key, value in record.items()})
        conn.commit()
# Read the whole file and separate the header from the rest of the data
with open('DT0029_20241106044856.csv', 'r') as file:
    lines = file.readlines()

# Extract the metadata from the first 7 lines
if len(lines) >= 7:
    tipo_centralina = lines[1].split()[0]  # first string on the second line
    nome_unit = lines[1].split()[1]        # second string on the second line
    ip_centralina = lines[2].split()[1]    # data-logger IP from the third line
    ip_gateway = lines[4].split()[1]       # gateway IP from the fifth line
    path_tool = lines[5].strip()           # full path from the sixth line
    nome_tool = path_tool.split('/')[-1].replace('.csv', '')  # last path component without the extension
    tipo_tool = path_tool.split('/')[-2]   # path component preceding nome_tool
else:
    raise ValueError("The file does not contain enough lines to extract the required data.")
records = []
# Process the data rows from line 8 onwards
for line in lines[7:]:
    # Strip whitespace and newline characters
    input_data = line.strip()
    # Split the string into sections using ";|;" as the separator
    parts = input_data.split(';|;')
    # Check that there are at least three parts (timestamp, measurements and nodes)
    if len(parts) < 3:
        print(f"Invalid row: {input_data}")
        continue
    # Extract the date/time and the first measurements
    timestamp = parts[0]
    measurements = parts[1]
    # Extract the values of each node and format the data as JSON
    nodes = parts[2:]
    node_list = []
    for i, node_data in enumerate(nodes, start=1):
        node_dict = {"num": i}
        # Split each node into values separated by ";"
        node_values = node_data.split(';')
        for j, value in enumerate(node_values, start=0):
            # Use -9999 as a sentinel when the value is "Dis."
            node_dict['val' + str(j)] = -9999 if value == "Dis." else float(value)
        node_list.append(node_dict)
    # Prepare the data for the insert/update
    record = {
        "nome_unit": nome_unit.upper(),
        "tipo_centralina": tipo_centralina,
        "nome_tool": nome_tool.upper(),
        "tipo_tool": tipo_tool,
        "ip_centralina": ip_centralina,
        "ip_gateway": ip_gateway,
        "event_timestamp": timestamp,
        "battery_level": float(measurements.split(';')[0]),
        "temperature": float(measurements.split(';')[1]),
        "nodes_jsonb": json.dumps(node_list)  # convert the list of dicts into a JSON string
    }
    records.append(record)
    # Once 500 records have accumulated, write them to the database in a batch
    if len(records) >= 500:
        print("Reached 500 records, writing to the database")
        write_db(engine, records)
        records = []

# Write any remaining records; skip the call when there is nothing left to insert
if records:
    write_db(engine, records)
print("All rows in the file were loaded successfully into the PostgreSQL table!")