add mqtt connection tracking

This commit is contained in:
2026-04-20 21:38:49 +02:00
parent cb57da334e
commit f259e0a774
2 changed files with 145 additions and 138 deletions
+95 -119
View File
@@ -11,56 +11,70 @@ import shutil
import requests
from pathlib import Path
import configparser
import logging
from logging.handlers import RotatingFileHandler
# ==========================================
# 0. CONFIGURAZIONE LOGGING & HARDWARE
# ==========================================
logging.basicConfig(
handlers=[
RotatingFileHandler('/opt/node_agent.log', maxBytes=2000000, backupCount=3),
logging.StreamHandler()
],
level=logging.INFO,
format='[%(asctime)s] %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger("NodeAgent")
try:
import RPi.GPIO as GPIO
# This variable must be GLOBAL, so defined at the top!
GPIO_AVAILABLE = True
except ImportError:
GPIO_AVAILABLE = False
print("Warning: RPi.GPIO library not found. Hardware reset disabled.")
logger.warning("Libreria RPi.GPIO non trovata. Reset hardware disabilitato.")
# ==========================================
# 0. UNIFIED CONFIGURATION LOADING
# 1. CARICAMENTO CONFIGURAZIONE UNIFICATA
# ==========================================
CONFIG_PATH = Path("/opt/node_config.json")
def load_config():
try:
if not CONFIG_PATH.exists():
print(f"ERROR: File {CONFIG_PATH} not found!")
logger.error(f"ERRORE: File {CONFIG_PATH} non trovato!")
sys.exit(1)
with open(CONFIG_PATH, 'r') as f:
return json.load(f)
except Exception as e:
print(f" CRITICAL JSON ERROR: {e}")
logger.error(f"ERRORE CRITICO JSON: {e}")
sys.exit(1)
# Load the single necessary configuration
cfg = load_config()
# Identifiers and Topics
# Identificativi e Topic
CLIENT_ID = cfg.get('client_id', 'iv3jdv').lower()
BASE_TOPIC = cfg.get('mqtt', {}).get('base_topic', f"servizi/{CLIENT_ID}")
TOPIC_CMD = f"{BASE_TOPIC}/cmnd"
TOPIC_STAT = f"{BASE_TOPIC}/stat"
# Global State Variables
# Variabili di Stato Globali
boot_recovered = False
current_status = "ONLINE - Ready"
current_status = "ONLINE - Pronto"
auto_healing_counter = {}
# ==========================================
# 1. TELEGRAM NOTIFICATION FUNCTION
# 2. FUNZIONE NOTIFICA TELEGRAM
# ==========================================
def send_telegram_message(message):
t_cfg = cfg.get('telegram', {})
if not t_cfg.get('enabled', False): return
current_hour = int(time.strftime("%H"))
if current_hour >= 23 or current_hour < 7:
print(f"🌙 Late night ({current_hour}:00): Notification skipped.")
ora_attuale = int(time.strftime("%H"))
if ora_attuale >= 23 or ora_attuale < 7:
logger.info(f"🌙 Notte fonda ({ora_attuale}:00): Notifica Telegram evitata.")
return
token = t_cfg.get('token')
@@ -73,58 +87,54 @@ def send_telegram_message(message):
payload = {"chat_id": chat_id, "text": f"[{CLIENT_ID.upper()}]\n{clean_msg}"}
requests.post(url, json=payload, timeout=10)
except Exception as e:
print(f"⚠️ Telegram send error: {e}")
logger.error(f"Errore invio Telegram: {e}")
# ==========================================
# 2. MULTIPLE PROFILE SWITCH LOGIC
# 3. LOGICA CAMBIO PROFILO MULTIPLO
# ==========================================
def get_actual_config_from_disk():
return "ONLINE - From memory"
return "ONLINE - Da memoria"
def switch_config(config_type):
profile = cfg.get('profiles', {}).get(config_type)
if not profile:
return f"ERROR: Profile {config_type} not found in JSON"
return f"ERRORE: Profilo {config_type} non trovato in JSON"
label = profile.get('label', f"Profile {config_type}")
label = profile.get('label', f"Profilo {config_type}")
services = profile.get('services', [])
if not services:
return f"ERROR: No services configured for {config_type}"
return f"ERRORE: Nessun servizio configurato per {config_type}"
try:
# 1. STOP: Stop all involved daemons first to release files
for s in services:
subprocess.run(["sudo", "systemctl", "stop", s['name']], check=False)
# 2. COPY: Verify and copy all configuration files
for s in services:
if not os.path.exists(s['source']):
return f"ERROR: Missing source file {s['source']}"
return f"ERRORE: Manca il file sorgente {s['source']}"
shutil.copy(s['source'], s['target'])
# 3. START: Restart all daemons with the new files
for s in services:
subprocess.run(["sudo", "systemctl", "start", s['name']], check=False)
send_telegram_message(f"Multiple switch completed: {label}")
send_telegram_message(f"Switch multiplo completato: {label}")
return f"ONLINE - {label}"
except Exception as e:
return f"ERROR: {str(e)}"
return f"ERRORE: {str(e)}"
def force_online_if_needed(client):
global boot_recovered, current_status
if not boot_recovered:
print("⚠️ Memory recovery skipped. Setting status from disk...")
logger.info("⚠️ Recupero memoria saltato. Imposto stato da disco...")
current_status = get_actual_config_from_disk()
client.publish(TOPIC_STAT, current_status, retain=True)
boot_recovered = True
# ==========================================
# 3. TELEMETRY AND AUTO-HEALING
# 4. TELEMETRIA E AUTO-HEALING
# ==========================================
def get_cpu_temperature():
temp = 0.0
@@ -147,8 +157,8 @@ def get_system_status():
"processes": {},
"timestamp": time.strftime("%H:%M:%S"),
"profiles": {
"A": cfg.get('profiles', {}).get('A', {}).get('label', 'PROFILE A'),
"B": cfg.get('profiles', {}).get('B', {}).get('label', 'PROFILE B')
"A": cfg.get('profiles', {}).get('A', {}).get('label', 'PROFILO A'),
"B": cfg.get('profiles', {}).get('B', {}).get('label', 'PROFILO B')
}
}
proc_path = Path(cfg['paths'].get('process_list', ''))
@@ -159,7 +169,7 @@ def get_system_status():
for name in target_processes:
name = name.strip().lower()
if name: status["processes"][name] = "online" if name in running_names else "offline"
except Exception as e: print(f"Process error: {e}")
except Exception as e: logger.error(f"Errore controllo processi: {e}")
return status
def check_auto_healing(client, status):
@@ -169,39 +179,12 @@ def check_auto_healing(client, status):
attempts = auto_healing_counter.get(proc_name, 0)
if attempts < 3:
auto_healing_counter[proc_name] = attempts + 1
msg = f"🛠 Auto-healing: {proc_name} offline. Restarting {attempts+1}/3..."
msg = f"🛠 Auto-healing: {proc_name} offline. Riavvio {attempts+1}/3..."
client.publish(f"devices/{CLIENT_ID}/logs", msg)
send_telegram_message(msg)
# --- SPECIAL RULE FOR MMDVMHOST ---
# If the failed daemon is MMDVMHost, perform a hardware reset of the modem first
if proc_name.lower() == "mmdvmhost" and GPIO_AVAILABLE:
try:
RESET_PIN = 21
GPIO.setwarnings(False)
GPIO.setmode(GPIO.BCM)
GPIO.setup(RESET_PIN, GPIO.OUT)
# Send pulse
GPIO.output(RESET_PIN, GPIO.LOW)
time.sleep(0.5)
GPIO.output(RESET_PIN, GPIO.HIGH)
GPIO.cleanup(RESET_PIN)
msg_hw = "🔌 Auto-healing: Hardware HAT Reset sent"
print(f"[{CLIENT_ID}] {msg_hw}")
client.publish(f"devices/{CLIENT_ID}/logs", msg_hw)
# Wait for the modem firmware to boot before starting the daemon
time.sleep(1.5)
except Exception as e:
print(f"Auto-healing GPIO Error: {e}")
# ----------------------------------
# Restart the service (whether MMDVMHost or any other)
subprocess.run(["sudo", "systemctl", "restart", proc_name])
elif attempts == 3:
msg = f"🚨 CRITICAL: {proc_name} failed!"
msg = f"🚨 CRITICO: {proc_name} fallito!"
client.publish(f"devices/{CLIENT_ID}/logs", msg)
send_telegram_message(msg)
auto_healing_counter[proc_name] = 4
@@ -210,8 +193,6 @@ def check_auto_healing(client, status):
def publish_all(client):
status = get_system_status()
# Read file list for Dashboard menu
file_list_path = Path(cfg['paths'].get('file_list', ''))
status["config_files"] = []
status["files"] = []
@@ -219,9 +200,9 @@ def publish_all(client):
if file_list_path.exists():
try:
files = file_list_path.read_text(encoding="utf-8").splitlines()
extracted_names = [Path(f.strip()).stem for f in files if f.strip()]
status["config_files"] = extracted_names
status["files"] = extracted_names
nomi_estrattti = [Path(f.strip()).stem for f in files if f.strip()]
status["config_files"] = nomi_estrattti
status["files"] = nomi_estrattti
except: pass
client.publish(f"devices/{CLIENT_ID}/services", json.dumps(status), qos=1)
@@ -244,47 +225,36 @@ def publish_all_ini_files(client):
with open(file_list_path, 'r') as f:
files_to_parse = [line.strip() for line in f if line.strip()]
except Exception as e:
print(f"Error reading {file_list_path}: {e}")
logger.error(f"Errore lettura {file_list_path}: {e}")
return
for file_path in files_to_parse:
if not os.path.exists(file_path): continue
try:
base_name = os.path.splitext(os.path.basename(file_path))[0]
# --- START MANUAL PARSER (Anti-Duplicate Keys) ---
ini_data = {}
current_section = None
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
line = line.strip()
# Skip empty lines or comments
if not line or line.startswith(('#', ';')):
continue
# Recognize sections [Section Name]
if not line or line.startswith(('#', ';')): continue
if line.startswith('[') and line.endswith(']'):
current_section = line[1:-1].strip()
ini_data[current_section] = {}
# Recognize keys and values
elif '=' in line and current_section is not None:
k, v = line.split('=', 1)
k, v = k.strip(), v.strip()
# THE MAGIC: If the key already exists, merge it with a comma!
if k in ini_data[current_section]:
ini_data[current_section][k] = str(ini_data[current_section][k]) + "," + v
else:
ini_data[current_section][k] = v
# Publish on MQTT broker
for section, payload in ini_data.items():
topic = f"data/{CLIENT_ID}/{base_name}/{section}"
client.publish(topic, json.dumps(payload), retain=True)
except Exception as e:
print(f"INI parsing error for {file_path}: {e}")
logger.error(f"Errore parsing INI per {file_path}: {e}")
def write_config_from_json(slug, json_payload):
file_list_path = Path(cfg['paths'].get('file_list', ''))
@@ -294,20 +264,21 @@ def write_config_from_json(slug, json_payload):
for f in files:
p = Path(f.strip())
if p.stem.lower() == slug.lower():
new_data = json.loads(json_payload)
nuovi_dati = json.loads(json_payload)
shutil.copy(p, str(p) + ".bak")
with open(p, 'w', encoding="utf-8") as file: file.write(new_data.get("raw_text", ""))
with open(p, 'w', encoding="utf-8") as file: file.write(nuovi_dati.get("raw_text", ""))
os.system(f"sudo systemctl restart {slug}")
send_telegram_message(f"📝 Config {slug.upper()} updated via Web.")
send_telegram_message(f"📝 Config {slug.upper()} aggiornata via Web.")
logger.info(f"Configurazione {slug} aggiornata con successo.")
break
except Exception as e: print(f"Config write error: {e}")
except Exception as e: logger.error(f"Errore scrittura config: {e}")
# ==========================================
# 4. MQTT CALLBACKS
# 5. CALLBACK MQTT
# ==========================================
def on_connect(client, userdata, flags, rc, properties=None):
if rc == 0:
print(f"✅ Connected: {CLIENT_ID.upper()}")
def on_connect(client, userdata, flags, reason_code, properties=None):
if reason_code == 0:
logger.info(f"✅ Connesso al broker MQTT: {CLIENT_ID.upper()}")
client.subscribe([(TOPIC_CMD, 0), (TOPIC_STAT, 0)])
client.subscribe([
("devices/control/request", 0),
@@ -316,7 +287,12 @@ def on_connect(client, userdata, flags, rc, properties=None):
])
threading.Timer(5.0, force_online_if_needed, [client]).start()
publish_all(client)
publish_all_ini_files(client) # Publish INIs as soon as connected
publish_all_ini_files(client)
else:
logger.error(f"❌ Errore connessione MQTT. Codice: {reason_code}")
def on_disconnect(client, userdata, disconnect_flags, reason_code, properties=None):
logger.warning(f"⚠️ Disconnessione dal broker MQTT! Codice: {reason_code}")
def on_message(client, userdata, msg):
global boot_recovered, current_status, cfg
@@ -324,7 +300,7 @@ def on_message(client, userdata, msg):
topic = msg.topic
if topic == TOPIC_STAT and not boot_recovered:
if not any(x in payload.upper() for x in ["OFFLINE", "ERROR", "REBOOT"]):
if not any(x in payload.upper() for x in ["OFFLINE", "ERRORE", "RIAVVIO"]):
current_status = payload
boot_recovered = True
client.publish(TOPIC_STAT, current_status, retain=True)
@@ -337,51 +313,39 @@ def on_message(client, userdata, msg):
boot_recovered = True
publish_all(client)
elif cmd == "REBOOT":
client.publish(TOPIC_STAT, f"OFFLINE - Rebooting {CLIENT_ID.upper()}...", retain=False)
client.publish(TOPIC_STAT, f"OFFLINE - Riavvio {CLIENT_ID.upper()}...", retain=False)
logger.info("Comando REBOOT ricevuto. Riavvio sistema...")
time.sleep(1)
subprocess.run(["sudo", "reboot"], check=True)
elif cmd == 'RESET_HAT':
# Correct GPIO pin for MMDVM board hardware reset
RESET_PIN = 21
if GPIO_AVAILABLE:
try:
GPIO.setwarnings(False)
GPIO.setmode(GPIO.BCM)
GPIO.setup(RESET_PIN, GPIO.OUT)
# 1. Send reset pulse (LOW for 0.5 seconds)
GPIO.output(RESET_PIN, GPIO.LOW)
time.sleep(0.5)
GPIO.output(RESET_PIN, GPIO.HIGH)
# Release GPIO resources
GPIO.cleanup(RESET_PIN)
print(f"[{CLIENT_ID}] RESET pulse sent to GPIO {RESET_PIN}")
# 2. Wait 1.5 seconds to let the microcontroller firmware reboot
logger.info(f"Impulso di RESET inviato al GPIO {RESET_PIN}")
time.sleep(1.5)
# 3. Restart MMDVMHost service to realign serial communication
print(f"[{CLIENT_ID}] Restarting MMDVMHost...")
logger.info("Riavvio di MMDVMHost in corso...")
subprocess.run(["sudo", "systemctl", "restart", "mmdvmhost"], check=False)
# 4. Send confirmations to dashboard
client.publish(f"fleet/{CLIENT_ID}/status", "HAT RESET + MMDVM RESTART OK")
client.publish(f"devices/{CLIENT_ID}/logs", "🔌 HAT Reset + MMDVMHost Restarted")
except Exception as e:
print(f"Error during GPIO/MMDVMHost reset: {e}")
client.publish(f"fleet/{CLIENT_ID}/status", f"RESET ERROR: {e}")
logger.error(f"Errore durante il reset GPIO/MMDVMHost: {e}")
client.publish(f"fleet/{CLIENT_ID}/status", f"ERRORE RESET: {e}")
elif cmd in ["TG:OFF", "TG:ON"]:
new_state = (cmd == "TG:ON")
cfg['telegram']['enabled'] = new_state
nuovo_stato = (cmd == "TG:ON")
cfg['telegram']['enabled'] = nuovo_stato
try:
with open(CONFIG_PATH, 'w') as f: json.dump(cfg, f, indent=4)
client.publish(f"devices/{CLIENT_ID}/logs", f"{'🔔' if new_state else '🔇'} Notifications {'ON' if new_state else 'OFF'}")
if new_state: send_telegram_message("Notifications enabled!")
except: pass
client.publish(f"devices/{CLIENT_ID}/logs", f"{'🔔' if nuovo_stato else '🔇'} Notifiche {'ON' if nuovo_stato else 'OFF'}")
if nuovo_stato: send_telegram_message("Notifiche riattivate!")
except Exception as e: logger.error(f"Errore salvataggio stato Telegram: {e}")
elif topic == "devices/control/request" and payload.lower() in ["status", "update"]:
publish_all(client)
@@ -394,8 +358,11 @@ def on_message(client, userdata, msg):
try:
subprocess.run(["sudo", "systemctl", action.lower(), service.lower()], check=True)
client.publish(f"devices/{CLIENT_ID}/logs", f"{action.upper()}: {service}")
logger.info(f"Comando servizio eseguito: {action.upper()} {service}")
publish_all(client)
except Exception as e: client.publish(f"devices/{CLIENT_ID}/logs", f"❌ ERROR: {str(e)}")
except Exception as e:
client.publish(f"devices/{CLIENT_ID}/logs", f"❌ ERROR: {str(e)}")
logger.error(f"Errore esecuzione comando servizio: {e}")
elif topic.startswith(f"devices/{CLIENT_ID}/config_set/"):
slug = topic.split("/")[-1]
@@ -416,14 +383,23 @@ def start_service():
client.will_set(TOPIC_STAT, payload=f"OFFLINE - {CLIENT_ID.upper()}", qos=1, retain=False)
client.username_pw_set(cfg['mqtt']['user'], cfg['mqtt']['password'])
client.on_connect = on_connect
client.on_disconnect = on_disconnect
client.on_message = on_message
try:
client.connect(cfg['mqtt']['broker'], cfg['mqtt']['port'], 60)
client.loop_start()
threading.Thread(target=auto_publish_task, args=(client,), daemon=True).start()
while True: time.sleep(1)
except Exception: sys.exit(0)
while True:
try:
logger.info("Tentativo di connessione al broker MQTT...")
client.connect(cfg['mqtt']['broker'], cfg['mqtt']['port'], 60)
client.loop_start()
threading.Thread(target=auto_publish_task, args=(client,), daemon=True).start()
# Mantiene il processo vivo
while True:
time.sleep(1)
except Exception as e:
logger.error(f"Impossibile connettersi o connessione persa ({e}). Riprovo in 10 secondi...")
time.sleep(10)
if __name__ == "__main__":
start_service()