add mqtt connection tracking
This commit is contained in:
+95
-119
@@ -11,56 +11,70 @@ import shutil
|
||||
import requests
|
||||
from pathlib import Path
|
||||
import configparser
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
# ==========================================
|
||||
# 0. CONFIGURAZIONE LOGGING & HARDWARE
|
||||
# ==========================================
|
||||
# Log file location; the handler rotates it at about 2 MB and keeps 3 backups.
LOG_FILE = '/opt/node_agent.log'

# Always log to the console. The rotating file is added only when it can be
# opened, so an unwritable log path does not abort the agent at import time.
_log_handlers = [logging.StreamHandler()]
_log_file_error = None
try:
    _log_handlers.insert(
        0, RotatingFileHandler(LOG_FILE, maxBytes=2000000, backupCount=3)
    )
except OSError as exc:
    _log_file_error = exc

logging.basicConfig(
    handlers=_log_handlers,
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger("NodeAgent")

if _log_file_error is not None:
    logger.warning(f"File di log {LOG_FILE} non utilizzabile ({_log_file_error}). Log solo su console.")

# GPIO_AVAILABLE is a module-level flag read by the hardware-reset code paths.
try:
    import RPi.GPIO as GPIO
    GPIO_AVAILABLE = True
except ImportError:
    GPIO_AVAILABLE = False
    logger.warning("Libreria RPi.GPIO non trovata. Reset hardware disabilitato.")
except RuntimeError as exc:
    # RPi.GPIO raises RuntimeError at import time when it is installed but
    # cannot be used (not a Raspberry Pi, or no access to the GPIO device).
    GPIO_AVAILABLE = False
    logger.warning(f"RPi.GPIO non utilizzabile ({exc}). Reset hardware disabilitato.")
|
||||
|
||||
# ==========================================
|
||||
# 0. UNIFIED CONFIGURATION LOADING
|
||||
# 1. CARICAMENTO CONFIGURAZIONE UNIFICATA
|
||||
# ==========================================
|
||||
CONFIG_PATH = Path("/opt/node_config.json")


def load_config():
    """Read and parse the agent's JSON configuration file.

    Returns:
        The object decoded from the JSON document at CONFIG_PATH.

    Exits the process with status 1, after logging the reason, when the file
    is missing, cannot be read, or does not contain valid JSON.
    """
    try:
        # Open directly instead of checking exists() first: the file could
        # disappear between the check and the open.
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logger.error(f"ERRORE: File {CONFIG_PATH} non trovato!")
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError.
        logger.error(f"ERRORE CRITICO JSON: {e}")
    sys.exit(1)
|
||||
|
||||
# Load the single configuration file every other setting is derived from.
cfg = load_config()

# Identifiers and topics
CLIENT_ID = cfg.get('client_id', 'iv3jdv').lower()
BASE_TOPIC = cfg.get('mqtt', {}).get('base_topic', f"servizi/{CLIENT_ID}")

TOPIC_CMD = f"{BASE_TOPIC}/cmnd"
TOPIC_STAT = f"{BASE_TOPIC}/stat"

# Global state variables
# True once the status has been recovered from the broker or forced locally.
boot_recovered = False
# Last status string published on TOPIC_STAT.
current_status = "ONLINE - Pronto"
# Process name -> number of automatic restart attempts made so far.
auto_healing_counter = {}
|
||||
|
||||
# ==========================================
|
||||
# 1. TELEGRAM NOTIFICATION FUNCTION
|
||||
# 2. FUNZIONE NOTIFICA TELEGRAM
|
||||
# ==========================================
|
||||
def send_telegram_message(message):
|
||||
t_cfg = cfg.get('telegram', {})
|
||||
if not t_cfg.get('enabled', False): return
|
||||
|
||||
current_hour = int(time.strftime("%H"))
|
||||
if current_hour >= 23 or current_hour < 7:
|
||||
print(f"🌙 Late night ({current_hour}:00): Notification skipped.")
|
||||
ora_attuale = int(time.strftime("%H"))
|
||||
if ora_attuale >= 23 or ora_attuale < 7:
|
||||
logger.info(f"🌙 Notte fonda ({ora_attuale}:00): Notifica Telegram evitata.")
|
||||
return
|
||||
|
||||
token = t_cfg.get('token')
|
||||
@@ -73,58 +87,54 @@ def send_telegram_message(message):
|
||||
payload = {"chat_id": chat_id, "text": f"[{CLIENT_ID.upper()}]\n{clean_msg}"}
|
||||
requests.post(url, json=payload, timeout=10)
|
||||
except Exception as e:
|
||||
print(f"⚠️ Telegram send error: {e}")
|
||||
logger.error(f"Errore invio Telegram: {e}")
|
||||
|
||||
# ==========================================
|
||||
# 2. MULTIPLE PROFILE SWITCH LOGIC
|
||||
# 3. LOGICA CAMBIO PROFILO MULTIPLO
|
||||
# ==========================================
|
||||
|
||||
def get_actual_config_from_disk():
    """Return the status string used when no retained status was recovered.

    Despite the name, nothing is read from disk: a fixed string is returned.
    """
    return "ONLINE - Da memoria"
|
||||
|
||||
def switch_config(config_type):
    """Activate a configuration profile by swapping files and restarting daemons.

    Looks up ``cfg['profiles'][config_type]``. Each entry of the profile's
    ``services`` list supplies a systemd unit ``name``, a ``source`` file and
    the ``target`` path it is copied to.

    Args:
        config_type: Key of the profile inside ``cfg['profiles']``.

    Returns:
        ``"ONLINE - <label>"`` on success, otherwise a string starting with
        ``"ERRORE: "`` that describes the problem. No exception is raised.
    """
    profile = cfg.get('profiles', {}).get(config_type)
    if not profile:
        return f"ERRORE: Profilo {config_type} non trovato in JSON"

    label = profile.get('label', f"Profilo {config_type}")
    services = profile.get('services', [])
    if not services:
        return f"ERRORE: Nessun servizio configurato per {config_type}"

    try:
        # 1. VERIFY: check every source file before touching any daemon, so a
        #    missing file cannot leave the services stopped.
        for s in services:
            if not os.path.exists(s['source']):
                return f"ERRORE: Manca il file sorgente {s['source']}"

        # 2. STOP: stop all involved daemons first so they release their files.
        for s in services:
            subprocess.run(["sudo", "systemctl", "stop", s['name']], check=False)

        try:
            # 3. COPY: install the profile's configuration files.
            for s in services:
                shutil.copy(s['source'], s['target'])
        finally:
            # 4. START: always bring the daemons back, even if a copy failed.
            for s in services:
                subprocess.run(["sudo", "systemctl", "start", s['name']], check=False)

        send_telegram_message(f"✅ Switch multiplo completato: {label}")
        return f"ONLINE - {label}"

    except Exception as e:
        # The caller expects a status string, so report instead of raising.
        return f"ERRORE: {str(e)}"
|
||||
|
||||
def force_online_if_needed(client):
    """Publish a fallback status if none was recovered since connecting.

    Does nothing when ``boot_recovered`` is already set. Otherwise it takes
    the status from ``get_actual_config_from_disk()``, publishes it retained
    on TOPIC_STAT and marks the boot recovery as done.

    Args:
        client: MQTT client object exposing ``publish(topic, payload, retain=...)``.
    """
    global boot_recovered, current_status
    if boot_recovered:
        return

    logger.info("⚠️ Recupero memoria saltato. Imposto stato da disco...")
    current_status = get_actual_config_from_disk()
    client.publish(TOPIC_STAT, current_status, retain=True)
    boot_recovered = True
|
||||
|
||||
# ==========================================
|
||||
# 3. TELEMETRY AND AUTO-HEALING
|
||||
# 4. TELEMETRIA E AUTO-HEALING
|
||||
# ==========================================
|
||||
def get_cpu_temperature():
|
||||
temp = 0.0
|
||||
@@ -147,8 +157,8 @@ def get_system_status():
|
||||
"processes": {},
|
||||
"timestamp": time.strftime("%H:%M:%S"),
|
||||
"profiles": {
|
||||
"A": cfg.get('profiles', {}).get('A', {}).get('label', 'PROFILE A'),
|
||||
"B": cfg.get('profiles', {}).get('B', {}).get('label', 'PROFILE B')
|
||||
"A": cfg.get('profiles', {}).get('A', {}).get('label', 'PROFILO A'),
|
||||
"B": cfg.get('profiles', {}).get('B', {}).get('label', 'PROFILO B')
|
||||
}
|
||||
}
|
||||
proc_path = Path(cfg['paths'].get('process_list', ''))
|
||||
@@ -159,7 +169,7 @@ def get_system_status():
|
||||
for name in target_processes:
|
||||
name = name.strip().lower()
|
||||
if name: status["processes"][name] = "online" if name in running_names else "offline"
|
||||
except Exception as e: print(f"Process error: {e}")
|
||||
except Exception as e: logger.error(f"Errore controllo processi: {e}")
|
||||
return status
|
||||
|
||||
def check_auto_healing(client, status):
|
||||
@@ -169,39 +179,12 @@ def check_auto_healing(client, status):
|
||||
attempts = auto_healing_counter.get(proc_name, 0)
|
||||
if attempts < 3:
|
||||
auto_healing_counter[proc_name] = attempts + 1
|
||||
msg = f"🛠 Auto-healing: {proc_name} offline. Restarting {attempts+1}/3..."
|
||||
msg = f"🛠 Auto-healing: {proc_name} offline. Riavvio {attempts+1}/3..."
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", msg)
|
||||
send_telegram_message(msg)
|
||||
|
||||
# --- SPECIAL RULE FOR MMDVMHOST ---
|
||||
# If the failed daemon is MMDVMHost, perform a hardware reset of the modem first
|
||||
if proc_name.lower() == "mmdvmhost" and GPIO_AVAILABLE:
|
||||
try:
|
||||
RESET_PIN = 21
|
||||
GPIO.setwarnings(False)
|
||||
GPIO.setmode(GPIO.BCM)
|
||||
GPIO.setup(RESET_PIN, GPIO.OUT)
|
||||
|
||||
# Send pulse
|
||||
GPIO.output(RESET_PIN, GPIO.LOW)
|
||||
time.sleep(0.5)
|
||||
GPIO.output(RESET_PIN, GPIO.HIGH)
|
||||
GPIO.cleanup(RESET_PIN)
|
||||
|
||||
msg_hw = "🔌 Auto-healing: Hardware HAT Reset sent"
|
||||
print(f"[{CLIENT_ID}] {msg_hw}")
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", msg_hw)
|
||||
|
||||
# Wait for the modem firmware to boot before starting the daemon
|
||||
time.sleep(1.5)
|
||||
except Exception as e:
|
||||
print(f"Auto-healing GPIO Error: {e}")
|
||||
# ----------------------------------
|
||||
|
||||
# Restart the service (whether MMDVMHost or any other)
|
||||
subprocess.run(["sudo", "systemctl", "restart", proc_name])
|
||||
elif attempts == 3:
|
||||
msg = f"🚨 CRITICAL: {proc_name} failed!"
|
||||
msg = f"🚨 CRITICO: {proc_name} fallito!"
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", msg)
|
||||
send_telegram_message(msg)
|
||||
auto_healing_counter[proc_name] = 4
|
||||
@@ -210,8 +193,6 @@ def check_auto_healing(client, status):
|
||||
|
||||
def publish_all(client):
|
||||
status = get_system_status()
|
||||
|
||||
# Read file list for Dashboard menu
|
||||
file_list_path = Path(cfg['paths'].get('file_list', ''))
|
||||
status["config_files"] = []
|
||||
status["files"] = []
|
||||
@@ -219,9 +200,9 @@ def publish_all(client):
|
||||
if file_list_path.exists():
|
||||
try:
|
||||
files = file_list_path.read_text(encoding="utf-8").splitlines()
|
||||
extracted_names = [Path(f.strip()).stem for f in files if f.strip()]
|
||||
status["config_files"] = extracted_names
|
||||
status["files"] = extracted_names
|
||||
nomi_estrattti = [Path(f.strip()).stem for f in files if f.strip()]
|
||||
status["config_files"] = nomi_estrattti
|
||||
status["files"] = nomi_estrattti
|
||||
except: pass
|
||||
|
||||
client.publish(f"devices/{CLIENT_ID}/services", json.dumps(status), qos=1)
|
||||
@@ -244,47 +225,36 @@ def publish_all_ini_files(client):
|
||||
with open(file_list_path, 'r') as f:
|
||||
files_to_parse = [line.strip() for line in f if line.strip()]
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_list_path}: {e}")
|
||||
logger.error(f"Errore lettura {file_list_path}: {e}")
|
||||
return
|
||||
|
||||
for file_path in files_to_parse:
|
||||
if not os.path.exists(file_path): continue
|
||||
|
||||
try:
|
||||
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||
|
||||
# --- START MANUAL PARSER (Anti-Duplicate Keys) ---
|
||||
ini_data = {}
|
||||
current_section = None
|
||||
|
||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
# Skip empty lines or comments
|
||||
if not line or line.startswith(('#', ';')):
|
||||
continue
|
||||
# Recognize sections [Section Name]
|
||||
if not line or line.startswith(('#', ';')): continue
|
||||
if line.startswith('[') and line.endswith(']'):
|
||||
current_section = line[1:-1].strip()
|
||||
ini_data[current_section] = {}
|
||||
# Recognize keys and values
|
||||
elif '=' in line and current_section is not None:
|
||||
k, v = line.split('=', 1)
|
||||
k, v = k.strip(), v.strip()
|
||||
|
||||
# THE MAGIC: If the key already exists, merge it with a comma!
|
||||
if k in ini_data[current_section]:
|
||||
ini_data[current_section][k] = str(ini_data[current_section][k]) + "," + v
|
||||
else:
|
||||
ini_data[current_section][k] = v
|
||||
|
||||
# Publish on MQTT broker
|
||||
for section, payload in ini_data.items():
|
||||
topic = f"data/{CLIENT_ID}/{base_name}/{section}"
|
||||
client.publish(topic, json.dumps(payload), retain=True)
|
||||
|
||||
except Exception as e:
|
||||
print(f"INI parsing error for {file_path}: {e}")
|
||||
logger.error(f"Errore parsing INI per {file_path}: {e}")
|
||||
|
||||
def write_config_from_json(slug, json_payload):
|
||||
file_list_path = Path(cfg['paths'].get('file_list', ''))
|
||||
@@ -294,20 +264,21 @@ def write_config_from_json(slug, json_payload):
|
||||
for f in files:
|
||||
p = Path(f.strip())
|
||||
if p.stem.lower() == slug.lower():
|
||||
new_data = json.loads(json_payload)
|
||||
nuovi_dati = json.loads(json_payload)
|
||||
shutil.copy(p, str(p) + ".bak")
|
||||
with open(p, 'w', encoding="utf-8") as file: file.write(new_data.get("raw_text", ""))
|
||||
with open(p, 'w', encoding="utf-8") as file: file.write(nuovi_dati.get("raw_text", ""))
|
||||
os.system(f"sudo systemctl restart {slug}")
|
||||
send_telegram_message(f"📝 Config {slug.upper()} updated via Web.")
|
||||
send_telegram_message(f"📝 Config {slug.upper()} aggiornata via Web.")
|
||||
logger.info(f"Configurazione {slug} aggiornata con successo.")
|
||||
break
|
||||
except Exception as e: print(f"Config write error: {e}")
|
||||
except Exception as e: logger.error(f"Errore scrittura config: {e}")
|
||||
|
||||
# ==========================================
|
||||
# 4. MQTT CALLBACKS
|
||||
# 5. CALLBACK MQTT
|
||||
# ==========================================
|
||||
def on_connect(client, userdata, flags, rc, properties=None):
|
||||
if rc == 0:
|
||||
print(f"✅ Connected: {CLIENT_ID.upper()}")
|
||||
def on_connect(client, userdata, flags, reason_code, properties=None):
|
||||
if reason_code == 0:
|
||||
logger.info(f"✅ Connesso al broker MQTT: {CLIENT_ID.upper()}")
|
||||
client.subscribe([(TOPIC_CMD, 0), (TOPIC_STAT, 0)])
|
||||
client.subscribe([
|
||||
("devices/control/request", 0),
|
||||
@@ -316,7 +287,12 @@ def on_connect(client, userdata, flags, rc, properties=None):
|
||||
])
|
||||
threading.Timer(5.0, force_online_if_needed, [client]).start()
|
||||
publish_all(client)
|
||||
publish_all_ini_files(client) # Publish INIs as soon as connected
|
||||
publish_all_ini_files(client)
|
||||
else:
|
||||
logger.error(f"❌ Errore connessione MQTT. Codice: {reason_code}")
|
||||
|
||||
def on_disconnect(client, userdata, disconnect_flags, reason_code, properties=None):
    """MQTT disconnect callback: record the broker's reason code as a warning."""
    notice = f"⚠️ Disconnessione dal broker MQTT! Codice: {reason_code}"
    logger.warning(notice)
|
||||
|
||||
def on_message(client, userdata, msg):
|
||||
global boot_recovered, current_status, cfg
|
||||
@@ -324,7 +300,7 @@ def on_message(client, userdata, msg):
|
||||
topic = msg.topic
|
||||
|
||||
if topic == TOPIC_STAT and not boot_recovered:
|
||||
if not any(x in payload.upper() for x in ["OFFLINE", "ERROR", "REBOOT"]):
|
||||
if not any(x in payload.upper() for x in ["OFFLINE", "ERRORE", "RIAVVIO"]):
|
||||
current_status = payload
|
||||
boot_recovered = True
|
||||
client.publish(TOPIC_STAT, current_status, retain=True)
|
||||
@@ -337,51 +313,39 @@ def on_message(client, userdata, msg):
|
||||
boot_recovered = True
|
||||
publish_all(client)
|
||||
elif cmd == "REBOOT":
|
||||
client.publish(TOPIC_STAT, f"OFFLINE - Rebooting {CLIENT_ID.upper()}...", retain=False)
|
||||
client.publish(TOPIC_STAT, f"OFFLINE - Riavvio {CLIENT_ID.upper()}...", retain=False)
|
||||
logger.info("Comando REBOOT ricevuto. Riavvio sistema...")
|
||||
time.sleep(1)
|
||||
subprocess.run(["sudo", "reboot"], check=True)
|
||||
elif cmd == 'RESET_HAT':
|
||||
# Correct GPIO pin for MMDVM board hardware reset
|
||||
RESET_PIN = 21
|
||||
|
||||
if GPIO_AVAILABLE:
|
||||
try:
|
||||
GPIO.setwarnings(False)
|
||||
GPIO.setmode(GPIO.BCM)
|
||||
GPIO.setup(RESET_PIN, GPIO.OUT)
|
||||
|
||||
# 1. Send reset pulse (LOW for 0.5 seconds)
|
||||
GPIO.output(RESET_PIN, GPIO.LOW)
|
||||
time.sleep(0.5)
|
||||
GPIO.output(RESET_PIN, GPIO.HIGH)
|
||||
|
||||
# Release GPIO resources
|
||||
GPIO.cleanup(RESET_PIN)
|
||||
print(f"[{CLIENT_ID}] RESET pulse sent to GPIO {RESET_PIN}")
|
||||
|
||||
# 2. Wait 1.5 seconds to let the microcontroller firmware reboot
|
||||
logger.info(f"Impulso di RESET inviato al GPIO {RESET_PIN}")
|
||||
time.sleep(1.5)
|
||||
|
||||
# 3. Restart MMDVMHost service to realign serial communication
|
||||
print(f"[{CLIENT_ID}] Restarting MMDVMHost...")
|
||||
logger.info("Riavvio di MMDVMHost in corso...")
|
||||
subprocess.run(["sudo", "systemctl", "restart", "mmdvmhost"], check=False)
|
||||
|
||||
# 4. Send confirmations to dashboard
|
||||
client.publish(f"fleet/{CLIENT_ID}/status", "HAT RESET + MMDVM RESTART OK")
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", "🔌 HAT Reset + MMDVMHost Restarted")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error during GPIO/MMDVMHost reset: {e}")
|
||||
client.publish(f"fleet/{CLIENT_ID}/status", f"RESET ERROR: {e}")
|
||||
logger.error(f"Errore durante il reset GPIO/MMDVMHost: {e}")
|
||||
client.publish(f"fleet/{CLIENT_ID}/status", f"ERRORE RESET: {e}")
|
||||
|
||||
elif cmd in ["TG:OFF", "TG:ON"]:
|
||||
new_state = (cmd == "TG:ON")
|
||||
cfg['telegram']['enabled'] = new_state
|
||||
nuovo_stato = (cmd == "TG:ON")
|
||||
cfg['telegram']['enabled'] = nuovo_stato
|
||||
try:
|
||||
with open(CONFIG_PATH, 'w') as f: json.dump(cfg, f, indent=4)
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", f"{'🔔' if new_state else '🔇'} Notifications {'ON' if new_state else 'OFF'}")
|
||||
if new_state: send_telegram_message("Notifications enabled!")
|
||||
except: pass
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", f"{'🔔' if nuovo_stato else '🔇'} Notifiche {'ON' if nuovo_stato else 'OFF'}")
|
||||
if nuovo_stato: send_telegram_message("Notifiche riattivate!")
|
||||
except Exception as e: logger.error(f"Errore salvataggio stato Telegram: {e}")
|
||||
|
||||
elif topic == "devices/control/request" and payload.lower() in ["status", "update"]:
|
||||
publish_all(client)
|
||||
@@ -394,8 +358,11 @@ def on_message(client, userdata, msg):
|
||||
try:
|
||||
subprocess.run(["sudo", "systemctl", action.lower(), service.lower()], check=True)
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", f"✅ {action.upper()}: {service}")
|
||||
logger.info(f"Comando servizio eseguito: {action.upper()} {service}")
|
||||
publish_all(client)
|
||||
except Exception as e: client.publish(f"devices/{CLIENT_ID}/logs", f"❌ ERROR: {str(e)}")
|
||||
except Exception as e:
|
||||
client.publish(f"devices/{CLIENT_ID}/logs", f"❌ ERROR: {str(e)}")
|
||||
logger.error(f"Errore esecuzione comando servizio: {e}")
|
||||
|
||||
elif topic.startswith(f"devices/{CLIENT_ID}/config_set/"):
|
||||
slug = topic.split("/")[-1]
|
||||
@@ -416,14 +383,23 @@ def start_service():
|
||||
client.will_set(TOPIC_STAT, payload=f"OFFLINE - {CLIENT_ID.upper()}", qos=1, retain=False)
|
||||
client.username_pw_set(cfg['mqtt']['user'], cfg['mqtt']['password'])
|
||||
client.on_connect = on_connect
|
||||
client.on_disconnect = on_disconnect
|
||||
client.on_message = on_message
|
||||
|
||||
try:
|
||||
client.connect(cfg['mqtt']['broker'], cfg['mqtt']['port'], 60)
|
||||
client.loop_start()
|
||||
threading.Thread(target=auto_publish_task, args=(client,), daemon=True).start()
|
||||
while True: time.sleep(1)
|
||||
except Exception: sys.exit(0)
|
||||
while True:
|
||||
try:
|
||||
logger.info("Tentativo di connessione al broker MQTT...")
|
||||
client.connect(cfg['mqtt']['broker'], cfg['mqtt']['port'], 60)
|
||||
client.loop_start()
|
||||
threading.Thread(target=auto_publish_task, args=(client,), daemon=True).start()
|
||||
|
||||
# Mantiene il processo vivo
|
||||
while True:
|
||||
time.sleep(1)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Impossibile connettersi o connessione persa ({e}). Riprovo in 10 secondi...")
|
||||
time.sleep(10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
start_service()
|
||||
|
||||
Reference in New Issue
Block a user