update monitor

This commit is contained in:
2026-01-04 16:22:27 +01:00
parent e0fa3018ea
commit 16bbc930a8
5 changed files with 163 additions and 36 deletions

Binary file not shown.

Binary file not shown.

92
main.py
View File

@@ -4,8 +4,9 @@ from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
from monitor_logic import get_ssh_data, check_web_service from monitor_logic import get_ssh_data, check_web_service, check_testarena_backend_status, recover_testarena_backend, send_email_notification
import asyncio import asyncio
from datetime import datetime
load_dotenv() load_dotenv()
@@ -32,6 +33,9 @@ PI_PORT = 2222
PI_USER = "asf_tb" PI_USER = "asf_tb"
PI_PASS = os.getenv("PI_PASS", "ASF_TB") PI_PASS = os.getenv("PI_PASS", "ASF_TB")
# SMTP credentials for alert e-mails are read from the environment (populated
# from .env by the load_dotenv() call above) so the secret never lives in the
# repository.
# NOTE(review): the password that used to be hard-coded here is exposed in the
# commit history and should be rotated. Until SMTP_PASS is provided, the SMTP
# login fails and send_email_notification() reports the failure and returns False.
SMTP_USER = os.getenv("SMTP_USER", "support@nabd-co.com")
SMTP_PASS = os.getenv("SMTP_PASS", "")
WEB_SERVICES = [ WEB_SERVICES = [
{"name": "Gitea", "url": "https://gitea.nabd-co.com/"}, {"name": "Gitea", "url": "https://gitea.nabd-co.com/"},
{"name": "OpenProject", "url": "https://openproject.nabd-co.com/"}, {"name": "OpenProject", "url": "https://openproject.nabd-co.com/"},
@@ -41,34 +45,76 @@ WEB_SERVICES = [
{"name": "Board", "url": "https://board.nabd-co.com/"}, {"name": "Board", "url": "https://board.nabd-co.com/"},
] ]
# Most recent monitoring snapshot. It starts out pessimistic (everything
# offline, no timestamp); monitor_services_task() overwrites the entries after
# each completed cycle and the /api/status endpoint returns it as-is.
LATEST_STATUS = dict(
    pc={"status": "offline"},
    pi={"status": "offline"},
    services=[],
    last_update=None,
)
# Seconds to sleep between two monitoring cycles (5 minutes).
_MONITOR_INTERVAL_SECONDS = 300
# Maximum number of automatic recovery attempts per cycle for the TestArena backend.
_MAX_RECOVERY_ATTEMPTS = 5
# Seconds to wait after a successful recovery command before probing again.
_RECOVERY_SETTLE_SECONDS = 10


async def _check_web_services():
    """Probe every entry of WEB_SERVICES concurrently and alert on failures.

    Returns:
        A list with one dict per service, in WEB_SERVICES order, combining the
        service entry (name, url) with the result of check_web_service().
    """
    probes = await asyncio.gather(
        *(asyncio.to_thread(check_web_service, s["url"]) for s in WEB_SERVICES)
    )
    results = []
    for service, res in zip(WEB_SERVICES, probes):
        results.append({**service, **res})
        # NOTE(review): an alert is sent on every cycle in which the service is
        # not online, i.e. repeatedly while an outage lasts.
        if res["status"] != "online":
            await asyncio.to_thread(
                send_email_notification,
                SMTP_USER,
                SMTP_PASS,
                service["name"],
                res["status"],
            )
    return results


async def _check_testarena_backend():
    """Probe the TestArena backend and try to recover it when it is not online.

    Recovery is attempted up to _MAX_RECOVERY_ATTEMPTS times; the attempt
    number is forwarded to recover_testarena_backend(). If the backend is still
    not online afterwards, an alert e-mail is sent.

    Returns:
        The last result of check_testarena_backend_status().
    """
    ta_res = await asyncio.to_thread(check_testarena_backend_status)
    if ta_res["status"] == "online":
        return ta_res

    for attempt in range(1, _MAX_RECOVERY_ATTEMPTS + 1):
        print(f"Attempting recovery for TestArena Backend (Attempt {attempt})...")
        success = await asyncio.to_thread(
            recover_testarena_backend, PC_HOST, PC_PORT, PC_USER, PC_PASS, attempt
        )
        if not success:
            # The recovery command itself failed; move on to the next attempt.
            continue
        # Wait a bit for services to start before probing again.
        await asyncio.sleep(_RECOVERY_SETTLE_SECONDS)
        ta_res = await asyncio.to_thread(check_testarena_backend_status)
        if ta_res["status"] == "online":
            print("TestArena Backend recovered successfully.")
            return ta_res

    print(f"TestArena Backend recovery failed after {_MAX_RECOVERY_ATTEMPTS} attempts.")
    await asyncio.to_thread(
        send_email_notification,
        SMTP_USER,
        SMTP_PASS,
        "TestArena Backend",
        ta_res["status"],
    )
    return ta_res


async def monitor_services_task():
    """Background loop that refreshes LATEST_STATUS every five minutes.

    Each cycle runs the two SSH checks and all web checks concurrently in
    worker threads, then checks (and if necessary tries to recover) the
    TestArena backend, and finally publishes the results in LATEST_STATUS.
    Any exception raised during a cycle is printed and the previous snapshot is
    kept; the loop itself never terminates on its own.
    """
    while True:
        try:
            # The blocking probes are independent of each other, so run them
            # side by side instead of one after another.
            pc_res, pi_res, web_results = await asyncio.gather(
                asyncio.to_thread(get_ssh_data, PC_HOST, PC_PORT, PC_USER, PC_PASS),
                asyncio.to_thread(get_ssh_data, PI_HOST, PI_PORT, PI_USER, PI_PASS),
                _check_web_services(),
            )

            ta_res = await _check_testarena_backend()
            web_results.append({
                "name": "TestArena Backend",
                "url": "http://asf-server.duckdns.org:8080/api/system/status",
                **ta_res
            })

            LATEST_STATUS["pc"] = pc_res
            LATEST_STATUS["pi"] = pi_res
            LATEST_STATUS["services"] = web_results
            LATEST_STATUS["last_update"] = datetime.now().isoformat()
        except Exception as e:
            # Top-level boundary of the background task: report and keep looping.
            print(f"Monitor task error: {e}")

        await asyncio.sleep(_MONITOR_INTERVAL_SECONDS)  # Run every 5 minutes
# Strong reference to the background monitor task. The event loop only keeps a
# weak reference to tasks, so a task whose handle is dropped may be
# garbage-collected before it finishes.
_monitor_task = None


@app.on_event("startup")
async def startup_event():
    """Start the background monitoring loop when the application starts."""
    global _monitor_task
    _monitor_task = asyncio.create_task(monitor_services_task())
@app.get("/")
async def read_index():
    """Serve the dashboard page from the file ``monitor.html``."""
    page = FileResponse("monitor.html")
    return page
@app.get("/api/status")
async def get_status():
    """Return the latest monitoring snapshot.

    No checks are run by this endpoint; it hands back LATEST_STATUS, which the
    background monitor task refreshes.
    """
    snapshot = LATEST_STATUS
    return snapshot
if __name__ == "__main__": if __name__ == "__main__":
import uvicorn import uvicorn

View File

@@ -509,18 +509,34 @@
// Update Services // Update Services
const serviceList = document.getElementById('service-list'); const serviceList = document.getElementById('service-list');
serviceList.innerHTML = data.services.map(s => ` serviceList.innerHTML = data.services.map(s => {
let statusColor = 'var(--status-red)';
let statusIcon = 'fa-times-circle';
let statusText = s.status;
if (s.status === 'online') {
statusColor = 'var(--status-green)';
statusIcon = 'fa-check-circle';
statusText = s.latency;
} else if (s.status === 'partial') {
statusColor = 'var(--status-yellow)';
statusIcon = 'fa-exclamation-circle';
statusText = 'Partial';
}
return `
<li class="service-item"> <li class="service-item">
<div class="service-name"> <div class="service-name">
<img src="${getServiceLogo(s.name)}" class="service-logo" onerror="this.src='https://via.placeholder.com/32?text=${s.name.charAt(0)}'"> <img src="${getServiceLogo(s.name)}" class="service-logo" onerror="this.src='https://via.placeholder.com/32?text=${s.name.charAt(0)}'">
${s.name} ${s.name}
</div> </div>
<div class="service-status" style="color: ${s.status === 'online' ? 'var(--status-green)' : 'var(--status-red)'}"> <div class="service-status" style="color: ${statusColor}">
<i class="fas ${s.status === 'online' ? 'fa-check-circle' : 'fa-times-circle'}"></i> <i class="fas ${statusIcon}"></i>
${s.status === 'online' ? s.latency : 'Offline'} ${statusText}
</div> </div>
</li> </li>
`).join(''); `;
}).join('');
document.getElementById('loading').style.opacity = '0'; document.getElementById('loading').style.opacity = '0';
setTimeout(() => document.getElementById('loading').style.display = 'none', 500); setTimeout(() => document.getElementById('loading').style.display = 'none', 500);
@@ -537,7 +553,8 @@
'Draw.io': 'https://app.diagrams.net/images/logo-flat.svg', 'Draw.io': 'https://app.diagrams.net/images/logo-flat.svg',
'TestArena': '/static/testarena.png', 'TestArena': '/static/testarena.png',
'TBM': '/static/tbm.ico', 'TBM': '/static/tbm.ico',
'Board': 'https://excalidraw.com/favicon-32x32.png' 'Board': 'https://excalidraw.com/favicon-32x32.png',
'TestArena Backend': '/static/testarena.png'
}; };
return logos[name] || ''; return logos[name] || '';
} }

View File

@@ -2,6 +2,9 @@ import paramiko
import requests import requests
import time import time
import re import re
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
def get_ssh_data(host, port, user, password): def get_ssh_data(host, port, user, password):
try: try:
@@ -57,3 +60,64 @@ def check_web_service(url):
return {"status": "error", "code": response.status_code} return {"status": "error", "code": response.status_code}
except Exception: except Exception:
return {"status": "offline"} return {"status": "offline"}
def check_testarena_backend_status(
    url="http://asf-server.duckdns.org:8080/api/system/status", timeout=5
):
    """Probe the TestArena backend status endpoint.

    The endpoint is expected to answer with a JSON object that maps component
    names to their state, e.g.
    {"testarena-app":"online","testarena-worker":"online","nginx":"online"}.

    Args:
        url: Status endpoint to query.
        timeout: Request timeout in seconds.

    Returns:
        A dict whose "status" key is one of:
        - "online": HTTP 200 and every component reports "online"; also
          carries "latency" (e.g. "42ms") and "details" (the payload).
        - "partial": HTTP 200 but at least one component is not "online";
          also carries "details".
        - "error": a non-200 answer, or a 200 answer whose body is not a
          non-empty JSON object; also carries "code" (the HTTP status code).
        - "offline": the probe raised (request failure, undecodable body, ...).

    This function never raises.
    """
    try:
        # perf_counter is monotonic, so the latency cannot be skewed by
        # wall-clock adjustments.
        started = time.perf_counter()
        response = requests.get(url, timeout=timeout)
        latency = int((time.perf_counter() - started) * 1000)

        if response.status_code != 200:
            return {"status": "error", "code": response.status_code}

        data = response.json()
        # An empty or non-object payload confirms nothing about the components;
        # all() over an empty collection would otherwise report "online".
        if not isinstance(data, dict) or not data:
            return {"status": "error", "code": response.status_code}

        if all(state == "online" for state in data.values()):
            return {"status": "online", "latency": f"{latency}ms", "details": data}
        return {"status": "partial", "details": data}
    except Exception:
        # Best-effort probe: any failure is reported as "offline".
        return {"status": "offline"}
def recover_testarena_backend(host, port, user, password, attempt):
    """Run a recovery script for the TestArena backend over SSH.

    The first attempt runs restart_services.sh; every later attempt runs
    deploy.sh. Both are executed through sudo.

    Args:
        host: SSH host to connect to.
        port: SSH port.
        user: SSH user name.
        password: SSH password.
        attempt: 1-based attempt number; selects which script is run.

    Returns:
        True if the remote command exited with status 0, False if it exited
        with another status or if anything raised (the error is printed).
    """
    try:
        ssh = paramiko.SSHClient()
        try:
            # NOTE(review): AutoAddPolicy accepts unknown host keys without
            # verification.
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(host, port=port, username=user, password=password, timeout=10)

            # NOTE(review): the sudo password is hard-coded in the command line.
            if attempt == 1:
                # system restart
                cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/restart_services.sh"
            else:
                # system redeploy
                cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/deploy.sh"

            _stdin, stdout, _stderr = ssh.exec_command(cmd)
            exit_status = stdout.channel.recv_exit_status()
        finally:
            # Always release the connection, also when connect/exec raised.
            ssh.close()
        return exit_status == 0
    except Exception as e:
        print(f"Recovery failed: {e}")
        return False
def send_email_notification(
    smtp_user,
    smtp_pass,
    service_name,
    status,
    smtp_host='smtp.gmail.com',
    smtp_port=587,
    timeout=10,
):
    """Send a plain-text alert e-mail about a service status.

    The message is sent from ``smtp_user`` to ``smtp_user`` (i.e. to the
    sending account itself) over STARTTLS.

    Args:
        smtp_user: SMTP login, also used as sender and recipient address.
        smtp_pass: SMTP password.
        service_name: Name of the affected service (used in subject and body).
        status: Status string to report (used in subject and body).
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port.
        timeout: Socket timeout in seconds, so an unresponsive server cannot
            block the caller indefinitely.

    Returns:
        True if the message was handed to the server, False if anything raised
        (the error is printed).
    """
    try:
        msg = MIMEMultipart()
        msg['From'] = smtp_user
        msg['To'] = smtp_user  # Send to self as requested
        msg['Subject'] = f"ALERT: Service {service_name} is {status}"

        body = f"The service {service_name} is currently {status}. Please check the system."
        msg.attach(MIMEText(body, 'plain'))

        # The context manager closes the connection even when login or sending
        # fails part-way through.
        with smtplib.SMTP(smtp_host, smtp_port, timeout=timeout) as server:
            server.starttls()
            server.login(smtp_user, smtp_pass)
            server.sendmail(smtp_user, smtp_user, msg.as_string())
        return True
    except Exception as e:
        print(f"Email failed: {e}")
        return False