update monitor

This commit is contained in:
2026-01-04 16:22:27 +01:00
parent e0fa3018ea
commit 16bbc930a8
5 changed files with 163 additions and 36 deletions

92
main.py
View File

@@ -4,8 +4,9 @@ from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import os
from dotenv import load_dotenv
from monitor_logic import get_ssh_data, check_web_service
from monitor_logic import get_ssh_data, check_web_service, check_testarena_backend_status, recover_testarena_backend, send_email_notification
import asyncio
from datetime import datetime
load_dotenv()
@@ -32,6 +33,9 @@ PI_PORT = 2222
PI_USER = "asf_tb"
PI_PASS = os.getenv("PI_PASS", "ASF_TB")

# Credentials handed to send_email_notification for outage alerts.
# Like PI_PASS, they can be overridden through the environment / .env file;
# the defaults keep existing deployments working unchanged.
# NOTE(review): the fallback password below has been committed to version
# control. Rotate it and drop the default once SMTP_PASS is set in .env.
SMTP_USER = os.getenv("SMTP_USER", "support@nabd-co.com")
SMTP_PASS = os.getenv("SMTP_PASS", "zwziglbpxyfogafc")
WEB_SERVICES = [
{"name": "Gitea", "url": "https://gitea.nabd-co.com/"},
{"name": "OpenProject", "url": "https://openproject.nabd-co.com/"},
@@ -41,34 +45,76 @@ WEB_SERVICES = [
{"name": "Board", "url": "https://board.nabd-co.com/"},
]
# Shared snapshot of the most recent monitoring results.
# monitor_services_task overwrites the entries on every cycle and
# get_status returns this object; until the first cycle completes both
# hosts are reported offline and no services are listed.
LATEST_STATUS = dict(
    pc=dict(status="offline"),
    pi=dict(status="offline"),
    services=[],
    last_update=None,
)
_MONITOR_INTERVAL_SECONDS = 300  # one monitoring cycle every 5 minutes
_TA_RECOVERY_ATTEMPTS = 5
_TA_RECOVERY_SETTLE_SECONDS = 10  # give services time to start after recovery
_TA_STATUS_URL = "http://asf-server.duckdns.org:8080/api/system/status"


async def _notify_offline(service_name, status):
    """Send an alert e-mail for *service_name*; never raise on mail failure.

    A failing notification must not abort the monitoring cycle, otherwise
    the results already collected would never reach LATEST_STATUS.
    """
    try:
        await asyncio.to_thread(
            send_email_notification, SMTP_USER, SMTP_PASS, service_name, status
        )
    except Exception as e:
        print(f"Failed to send notification for {service_name}: {e}")


async def _check_testarena_backend():
    """Return the TestArena backend status, attempting recovery if needed.

    When the backend is not online, recovery is attempted up to
    _TA_RECOVERY_ATTEMPTS times. After each recovery call that reports
    success the status is re-checked following a short settle delay. If the
    backend is still not online afterwards, an alert e-mail is sent.
    """
    ta_res = await asyncio.to_thread(check_testarena_backend_status)
    if ta_res["status"] == "online":
        return ta_res

    for attempt in range(1, _TA_RECOVERY_ATTEMPTS + 1):
        print(f"Attempting recovery for TestArena Backend (Attempt {attempt})...")
        success = await asyncio.to_thread(
            recover_testarena_backend, PC_HOST, PC_PORT, PC_USER, PC_PASS, attempt
        )
        if not success:
            continue
        # Wait a bit for services to start before probing again.
        await asyncio.sleep(_TA_RECOVERY_SETTLE_SECONDS)
        ta_res = await asyncio.to_thread(check_testarena_backend_status)
        if ta_res["status"] == "online":
            print("TestArena Backend recovered successfully.")
            return ta_res

    print(f"TestArena Backend recovery failed after {_TA_RECOVERY_ATTEMPTS} attempts.")
    await _notify_offline("TestArena Backend", ta_res["status"])
    return ta_res


async def monitor_services_task():
    """Refresh LATEST_STATUS forever, once every _MONITOR_INTERVAL_SECONDS.

    Each cycle probes both SSH hosts and every entry of WEB_SERVICES
    concurrently in worker threads, e-mails an alert for each web service
    that is not online, checks (and if necessary tries to recover) the
    TestArena backend, and then publishes the results to LATEST_STATUS.
    Any error inside a cycle is printed and the loop carries on with the
    next cycle after the usual delay.
    """
    while True:
        try:
            # The probes are blocking and independent of each other, so run
            # them side by side instead of one after another.
            pc_res, pi_res, *web_checks = await asyncio.gather(
                asyncio.to_thread(get_ssh_data, PC_HOST, PC_PORT, PC_USER, PC_PASS),
                asyncio.to_thread(get_ssh_data, PI_HOST, PI_PORT, PI_USER, PI_PASS),
                *(
                    asyncio.to_thread(check_web_service, s["url"])
                    for s in WEB_SERVICES
                ),
            )

            web_results = []
            for s, res in zip(WEB_SERVICES, web_checks):
                web_results.append({**s, **res})
                if res["status"] != "online":
                    await _notify_offline(s["name"], res["status"])

            ta_res = await _check_testarena_backend()
            web_results.append({
                "name": "TestArena Backend",
                "url": _TA_STATUS_URL,
                **ta_res,
            })

            LATEST_STATUS["pc"] = pc_res
            LATEST_STATUS["pi"] = pi_res
            LATEST_STATUS["services"] = web_results
            LATEST_STATUS["last_update"] = datetime.now().isoformat()
        except Exception as e:
            print(f"Monitor task error: {e}")

        # Sleep outside the try block so a failing cycle cannot turn into a
        # tight retry loop.
        await asyncio.sleep(_MONITOR_INTERVAL_SECONDS)
@app.on_event("startup")
async def startup_event():
    """Launch the background monitoring loop when the application starts."""
    # The event loop keeps only weak references to tasks, so hold a strong
    # reference on the application object; otherwise the monitor task could
    # be garbage-collected while it is still running.
    app.state.monitor_task = asyncio.create_task(monitor_services_task())
@app.get("/")
async def read_index():
    """Serve monitor.html as the response for the site root."""
    index_page = FileResponse("monitor.html")
    return index_page
@app.get("/api/status")
async def get_status():
    """Return the most recent monitoring snapshot.

    The response is the shared LATEST_STATUS mapping with the keys "pc",
    "pi", "services" and "last_update". No SSH or HTTP probes run per
    request: the background monitor task refreshes the snapshot
    periodically. The former live-probe body returned before the cached
    snapshot could be served, leaving that return unreachable.
    """
    return LATEST_STATUS
if __name__ == "__main__":
import uvicorn