update monitor
This commit is contained in:
BIN
__pycache__/main.cpython-313.pyc
Normal file
BIN
__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/monitor_logic.cpython-313.pyc
Normal file
BIN
__pycache__/monitor_logic.cpython-313.pyc
Normal file
Binary file not shown.
92
main.py
92
main.py
@@ -4,8 +4,9 @@ from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from monitor_logic import get_ssh_data, check_web_service
|
||||
from monitor_logic import get_ssh_data, check_web_service, check_testarena_backend_status, recover_testarena_backend, send_email_notification
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -32,6 +33,9 @@ PI_PORT = 2222
|
||||
PI_USER = "asf_tb"
|
||||
PI_PASS = os.getenv("PI_PASS", "ASF_TB")
|
||||
|
||||
# Credentials for the alert mailbox. They are read from the environment (and
# therefore from .env, since load_dotenv() runs earlier in this module) so the
# secret never lives in version control, matching how PI_PASS is handled.
# NOTE(review): the password that used to be hard-coded here has been published
# in the repository history and should be rotated.
SMTP_USER = os.getenv("SMTP_USER", "support@nabd-co.com")
# Empty when unset: the SMTP login then fails and the mailer reports the
# failure instead of silently using a leaked credential.
SMTP_PASS = os.getenv("SMTP_PASS", "")
|
||||
|
||||
WEB_SERVICES = [
|
||||
{"name": "Gitea", "url": "https://gitea.nabd-co.com/"},
|
||||
{"name": "OpenProject", "url": "https://openproject.nabd-co.com/"},
|
||||
@@ -41,34 +45,76 @@ WEB_SERVICES = [
|
||||
{"name": "Board", "url": "https://board.nabd-co.com/"},
|
||||
]
|
||||
|
||||
# Global state to store latest status
|
||||
LATEST_STATUS = {
|
||||
"pc": {"status": "offline"},
|
||||
"pi": {"status": "offline"},
|
||||
"services": [],
|
||||
"last_update": None
|
||||
}
|
||||
|
||||
# URL published next to the TestArena backend entry in the status snapshot.
_TESTARENA_STATUS_URL = "http://asf-server.duckdns.org:8080/api/system/status"

# How many recovery attempts are made before the backend is reported as down.
_MAX_RECOVERY_ATTEMPTS = 5

# Last non-online status that was successfully mailed, keyed by service name.
# Used to mail once per status change instead of once per polling cycle.
_ALERTED_STATUS = {}


async def _alert_on_change(name, status):
    """Mail an alert for `name` only when it enters a new non-online status.

    A service that is back online is forgotten, so its next outage alerts
    again. The status is only recorded when the mail was actually sent, so a
    failed delivery is retried on the next cycle.
    """
    if status == "online":
        _ALERTED_STATUS.pop(name, None)
        return
    if _ALERTED_STATUS.get(name) == status:
        return
    sent = await asyncio.to_thread(
        send_email_notification, SMTP_USER, SMTP_PASS, name, status
    )
    if sent:
        _ALERTED_STATUS[name] = status


async def _check_testarena_with_recovery():
    """Check the TestArena backend, trying to recover it when it is not online.

    Returns the last status dict obtained from check_testarena_backend_status.
    """
    ta_res = await asyncio.to_thread(check_testarena_backend_status)
    if ta_res["status"] == "online":
        return ta_res

    for attempt in range(1, _MAX_RECOVERY_ATTEMPTS + 1):
        print(f"Attempting recovery for TestArena Backend (Attempt {attempt})...")
        success = await asyncio.to_thread(
            recover_testarena_backend, PC_HOST, PC_PORT, PC_USER, PC_PASS, attempt
        )
        if not success:
            continue
        # Wait a bit for services to start
        await asyncio.sleep(10)
        ta_res = await asyncio.to_thread(check_testarena_backend_status)
        if ta_res["status"] == "online":
            print("TestArena Backend recovered successfully.")
            return ta_res

    print(
        f"TestArena Backend recovery failed after {_MAX_RECOVERY_ATTEMPTS} attempts."
    )
    return ta_res


async def _run_monitor_cycle():
    """Probe every target once and publish the results in LATEST_STATUS."""
    # The SSH and HTTP probes are independent blocking calls, so run them
    # concurrently in worker threads rather than one after another.
    pc_res, pi_res, *web_res = await asyncio.gather(
        asyncio.to_thread(get_ssh_data, PC_HOST, PC_PORT, PC_USER, PC_PASS),
        asyncio.to_thread(get_ssh_data, PI_HOST, PI_PORT, PI_USER, PI_PASS),
        *(asyncio.to_thread(check_web_service, s["url"]) for s in WEB_SERVICES),
    )

    web_results = [{**s, **res} for s, res in zip(WEB_SERVICES, web_res)]
    for entry in web_results:
        await _alert_on_change(entry["name"], entry["status"])

    ta_res = await _check_testarena_with_recovery()
    await _alert_on_change("TestArena Backend", ta_res["status"])
    web_results.append({
        "name": "TestArena Backend",
        "url": _TESTARENA_STATUS_URL,
        **ta_res,
    })

    LATEST_STATUS["pc"] = pc_res
    LATEST_STATUS["pi"] = pi_res
    LATEST_STATUS["services"] = web_results
    LATEST_STATUS["last_update"] = datetime.now().isoformat()


async def monitor_services_task():
    """Background loop that refreshes LATEST_STATUS every five minutes.

    Each cycle probes both SSH hosts and every entry of WEB_SERVICES, checks
    the TestArena backend (attempting recovery when it is not online), mails
    an alert when a service changes to a non-online status, and stores the
    results in LATEST_STATUS. The loop never returns; an error in one cycle is
    printed and the next cycle still runs.
    """
    while True:
        try:
            await _run_monitor_cycle()
        except Exception as e:
            # Top-level boundary of the background task: report and keep going.
            print(f"Monitor task error: {e}")

        await asyncio.sleep(300)  # Run every 5 minutes
|
||||
|
||||
# Strong references to fire-and-forget tasks. The event loop only keeps weak
# references, so a task nobody else holds can be garbage-collected mid-run.
_BACKGROUND_TASKS = set()


@app.on_event("startup")
async def startup_event():
    """Start the background monitoring loop when the application starts."""
    task = asyncio.create_task(monitor_services_task())
    _BACKGROUND_TASKS.add(task)
    # Drop the reference again if the task ever finishes.
    task.add_done_callback(_BACKGROUND_TASKS.discard)
|
||||
|
||||
@app.get("/")
async def read_index():
    """Serve the dashboard page for requests to the site root."""
    dashboard_page = "monitor.html"
    return FileResponse(dashboard_page)
|
||||
|
||||
@app.get("/api/status")
async def get_status():
    """Return the latest monitoring snapshot.

    The snapshot is the module-level LATEST_STATUS dict, which the background
    monitor task refreshes. Serving it directly keeps the request from
    re-running every SSH and HTTP probe, and gives the client the
    "TestArena Backend" entry and the "last_update" timestamp that only the
    background task produces.
    """
    return LATEST_STATUS
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
29
monitor.html
29
monitor.html
@@ -509,18 +509,34 @@
|
||||
|
||||
// Update Services
|
||||
const serviceList = document.getElementById('service-list');
|
||||
serviceList.innerHTML = data.services.map(s => `
|
||||
serviceList.innerHTML = data.services.map(s => {
|
||||
let statusColor = 'var(--status-red)';
|
||||
let statusIcon = 'fa-times-circle';
|
||||
let statusText = s.status;
|
||||
|
||||
if (s.status === 'online') {
|
||||
statusColor = 'var(--status-green)';
|
||||
statusIcon = 'fa-check-circle';
|
||||
statusText = s.latency;
|
||||
} else if (s.status === 'partial') {
|
||||
statusColor = 'var(--status-yellow)';
|
||||
statusIcon = 'fa-exclamation-circle';
|
||||
statusText = 'Partial';
|
||||
}
|
||||
|
||||
return `
|
||||
<li class="service-item">
|
||||
<div class="service-name">
|
||||
<img src="${getServiceLogo(s.name)}" class="service-logo" onerror="this.src='https://via.placeholder.com/32?text=${s.name.charAt(0)}'">
|
||||
${s.name}
|
||||
</div>
|
||||
<div class="service-status" style="color: ${s.status === 'online' ? 'var(--status-green)' : 'var(--status-red)'}">
|
||||
<i class="fas ${s.status === 'online' ? 'fa-check-circle' : 'fa-times-circle'}"></i>
|
||||
${s.status === 'online' ? s.latency : 'Offline'}
|
||||
<div class="service-status" style="color: ${statusColor}">
|
||||
<i class="fas ${statusIcon}"></i>
|
||||
${statusText}
|
||||
</div>
|
||||
</li>
|
||||
`).join('');
|
||||
`;
|
||||
}).join('');
|
||||
|
||||
document.getElementById('loading').style.opacity = '0';
|
||||
setTimeout(() => document.getElementById('loading').style.display = 'none', 500);
|
||||
@@ -537,7 +553,8 @@
|
||||
'Draw.io': 'https://app.diagrams.net/images/logo-flat.svg',
|
||||
'TestArena': '/static/testarena.png',
|
||||
'TBM': '/static/tbm.ico',
|
||||
'Board': 'https://excalidraw.com/favicon-32x32.png'
|
||||
'Board': 'https://excalidraw.com/favicon-32x32.png',
|
||||
'TestArena Backend': '/static/testarena.png'
|
||||
};
|
||||
return logos[name] || '';
|
||||
}
|
||||
|
||||
@@ -2,6 +2,9 @@ import paramiko
|
||||
import requests
|
||||
import time
|
||||
import re
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
|
||||
def get_ssh_data(host, port, user, password):
|
||||
try:
|
||||
@@ -57,3 +60,64 @@ def check_web_service(url):
|
||||
return {"status": "error", "code": response.status_code}
|
||||
except Exception:
|
||||
return {"status": "offline"}
|
||||
|
||||
def check_testarena_backend_status(
    url="http://asf-server.duckdns.org:8080/api/system/status", timeout=5
):
    """Query the TestArena backend status endpoint and summarise the result.

    Args:
        url: Status endpoint to query; defaults to the production endpoint.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        A dict with a "status" key:
        - {"status": "online", "latency": "<n>ms", "details": {...}} when
          every component in the payload reports "online";
        - {"status": "partial", "details": {...}} when at least one does not;
        - {"status": "error", "code": <http status>} for a non-200 response or
          a 200 response whose body is not a non-empty JSON object;
        - {"status": "offline"} when the request itself fails.

    The function never raises.
    """
    try:
        # perf_counter is monotonic, so a clock adjustment during the request
        # cannot distort the measured latency.
        start_time = time.perf_counter()
        response = requests.get(url, timeout=timeout)
        latency = int((time.perf_counter() - start_time) * 1000)
    except Exception:
        return {"status": "offline"}

    if response.status_code != 200:
        return {"status": "error", "code": response.status_code}

    try:
        data = response.json()
    except Exception:
        # The server answered but the body is not JSON: reachable yet broken.
        return {"status": "error", "code": response.status_code}

    # Expected payload, e.g.
    # {"testarena-app":"online","testarena-worker":"online","nginx":"online"}
    # An empty or non-object payload must not count as healthy: all() over an
    # empty dict would otherwise report "online".
    if not isinstance(data, dict) or not data:
        return {"status": "error", "code": response.status_code}

    if all(state == "online" for state in data.values()):
        return {"status": "online", "latency": f"{latency}ms", "details": data}
    return {"status": "partial", "details": data}
|
||||
|
||||
def recover_testarena_backend(host, port, user, password, attempt, sudo_password=None):
    """Try to bring the TestArena backend back up over SSH.

    The first attempt runs the restart script; every later attempt runs the
    redeploy script. Both are executed through sudo on the remote host.

    Args:
        host: SSH host name or address.
        port: SSH port.
        user: SSH user name.
        password: SSH password.
        attempt: 1-based attempt number; 1 restarts, anything else redeploys.
        sudo_password: Password fed to sudo. When None it is read from the
            TESTARENA_SUDO_PASS environment variable, falling back to the
            legacy built-in value.

    Returns:
        True when the remote command exits with status 0, False when it exits
        non-zero or any error occurs. The function never raises.
    """
    import os  # local import: this module's top-level imports are not all visible here

    if sudo_password is None:
        # NOTE(review): the fallback keeps existing deployments working; set
        # TESTARENA_SUDO_PASS and remove the literal once that is done.
        sudo_password = os.getenv("TESTARENA_SUDO_PASS", "ASF")

    if attempt == 1:
        # system restart
        script = "/home/asf/testarena_backend/restart_services.sh"
    else:
        # system redeploy
        script = "/home/asf/testarena_backend/deploy.sh"
    # The password is written to sudo's stdin rather than echoed inside the
    # command, so it does not appear in the remote command line. -p ''
    # suppresses the prompt text.
    cmd = f"sudo -S -p '' {script}"

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without checking
    # them; consider pinning the host key.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(host, port=port, username=user, password=password, timeout=10)

        stdin, stdout, _stderr = ssh.exec_command(cmd)
        stdin.write(sudo_password + "\n")
        stdin.flush()
        # Signal end-of-input, as the former `echo ... |` pipe did.
        stdin.channel.shutdown_write()

        # Drain all output before asking for the exit status; otherwise a
        # script that prints a lot can stall waiting for the reader.
        stdout.channel.set_combine_stderr(True)
        stdout.read()
        exit_status = stdout.channel.recv_exit_status()
        return exit_status == 0
    except Exception as e:
        print(f"Recovery failed: {e}")
        return False
    finally:
        # Always release the connection, including on the error paths.
        ssh.close()
|
||||
|
||||
def send_email_notification(smtp_user, smtp_pass, service_name, status,
                            smtp_host="smtp.gmail.com", smtp_port=587, timeout=10):
    """Mail an alert about a service to the monitoring mailbox itself.

    Args:
        smtp_user: SMTP login; also used as sender and recipient address.
        smtp_pass: SMTP password.
        service_name: Name of the affected service, used in subject and body.
        status: Current status string of the service.
        smtp_host: SMTP server host (Gmail by default, as the original code
            assumed).
        smtp_port: SMTP server port; the connection is upgraded with STARTTLS.
        timeout: Socket timeout in seconds, so an unresponsive server cannot
            block the calling thread indefinitely.

    Returns:
        True when the message was handed to the server, False on any failure.
        Delivery is best-effort: errors are printed, never raised.
    """
    try:
        msg = MIMEMultipart()
        msg['From'] = smtp_user
        msg['To'] = smtp_user  # Send to self as requested
        msg['Subject'] = f"ALERT: Service {service_name} is {status}"

        body = f"The service {service_name} is currently {status}. Please check the system."
        msg.attach(MIMEText(body, 'plain'))

        # The context manager closes the connection even when STARTTLS, login
        # or sending fails.
        with smtplib.SMTP(smtp_host, smtp_port, timeout=timeout) as server:
            server.starttls()
            server.login(smtp_user, smtp_pass)
            server.sendmail(smtp_user, smtp_user, msg.as_string())
        return True
    except Exception as e:
        # Deliberate catch-all: a failed alert must not break the monitor.
        print(f"Email failed: {e}")
        return False
|
||||
|
||||
Reference in New Issue
Block a user