update monitor
This commit is contained in:
BIN
__pycache__/main.cpython-313.pyc
Normal file
BIN
__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
__pycache__/monitor_logic.cpython-313.pyc
Normal file
BIN
__pycache__/monitor_logic.cpython-313.pyc
Normal file
Binary file not shown.
92
main.py
92
main.py
@@ -4,8 +4,9 @@ from fastapi.staticfiles import StaticFiles
|
|||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
import os
|
import os
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from monitor_logic import get_ssh_data, check_web_service
|
from monitor_logic import get_ssh_data, check_web_service, check_testarena_backend_status, recover_testarena_backend, send_email_notification
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -32,6 +33,9 @@ PI_PORT = 2222
|
|||||||
PI_USER = "asf_tb"
|
PI_USER = "asf_tb"
|
||||||
PI_PASS = os.getenv("PI_PASS", "ASF_TB")
|
PI_PASS = os.getenv("PI_PASS", "ASF_TB")
|
||||||
|
|
||||||
|
# Mailbox used both as sender and recipient of alert e-mails.
# Read from the environment (same pattern as PI_PASS above) so deployments can
# override the values without editing the source.
# NOTE(review): the default password below has been committed to version
# control and should be treated as leaked — rotate it, set SMTP_PASS in the
# environment / .env file, and then drop the literal default.
SMTP_USER = os.getenv("SMTP_USER", "support@nabd-co.com")
SMTP_PASS = os.getenv("SMTP_PASS", "zwziglbpxyfogafc")
|
||||||
|
|
||||||
WEB_SERVICES = [
|
WEB_SERVICES = [
|
||||||
{"name": "Gitea", "url": "https://gitea.nabd-co.com/"},
|
{"name": "Gitea", "url": "https://gitea.nabd-co.com/"},
|
||||||
{"name": "OpenProject", "url": "https://openproject.nabd-co.com/"},
|
{"name": "OpenProject", "url": "https://openproject.nabd-co.com/"},
|
||||||
@@ -41,34 +45,76 @@ WEB_SERVICES = [
|
|||||||
{"name": "Board", "url": "https://board.nabd-co.com/"},
|
{"name": "Board", "url": "https://board.nabd-co.com/"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Global state to store latest status
|
||||||
|
LATEST_STATUS = {
|
||||||
|
"pc": {"status": "offline"},
|
||||||
|
"pi": {"status": "offline"},
|
||||||
|
"services": [],
|
||||||
|
"last_update": None
|
||||||
|
}
|
||||||
|
|
||||||
|
async def monitor_services_task():
    """Background loop that refreshes ``LATEST_STATUS`` every five minutes.

    Each cycle:

    1. Probes the PC and the Pi over SSH and every entry of ``WEB_SERVICES``
       over HTTP. The probes are blocking, so each runs in a worker thread.
    2. Sends an e-mail for every web service whose status is not "online".
    3. Checks the TestArena backend; if it is not "online", makes up to five
       recovery attempts over SSH on the PC, re-checking 10 seconds after
       each attempt that reports success, and e-mails if it is still down.
    4. Publishes the results and a timestamp into ``LATEST_STATUS``.

    Any exception raised during a cycle is printed and the cycle is skipped;
    the loop itself never terminates.
    """
    while True:
        try:
            # The probes are independent of each other, so fan them out
            # concurrently instead of waiting for each one in turn.
            pc_res, pi_res, *web_checks = await asyncio.gather(
                asyncio.to_thread(get_ssh_data, PC_HOST, PC_PORT, PC_USER, PC_PASS),
                asyncio.to_thread(get_ssh_data, PI_HOST, PI_PORT, PI_USER, PI_PASS),
                *(asyncio.to_thread(check_web_service, s["url"]) for s in WEB_SERVICES),
            )

            # Merge each probe result into its service entry and alert on
            # anything that is not online (same order as WEB_SERVICES).
            web_results = []
            for s, res in zip(WEB_SERVICES, web_checks):
                web_results.append({**s, **res})
                if res["status"] != "online":
                    await asyncio.to_thread(send_email_notification, SMTP_USER, SMTP_PASS, s["name"], res["status"])

            # Check TestArena Backend
            ta_res = await asyncio.to_thread(check_testarena_backend_status)

            if ta_res["status"] != "online":
                # Recovery loop
                for attempt in range(1, 6):
                    print(f"Attempting recovery for TestArena Backend (Attempt {attempt})...")
                    success = await asyncio.to_thread(recover_testarena_backend, PC_HOST, PC_PORT, PC_USER, PC_PASS, attempt)
                    if success:
                        # Wait a bit for services to start
                        await asyncio.sleep(10)
                        ta_res = await asyncio.to_thread(check_testarena_backend_status)
                        if ta_res["status"] == "online":
                            print("TestArena Backend recovered successfully.")
                            break

                if ta_res["status"] != "online":
                    print("TestArena Backend recovery failed after 5 attempts.")
                    await asyncio.to_thread(send_email_notification, SMTP_USER, SMTP_PASS, "TestArena Backend", ta_res["status"])

            web_results.append({
                "name": "TestArena Backend",
                "url": "http://asf-server.duckdns.org:8080/api/system/status",
                **ta_res
            })

            LATEST_STATUS["pc"] = pc_res
            LATEST_STATUS["pi"] = pi_res
            LATEST_STATUS["services"] = web_results
            LATEST_STATUS["last_update"] = datetime.now().isoformat()

        except Exception as e:
            # Top-level boundary of the background task: report and keep going
            # so one bad cycle does not stop monitoring.
            print(f"Monitor task error: {e}")

        await asyncio.sleep(300)  # Run every 5 minutes
|
||||||
|
|
||||||
|
@app.on_event("startup")
async def startup_event():
    """Start the background monitoring loop when the application starts."""
    # The event loop keeps only a weak reference to tasks, so an unreferenced
    # task may be garbage-collected before it finishes. Keep a strong
    # reference on the application state for the lifetime of the app.
    app.state.monitor_task = asyncio.create_task(monitor_services_task())
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def read_index():
|
async def read_index():
|
||||||
return FileResponse("monitor.html")
|
return FileResponse("monitor.html")
|
||||||
|
|
||||||
@app.get("/api/status")
|
@app.get("/api/status")
|
||||||
async def get_status():
|
async def get_status():
|
||||||
# Run SSH checks in parallel
|
return LATEST_STATUS
|
||||||
pc_task = asyncio.to_thread(get_ssh_data, PC_HOST, PC_PORT, PC_USER, PC_PASS)
|
|
||||||
pi_task = asyncio.to_thread(get_ssh_data, PI_HOST, PI_PORT, PI_USER, PI_PASS)
|
|
||||||
|
|
||||||
# Run web checks in parallel
|
|
||||||
web_tasks = [asyncio.to_thread(check_web_service, s["url"]) for s in WEB_SERVICES]
|
|
||||||
|
|
||||||
pc_res, pi_res, *web_res = await asyncio.gather(pc_task, pi_task, *web_tasks)
|
|
||||||
|
|
||||||
services = []
|
|
||||||
for i, res in enumerate(web_res):
|
|
||||||
services.append({
|
|
||||||
"name": WEB_SERVICES[i]["name"],
|
|
||||||
"url": WEB_SERVICES[i]["url"],
|
|
||||||
**res
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
"pc": pc_res,
|
|
||||||
"pi": pi_res,
|
|
||||||
"services": services
|
|
||||||
}
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|||||||
29
monitor.html
29
monitor.html
@@ -509,18 +509,34 @@
|
|||||||
|
|
||||||
// Update Services
|
// Update Services
|
||||||
const serviceList = document.getElementById('service-list');
|
const serviceList = document.getElementById('service-list');
|
||||||
serviceList.innerHTML = data.services.map(s => `
|
serviceList.innerHTML = data.services.map(s => {
|
||||||
|
let statusColor = 'var(--status-red)';
|
||||||
|
let statusIcon = 'fa-times-circle';
|
||||||
|
let statusText = s.status;
|
||||||
|
|
||||||
|
if (s.status === 'online') {
|
||||||
|
statusColor = 'var(--status-green)';
|
||||||
|
statusIcon = 'fa-check-circle';
|
||||||
|
statusText = s.latency;
|
||||||
|
} else if (s.status === 'partial') {
|
||||||
|
statusColor = 'var(--status-yellow)';
|
||||||
|
statusIcon = 'fa-exclamation-circle';
|
||||||
|
statusText = 'Partial';
|
||||||
|
}
|
||||||
|
|
||||||
|
return `
|
||||||
<li class="service-item">
|
<li class="service-item">
|
||||||
<div class="service-name">
|
<div class="service-name">
|
||||||
<img src="${getServiceLogo(s.name)}" class="service-logo" onerror="this.src='https://via.placeholder.com/32?text=${s.name.charAt(0)}'">
|
<img src="${getServiceLogo(s.name)}" class="service-logo" onerror="this.src='https://via.placeholder.com/32?text=${s.name.charAt(0)}'">
|
||||||
${s.name}
|
${s.name}
|
||||||
</div>
|
</div>
|
||||||
<div class="service-status" style="color: ${s.status === 'online' ? 'var(--status-green)' : 'var(--status-red)'}">
|
<div class="service-status" style="color: ${statusColor}">
|
||||||
<i class="fas ${s.status === 'online' ? 'fa-check-circle' : 'fa-times-circle'}"></i>
|
<i class="fas ${statusIcon}"></i>
|
||||||
${s.status === 'online' ? s.latency : 'Offline'}
|
${statusText}
|
||||||
</div>
|
</div>
|
||||||
</li>
|
</li>
|
||||||
`).join('');
|
`;
|
||||||
|
}).join('');
|
||||||
|
|
||||||
document.getElementById('loading').style.opacity = '0';
|
document.getElementById('loading').style.opacity = '0';
|
||||||
setTimeout(() => document.getElementById('loading').style.display = 'none', 500);
|
setTimeout(() => document.getElementById('loading').style.display = 'none', 500);
|
||||||
@@ -537,7 +553,8 @@
|
|||||||
'Draw.io': 'https://app.diagrams.net/images/logo-flat.svg',
|
'Draw.io': 'https://app.diagrams.net/images/logo-flat.svg',
|
||||||
'TestArena': '/static/testarena.png',
|
'TestArena': '/static/testarena.png',
|
||||||
'TBM': '/static/tbm.ico',
|
'TBM': '/static/tbm.ico',
|
||||||
'Board': 'https://excalidraw.com/favicon-32x32.png'
|
'Board': 'https://excalidraw.com/favicon-32x32.png',
|
||||||
|
'TestArena Backend': '/static/testarena.png'
|
||||||
};
|
};
|
||||||
return logos[name] || '';
|
return logos[name] || '';
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,9 @@ import paramiko
|
|||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import smtplib
|
||||||
|
from email.mime.text import MIMEText
|
||||||
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
|
||||||
def get_ssh_data(host, port, user, password):
|
def get_ssh_data(host, port, user, password):
|
||||||
try:
|
try:
|
||||||
@@ -57,3 +60,64 @@ def check_web_service(url):
|
|||||||
return {"status": "error", "code": response.status_code}
|
return {"status": "error", "code": response.status_code}
|
||||||
except Exception:
|
except Exception:
|
||||||
return {"status": "offline"}
|
return {"status": "offline"}
|
||||||
|
|
||||||
|
def check_testarena_backend_status():
    """Query the TestArena backend status endpoint and summarise the result.

    Returns:
        A dict whose ``"status"`` key is one of:

        * ``"online"``  - HTTP 200 and every component reports "online";
          also carries ``"latency"`` (e.g. ``"42ms"``) and ``"details"``.
        * ``"partial"`` - HTTP 200 but at least one component is not
          "online"; also carries ``"details"``.
        * ``"error"``   - non-200 response, or a 200 response whose body is
          not a non-empty JSON object; also carries ``"code"``.
        * ``"offline"`` - the request itself failed (any exception).
    """
    url = "http://asf-server.duckdns.org:8080/api/system/status"
    try:
        # perf_counter is monotonic, so the measured latency cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()
        response = requests.get(url, timeout=5)
        latency = int((time.perf_counter() - start_time) * 1000)

        if response.status_code != 200:
            return {"status": "error", "code": response.status_code}

        try:
            data = response.json()
        except ValueError:
            # The server answered but not with JSON: that is an error on the
            # server side, not an unreachable host.
            return {"status": "error", "code": response.status_code}

        # Expected payload shape:
        # {"testarena-app":"online","testarena-worker":"online","nginx":"online"}
        # An empty object would make all() vacuously true, and a non-dict has
        # no .values(); treat both as a malformed response.
        if not isinstance(data, dict) or not data:
            return {"status": "error", "code": response.status_code}

        if all(v == "online" for v in data.values()):
            return {"status": "online", "latency": f"{latency}ms", "details": data}
        return {"status": "partial", "details": data}
    except Exception:
        # Deliberately broad: any failure to complete the request is reported
        # to the caller as the service being offline.
        return {"status": "offline"}
|
||||||
|
|
||||||
|
def recover_testarena_backend(host, port, user, password, attempt):
    """Try to bring the TestArena backend back up by running a script over SSH.

    Args:
        host: SSH host to connect to.
        port: SSH port.
        user: SSH username.
        password: SSH password.
        attempt: 1-based attempt number. The first attempt runs the restart
            script; every later attempt runs the deploy script.

    Returns:
        True if the remote command exited with status 0, False if it exited
        non-zero or if anything went wrong (the error is printed).
    """
    if attempt == 1:
        # system restart
        cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/restart_services.sh"
    else:
        # system redeploy
        cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/deploy.sh"

    try:
        # The context manager closes the connection on every path, including
        # when connect() or exec_command() raises.
        with paramiko.SSHClient() as ssh:
            # NOTE(review): AutoAddPolicy accepts unknown host keys without
            # verification; consider pinning the host key.
            ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            ssh.connect(host, port=port, username=user, password=password, timeout=10)

            _stdin, stdout, _stderr = ssh.exec_command(cmd)
            # Blocks until the remote command finishes.
            exit_status = stdout.channel.recv_exit_status()
        return exit_status == 0
    except Exception as e:
        print(f"Recovery failed: {e}")
        return False
|
||||||
|
|
||||||
|
def send_email_notification(smtp_user, smtp_pass, service_name, status):
    """Send a plain-text alert e-mail about a service's status.

    The message is sent from ``smtp_user`` to ``smtp_user`` through
    smtp.gmail.com on port 587 using STARTTLS.

    Args:
        smtp_user: Mailbox address used as login, sender and recipient.
        smtp_pass: Password for ``smtp_user``.
        service_name: Name of the affected service, used in subject and body.
        status: Status string to report.

    Returns:
        True if the message was handed to the SMTP server, False on any
        failure (the error is printed).
    """
    try:
        msg = MIMEMultipart()
        msg['From'] = smtp_user
        msg['To'] = smtp_user  # Send to self as requested
        msg['Subject'] = f"ALERT: Service {service_name} is {status}"

        body = f"The service {service_name} is currently {status}. Please check the system."
        msg.attach(MIMEText(body, 'plain'))

        # The context manager issues QUIT and closes the socket even when
        # starttls/login/sendmail raise. The timeout keeps a stalled server
        # from blocking the calling worker thread indefinitely.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:  # Assuming Gmail based on the password format
            server.starttls()
            server.login(smtp_user, smtp_pass)
            server.sendmail(smtp_user, smtp_user, msg.as_string())
        return True
    except Exception as e:
        print(f"Email failed: {e}")
        return False
|
||||||
|
|||||||
Reference in New Issue
Block a user