Files
monitor/monitor_logic.py
2026-01-04 16:22:27 +01:00

124 lines
4.5 KiB
Python

import paramiko
import requests
import time
import re
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
def _run_remote(ssh, command):
    """Run *command* over an open SSH client and return its trimmed stdout."""
    _, stdout, _ = ssh.exec_command(command)
    return stdout.read().decode().strip()


def _parse_cpu_temp(temp_raw):
    """Turn the raw temperature probe output into a display string.

    Handles the two formats the probe command can emit: a ``temp=...'C``
    line (from ``vcgencmd``) and a bare number that is divided by 1000
    (from the thermal-zone file). Empty or unparseable output gives "N/A".
    """
    if "temp=" in temp_raw:
        return temp_raw.replace("temp=", "").replace("'C", "")
    if not temp_raw:
        return "N/A"
    try:
        return str(float(temp_raw) / 1000)
    except ValueError:
        # A garbled reading should not mark an otherwise reachable host offline.
        return "N/A"


def get_ssh_data(host, port, user, password):
    """Collect CPU, temperature, RAM and disk metrics from a host over SSH.

    Args:
        host: Hostname or address of the machine to query.
        port: SSH port to connect to.
        user: SSH username.
        password: SSH password.

    Returns:
        On success, a dict with ``"status": "online"`` and the string values
        ``"cpu"``, ``"temp"``, ``"ram"`` and ``"disk"`` exactly as printed by
        the remote commands (``"temp"`` is "N/A" when no reading is
        available). On any failure, ``{"status": "offline", "error": <text>}``.
    """
    ssh = None
    try:
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; consider loading known hosts instead.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(host, port=port, username=user, password=password, timeout=10)

        # CPU usage: 100 minus the idle percentage reported by top.
        cpu_usage = _run_remote(
            ssh,
            "top -bn1 | grep 'Cpu(s)' | sed 's/.*, *\\([0-9.]*\\)%* id.*/\\1/' | awk '{print 100 - $1}'",
        )

        # CPU temperature: thermal-zone file first, vcgencmd as the fallback.
        temp_raw = _run_remote(
            ssh,
            "cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || vcgencmd measure_temp 2>/dev/null",
        )
        cpu_temp = _parse_cpu_temp(temp_raw)

        # RAM usage as a percentage with two decimals.
        ram_usage = _run_remote(
            ssh,
            "free -m | awk 'NR==2{printf \"%.2f\", $3*100/$2 }'",
        )

        # Disk usage of / with the trailing percent sign removed.
        disk_usage = _run_remote(
            ssh,
            "df -h / | awk 'NR==2{print $5}' | sed 's/%//'",
        )

        return {
            "status": "online",
            "cpu": cpu_usage,
            "temp": cpu_temp,
            "ram": ram_usage,
            "disk": disk_usage,
        }
    except Exception as e:
        # Top-level boundary: any failure is reported as an offline host.
        return {
            "status": "offline",
            "error": str(e),
        }
    finally:
        # Close the client on every path so failed probes do not leak connections.
        if ssh is not None:
            ssh.close()
def check_web_service(url):
    """Probe *url* with an HTTP GET and report whether it answered.

    Args:
        url: Address of the web service to check.

    Returns:
        ``{"status": "online", "latency": "<n>ms"}`` for an HTTP 200 reply,
        ``{"status": "error", "code": <status>}`` for any other status code,
        and ``{"status": "offline"}`` when the request itself fails.
    """
    began = time.time()
    try:
        reply = requests.get(url, timeout=5)
    except Exception:
        return {"status": "offline"}

    elapsed_ms = int((time.time() - began) * 1000)
    if reply.status_code != 200:
        return {"status": "error", "code": reply.status_code}
    return {"status": "online", "latency": f"{elapsed_ms}ms"}
def check_testarena_backend_status(url="http://asf-server.duckdns.org:8080/api/system/status"):
    """Query the TestArena backend status endpoint and summarize the result.

    The endpoint is expected to return a JSON object mapping service names
    to state strings, for example
    ``{"testarena-app":"online","testarena-worker":"online","nginx":"online"}``.

    Args:
        url: Status endpoint to query. Defaults to the production backend.

    Returns:
        ``{"status": "online", "latency": "<n>ms", "details": <payload>}``
        when every reported service is "online";
        ``{"status": "partial", "details": <payload>}`` when at least one
        service is not "online" or the payload lists no services at all;
        ``{"status": "error", "code": <status>}`` for a non-200 reply;
        ``{"status": "offline"}`` when the request or JSON decoding fails.
    """
    try:
        start_time = time.time()
        response = requests.get(url, timeout=5)
        latency = int((time.time() - start_time) * 1000)

        if response.status_code != 200:
            return {"status": "error", "code": response.status_code}

        data = response.json()
        states = list(data.values())
        # all() of an empty sequence is True, so an empty report must be
        # rejected explicitly rather than counted as "everything online".
        if states and all(state == "online" for state in states):
            return {"status": "online", "latency": f"{latency}ms", "details": data}
        return {"status": "partial", "details": data}
    except Exception:
        # Top-level boundary: network errors and malformed payloads both
        # count as the backend being unreachable.
        return {"status": "offline"}
def recover_testarena_backend(host, port, user, password, attempt):
    """Try to recover the TestArena backend by running a script over SSH.

    Args:
        host: Hostname or address of the backend server.
        port: SSH port to connect to.
        user: SSH username.
        password: SSH password.
        attempt: Recovery attempt number. Attempt 1 runs the service restart
            script; any other value runs the redeploy script.

    Returns:
        True when the remote command exits with status 0, otherwise False
        (including when the SSH connection or command execution fails).
    """
    ssh = None
    try:
        ssh = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; consider loading known hosts instead.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(host, port=port, username=user, password=password, timeout=10)

        # NOTE(review): the sudo password is hard-coded in the command line;
        # consider passwordless sudo for these scripts or a configured secret.
        if attempt == 1:
            # First attempt: restart the services.
            cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/restart_services.sh"
        else:
            # Later attempts: redeploy the system.
            cmd = "echo 'ASF' | sudo -S /home/asf/testarena_backend/deploy.sh"

        _, stdout, _ = ssh.exec_command(cmd)
        # Blocks until the remote command has finished.
        exit_status = stdout.channel.recv_exit_status()
        return exit_status == 0
    except Exception as e:
        print(f"Recovery failed: {e}")
        return False
    finally:
        # Close the client on every path so failed recoveries do not leak connections.
        if ssh is not None:
            ssh.close()
def send_email_notification(smtp_user, smtp_pass, service_name, status):
    """Email an alert about a service's status to the sending account itself.

    Args:
        smtp_user: SMTP login, used as both sender and recipient.
        smtp_pass: SMTP password for ``smtp_user``.
        service_name: Name of the service the alert is about.
        status: Current status text to report.

    Returns:
        True when the message was handed to the SMTP server, False on any
        failure (the error is printed).
    """
    try:
        msg = MIMEMultipart()
        msg['From'] = smtp_user
        msg['To'] = smtp_user  # Send to self as requested
        msg['Subject'] = f"ALERT: Service {service_name} is {status}"
        body = f"The service {service_name} is currently {status}. Please check the system."
        msg.attach(MIMEText(body, 'plain'))

        # The context manager closes the connection even when STARTTLS, login
        # or sending fails; the timeout keeps a stalled server from hanging
        # the monitor. Gmail is assumed, based on the password format.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=10) as server:
            server.starttls()
            server.login(smtp_user, smtp_pass)
            server.sendmail(smtp_user, smtp_user, msg.as_string())
        return True
    except Exception as e:
        print(f"Email failed: {e}")
        return False