diff --git a/README.md b/README.md
index 236278d..6fcaed0 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,15 @@ TestArena is an automated build and test execution system for ESP32 projects. It
 1. **Deploy**: Run `sudo ./deploy.sh` on your Ubuntu server.
 2. **Access**: Open `http://:8080/` in your browser.
-3. **Monitor**: Use the dashboard to track test queues and view real-time logs.
+3. **Monitor**: Use the dashboard to track test queues, view individual tasks, and check service health.
+4. **Restart**: If services need a manual restart, use `sudo ./restart_services.sh`.
+
+## 🛠️ Key Features
+
+- **Service Robustness**: Systemd services are configured to auto-restart on failure and after reboot.
+- **Monitoring Dashboard**: Real-time status of App and Worker services, plus detailed task tracking for each queue.
+- **Task Timeouts**: Running tasks have a 1-hour timeout to prevent queue blocking.
+- **Remote Management**: A dedicated restart script for easy remote execution via SSH.
 ## 📚 Documentation
diff --git a/restart_services.sh b/restart_services.sh
new file mode 100644
index 0000000..68b04e7
--- /dev/null
+++ b/restart_services.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# TestArena Service Restart Script
+# This script restarts all components of the TestArena system.
+# Every restart is attempted even if an earlier one fails; the closing
+# message reports whether all of them succeeded.
+# Usage: sudo ./restart_services.sh
+
+if [ "$EUID" -ne 0 ]; then
+    echo "❌ Please run as root (use sudo ./restart_services.sh)"
+    exit 1
+fi
+
+echo "🔄 Restarting TestArena Services..."
+
+# Names of the units whose restart command returned non-zero.
+failed=""
+
+echo "🌐 Restarting Nginx..."
+systemctl restart nginx || failed="$failed nginx"
+
+echo "📱 Restarting TestArena App..."
+systemctl restart testarena-app || failed="$failed testarena-app"
+
+echo "⚙️ Restarting TestArena Worker..."
+systemctl restart testarena-worker || failed="$failed testarena-worker"
+
+if [ -z "$failed" ]; then
+    echo "✅ All services restarted!"
+else
+    echo "❌ Failed to restart:$failed"
+fi
+systemctl status testarena-app testarena-worker nginx --no-pager
diff --git a/testarena-app.service b/testarena-app.service
index cc8498c..3184f4b 100644
--- a/testarena-app.service
+++ b/testarena-app.service
@@ -12,6 +12,8 @@ Environment="XDG_RUNTIME_DIR=/tmp"
 Environment="DATABASE_URL=sqlite:////home/asf/testarena/testarena.db"
 ExecStart=/home/asf/testarena_backend/venv/bin/uvicorn testarena_app.main:app --host 0.0.0.0 --port 8000
 Restart=always
+RestartSec=10
+StartLimitIntervalSec=0
 [Install]
 WantedBy=multi-user.target
diff --git a/testarena-worker.service b/testarena-worker.service
index 642ab87..1133af0 100644
--- a/testarena-worker.service
+++ b/testarena-worker.service
@@ -12,6 +12,8 @@ Environment="XDG_RUNTIME_DIR=/tmp"
 Environment="DATABASE_URL=sqlite:////home/asf/testarena/testarena.db"
 ExecStart=/home/asf/testarena_backend/venv/bin/python3 -m testarena_app.worker
 Restart=always
+RestartSec=10
+StartLimitIntervalSec=0
 [Install]
 WantedBy=multi-user.target
diff --git a/testarena_app/main.py b/testarena_app/main.py
index 7b44721..04aa3a5 100644
--- a/testarena_app/main.py
+++ b/testarena_app/main.py
@@ -158,6 +158,42 @@ async def delete_queue(id: str, db: Session = Depends(database.get_db)):
     raise HTTPException(status_code=404, detail="ID not found")
+@app.get("/api/system/status")
+def system_status():
+    """Report whether each TestArena systemd unit is currently active.
+
+    Returns:
+        dict mapping unit name to "online" (``systemctl is-active`` exited
+        0), "offline" (non-zero exit) or "unknown" (systemctl could not be
+        run or did not answer within the timeout).
+
+    Declared with plain ``def`` so FastAPI runs it in its threadpool; the
+    blocking systemctl calls then cannot stall the event loop.
+    """
+    import subprocess  # function-scope: only this endpoint needs it
+
+    services = ["testarena-app", "testarena-worker", "nginx"]
+    status = {}
+    for service in services:
+        try:
+            # Argument list, no shell; exit code 0 means the unit is active.
+            res = subprocess.run(
+                ["systemctl", "is-active", "--quiet", service],
+                check=False,
+                timeout=5,
+            )
+        except (OSError, subprocess.TimeoutExpired):
+            status[service] = "unknown"
+        else:
+            status[service] = "online" if res.returncode == 0 else "offline"
+    return status
+
+@app.get("/api/queue/{id}/tasks")
+async def get_queue_tasks(id: str, db: Session = Depends(database.get_db)):
+    """Get all tasks for a specific queue"""
+    tasks = db.query(models.Task).filter(models.Task.queue_id == id).all()
+    return tasks
+
 @app.get("/")
 async def root():
     return
FileResponse(os.path.join(static_dir, "index.html")) diff --git a/testarena_app/static/index.html b/testarena_app/static/index.html index ac53ebc..21e89c0 100644 --- a/testarena_app/static/index.html +++ b/testarena_app/static/index.html @@ -209,6 +209,12 @@ color: #f87171; } + .status-timed-out { + background: rgba(245, 158, 11, 0.1); + color: #fbbf24; + border: 1px solid rgba(245, 158, 11, 0.3); + } + .btn-abort { background: rgba(239, 68, 68, 0.1); color: #f87171; @@ -291,14 +297,24 @@
Connecting... +
+
+
+ App +
+
+
+ Worker +
+

- + @@ -306,7 +322,7 @@ Queue Monitor

-
@@ -323,6 +339,37 @@ + +
@@ -356,6 +403,8 @@ const badge = document.getElementById('connection-status'); badge.querySelector('.dot').classList.add('online'); badge.querySelector('span').textContent = 'System Online'; + + fetchServiceStatus(); } catch (e) { const badge = document.getElementById('connection-status'); badge.querySelector('.dot').classList.remove('online'); @@ -363,6 +412,22 @@ } } + async function fetchServiceStatus() { + try { + const response = await fetch('/api/system/status'); + const status = await response.json(); + + const appDot = document.getElementById('app-dot'); + const workerDot = document.getElementById('worker-dot'); + + if (status['testarena-app'] === 'online') appDot.classList.add('online'); + else appDot.classList.remove('online'); + + if (status['testarena-worker'] === 'online') workerDot.classList.add('online'); + else workerDot.classList.remove('online'); + } catch (e) { } + } + function renderTable() { const searchTerm = document.getElementById('search-input').value.toLowerCase(); const tbody = document.querySelector('#queue-table tbody'); @@ -375,8 +440,9 @@ tr.innerHTML = ` ${q.id} ${q.environment} - ${q.status} + ${q.status} + @@ -426,6 +492,37 @@ } } + async function viewTasks(queueId) { + document.getElementById('tasks-section').style.display = 'block'; + document.getElementById('selected-queue-id').textContent = queueId; + document.getElementById('tasks-section').scrollIntoView({ behavior: 'smooth' }); + + try { + const response = await fetch(`/api/queue/${queueId}/tasks`); + const tasks = await response.json(); + const tbody = document.querySelector('#tasks-table tbody'); + tbody.innerHTML = ''; + + tasks.forEach(t => { + const tr = document.createElement('tr'); + const resultStr = t.result ? JSON.stringify(t.result).substring(0, 50) + '...' 
: '-'; + tr.innerHTML = ` + ${t.id} + ${t.scenario_path.split('/').pop()} + ${t.status} + ${resultStr} + `; + tbody.appendChild(tr); + }); + } catch (e) { + addLog(`Failed to fetch tasks for ${queueId}`, 'danger'); + } + } + + function hideTasks() { + document.getElementById('tasks-section').style.display = 'none'; + } + function addLog(msg, type = 'info') { const logs = document.getElementById('logs'); const entry = document.createElement('div'); diff --git a/testarena_app/worker.py b/testarena_app/worker.py index f2e6996..dfcdfdf 100644 --- a/testarena_app/worker.py +++ b/testarena_app/worker.py @@ -170,10 +170,12 @@ def run_worker(): task_dir = os.path.join(queue_dir, task.id) os.makedirs(task_dir, exist_ok=True) - ret = run_command_with_logging(cmd, queue_log, cwd=repo_dir) + ret = run_command_with_logging(cmd, queue_log, cwd=repo_dir, timeout=3600) if ret == 0: task.status = "Finished" + elif ret == 124: + task.status = "Timed Out" else: task.status = "Error"