diff --git a/app/routes/jobs.py b/app/routes/jobs.py
index 6413482..3a7478e 100644
--- a/app/routes/jobs.py
+++ b/app/routes/jobs.py
@@ -9,6 +9,7 @@ import requests
 import random
 import string
 import traceback
+from datetime import datetime, timedelta
 
 def generate_remote_id(length=6):
     return ''.join(random.choices(string.digits, k=length))
@@ -128,7 +129,7 @@ def submit_step1():
         ssh_options = f"-p {ssh_port} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
 
         # First, clone the repository
-        clone_cmd = f"sshpass -p '{ssh_password}' ssh {ssh_options} {ssh_user}@{ssh_host} './TPF/gitea_repo_controller.sh clone'"
+        clone_cmd = f"sshpass -p '{ssh_password}' ssh {ssh_options} {ssh_user}@{ssh_host} '. /home/asf/testarena_backend/TPF/gitea_repo_controller.sh clone'"
         print(f"[DEBUG] Executing clone command: {clone_cmd.replace(ssh_password, '***')}")
         clone_result = subprocess.run(clone_cmd, shell=True, capture_output=True, text=True, timeout=60)
 
@@ -138,7 +139,7 @@ def submit_step1():
         print(f"[DEBUG] Clone stderr: {clone_result.stderr}")
 
         # Then, checkout the branch
-        checkout_cmd = f"sshpass -p '{ssh_password}' ssh {ssh_options} {ssh_user}@{ssh_host} './TPF/gitea_repo_controller.sh checkout {branch_name}'"
+        checkout_cmd = f"sshpass -p '{ssh_password}' ssh {ssh_options} {ssh_user}@{ssh_host} '. /home/asf/testarena_backend/TPF/gitea_repo_controller.sh checkout {branch_name}'"
         print(f"[DEBUG] Executing checkout command: {checkout_cmd.replace(ssh_password, '***')}")
         checkout_result = subprocess.run(checkout_cmd, shell=True, capture_output=True, text=True, timeout=60)
 
@@ -470,6 +471,23 @@ def update_job_status_internal(job):
         return
 
     try:
+        # 0. Check for Timeout (1 hour)
+        if job.submitted_at and (datetime.utcnow() - job.submitted_at) > timedelta(hours=1):
+            print(f"[WARNING] Job {job.id} timed out (1 hour limit reached). Aborting...")
+
+            # Try to abort remotely
+            try:
+                remote_url = f"http://asf-server.duckdns.org:8080/api/abort/{job.remote_queue_id}"
+                requests.post(remote_url, timeout=10)
+            except Exception as e:
+                print(f"[ERROR] Failed to abort timed-out job remotely: {e}")
+
+            job.status = 'aborted'
+            job.queue_log = (job.queue_log or "") + "\n\n[SYSTEM] Job timed out after 1 hour."
+            from app import db
+            db.session.commit()
+            return
+
         # 1. Check Queue Status
         status_url = f"http://asf-server.duckdns.org:8080/api/status/{job.remote_queue_id}"
        q_resp = requests.get(status_url, timeout=5)
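A note on the timeout block above: it compares a naive `datetime.utcnow()` against `job.submitted_at`, so it only behaves correctly if `submitted_at` is also stored as a naive UTC timestamp. A minimal sketch of the same cutoff factored into a standalone helper (the `is_timed_out` name and the injectable `now` parameter are illustrative, not part of the patch):

```python
from datetime import datetime, timedelta

# Illustrative helper mirroring the inline check in update_job_status_internal.
# Assumes submitted_at is a naive UTC datetime, matching datetime.utcnow().
def is_timed_out(submitted_at, limit=timedelta(hours=1), now=None):
    """Return True once a job has exceeded its wall-clock limit."""
    if submitted_at is None:
        return False  # never submitted, so nothing to time out
    now = now if now is not None else datetime.utcnow()
    return (now - submitted_at) > limit
```

Accepting `now` as a parameter keeps the cutoff unit-testable without monkeypatching the clock.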
@@ -515,17 +533,31 @@ def update_job_status_internal(job):
                 r_resp = requests.get(res_url, timeout=2)
                 if r_resp.status_code == 200:
                     r_data = r_resp.json()
-                    for key, val in r_data.items():
-                        if key.upper() == scenario.upper():
-                            # Construct standardized execution report link
+
+                    # Logic Fix 2: Check ALL results in final_summary.json
+                    # If ANY result is FAIL or ERROR, the scenario is FAIL
+                    scenario_status = "PASS"
+                    report_link = "#"
+
+                    # Iterate over all tests in the summary
+                    for test_name, val in r_data.items():
+                        # val is [STATUS, LINK]
+                        status = val[0].upper()
+                        link = val[1]
+
+                        # Keep the link of the first test (or the specific scenario if found)
+                        if report_link == "#":
+                            # Construct standardized execution report link
                             report_link = f"http://asf-server.duckdns.org:8080/results/{job.remote_queue_id}/{task_id}/execution_report.html"
-
-                            # val[0] is status (PASS/FAIL/ERROR), val[1] was the old link
-                            results[scenario] = [val[0], report_link]
-
-                            if val[0] in ['FAIL', 'ERROR']:
-                                any_failed = True
-                            break
+
+                        if status in ['FAIL', 'ERROR']:
+                            scenario_status = "FAIL"
+
+                    results[scenario] = [scenario_status, report_link]
+
+                    if scenario_status == "FAIL":
+                        any_failed = True
+
                 else:
                     all_finished = False
             elif t_data.get('status') == 'Aborted':
@@ -544,10 +576,33 @@ def update_job_status_internal(job):
                 any_failed = True
 
         if all_finished and task_ids:
+            # Logic Fix 1: Only mark finished if ALL tasks are finished (handled by all_finished flag)
+            # If any task has FAIL or ERROR, the job status is 'failed'
             job.status = 'failed' if any_failed else 'passed'
             from app import db
             job.completed_at = db.session.query(db.func.now()).scalar()
+
+            # Logic Fix 3: Run Cleanup Command
+            try:
+                print(f"[INFO] Job {job.id} finished. Running cleanup...")
+                # SSH options
+                ssh_port = os.environ.get('SSH_PORT', '49152')  # Default from user request
+                ssh_host = "asf-server.duckdns.org"
+                ssh_user = "asf"
+                ssh_pass = "ASF"  # Hardcoded as per request, ideally env var
+
+                cleanup_cmd = f"sshpass -p '{ssh_pass}' ssh -p {ssh_port} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null {ssh_user}@{ssh_host} \"rm -rf /home/asf/testarena_backend/TPF/Sensor_hub_repo\""
+
+                cleanup_res = subprocess.run(cleanup_cmd, shell=True, capture_output=True, text=True, timeout=30)
+                if cleanup_res.returncode != 0:
+                    print(f"[WARNING] Cleanup command failed: {cleanup_res.stderr}")
+                else:
+                    print(f"[INFO] Cleanup successful.")
+
+            except Exception as e:
+                print(f"[ERROR] Failed to run cleanup command: {e}")
+
             job.remote_results = json.dumps(results)
             from app import db
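The cleanup step, like the clone and checkout commands earlier in the patch, interpolates the password and remote path into a `shell=True` string. A sketch of the same invocation built as an argument list, which avoids local shell quoting entirely (host, user, port default, and path are taken from the diff; sourcing the password from an `SSH_PASS` environment variable is an assumption, the patch hardcodes it):

```python
import os
import subprocess

# Sketch of the cleanup step without shell=True: each argv element is passed
# to sshpass verbatim, so only the remote shell ever parses the rm command.
ssh_port = os.environ.get('SSH_PORT', '49152')
ssh_pass = os.environ.get('SSH_PASS', 'ASF')  # assumption: env var instead of the hardcoded literal

cleanup_argv = [
    'sshpass', '-p', ssh_pass,
    'ssh', '-p', ssh_port,
    '-o', 'StrictHostKeyChecking=no',
    '-o', 'UserKnownHostsFile=/dev/null',
    'asf@asf-server.duckdns.org',
    'rm -rf /home/asf/testarena_backend/TPF/Sensor_hub_repo',
]

cleanup_res = subprocess.run(cleanup_argv, capture_output=True, text=True, timeout=30)
if cleanup_res.returncode != 0:
    print(f"[WARNING] Cleanup command failed: {cleanup_res.stderr}")
```

This also sidesteps the need to scrub the password out of debug output, since the command is never assembled into a single printable string.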