diff --git a/frontend/src/JobEditDialog.vue b/frontend/src/JobEditDialog.vue index 8fc5eb9..3a968df 100644 --- a/frontend/src/JobEditDialog.vue +++ b/frontend/src/JobEditDialog.vue @@ -44,6 +44,9 @@ + + + diff --git a/frontend/src/Jobs.vue b/frontend/src/Jobs.vue index fc29f84..a46881d 100644 --- a/frontend/src/Jobs.vue +++ b/frontend/src/Jobs.vue @@ -219,7 +219,8 @@ export default { branch: true, commit: true, cpuLimit: true, - memoryLimit: true + memoryLimit: true, + autoRestart: true, } this.editJobDialog.type = 'edit' var line = tableData[index] diff --git a/frontend/src/const.js b/frontend/src/const.js index 0ae52cb..3558662 100644 --- a/frontend/src/const.js +++ b/frontend/src/const.js @@ -10,7 +10,8 @@ const defaultJobData = { comments: '', volumeMounts: [], cpuLimit: '1.5', - memoryLimit: '2Gi' + memoryLimit: '2Gi', + autoRestart: false } export { defaultJobData diff --git a/ktqueue/api/job.py b/ktqueue/api/job.py index 3ca6a66..850bc39 100644 --- a/ktqueue/api/job.py +++ b/ktqueue/api/job.py @@ -19,7 +19,7 @@ def generate_job(name, command, node, gpu_num, image, repo, branch, commit_id, - comments, mounts, load_nvidia_driver=None, cpu_limit=None, memory_limit=None): + comments, mounts, load_nvidia_driver=None, cpu_limit=None, memory_limit=None, auto_restart=False): """Generate a job description in JSON format.""" command_kube = 'cd $WORK_DIR && ' + command @@ -131,7 +131,7 @@ def generate_job(name, command, node, gpu_num, image, repo, branch, commit_id, } ], 'volumes': volumes, - 'restartPolicy': 'OnFailure', + 'restartPolicy': 'OnFailure' if auto_restart else 'Never', 'nodeSelector': node_selector, } } @@ -214,13 +214,15 @@ async def post(self): mounts = body_arguments.get('volumeMounts', []) cpu_limit = body_arguments.get('cpuLimit', None) memory_limit = body_arguments.get('memoryLimit', None) + auto_restart = body_arguments.get('autoRestart', False) job_dir = os.path.join('/cephfs/ktqueue/jobs/', name) job = generate_job( name=name, command=command, node=node, gpu_num=gpu_num, image=image, repo=repo, branch=branch, commit_id=commit_id, comments=comments, - mounts=mounts, cpu_limit=cpu_limit, memory_limit=memory_limit + mounts=mounts, cpu_limit=cpu_limit, memory_limit=memory_limit, + auto_restart=auto_restart ) self.jobs_collection.update_one({'name': name}, {'$set': {