@@ -598,7 +598,7 @@ cdef class config:
598
598
Ctl_dict[' cpu_freq_govs' ] = self .__Config_ptr.cpu_freq_govs
599
599
Ctl_dict[' cred_type' ] = slurm.stringOrNone(self .__Config_ptr.cred_type, ' ' )
600
600
Ctl_dict[' debug_flags' ] = self .__Config_ptr.debug_flags
601
- Ctl_dict[' def_mem_per_cp ' ] = self .__Config_ptr.def_mem_per_cpu
601
+ Ctl_dict[' def_mem_per_cpu ' ] = self .__Config_ptr.def_mem_per_cpu
602
602
Ctl_dict[' dependency_params' ] = slurm.stringOrNone(self .__Config_ptr.dependency_params, ' ' )
603
603
Ctl_dict[' eio_timeout' ] = self .__Config_ptr.eio_timeout
604
604
Ctl_dict[' enforce_part_limits' ] = bool (self .__Config_ptr.enforce_part_limits)
@@ -1023,16 +1023,16 @@ cdef class partition:
1023
1023
1024
1024
if record.def_mem_per_cpu & slurm.MEM_PER_CPU:
1025
1025
if record.def_mem_per_cpu == slurm.MEM_PER_CPU:
1026
- Part_dict[' def_mem_per_cp ' ] = " UNLIMITED"
1026
+ Part_dict[' def_mem_per_cpu ' ] = " UNLIMITED"
1027
1027
Part_dict[' def_mem_per_node' ] = None
1028
1028
else :
1029
- Part_dict[' def_mem_per_cp ' ] = record.def_mem_per_cpu & (~ slurm.MEM_PER_CPU)
1029
+ Part_dict[' def_mem_per_cpu ' ] = record.def_mem_per_cpu & (~ slurm.MEM_PER_CPU)
1030
1030
Part_dict[' def_mem_per_node' ] = None
1031
1031
elif record.def_mem_per_cpu == 0 :
1032
- Part_dict[' def_mem_per_cp ' ] = None
1032
+ Part_dict[' def_mem_per_cpu ' ] = None
1033
1033
Part_dict[' def_mem_per_node' ] = " UNLIMITED"
1034
1034
else :
1035
- Part_dict[' def_mem_per_cp ' ] = None
1035
+ Part_dict[' def_mem_per_cpu ' ] = None
1036
1036
Part_dict[' def_mem_per_node' ] = record.def_mem_per_cpu
1037
1037
1038
1038
if record.default_time == slurm.INFINITE:
@@ -1774,35 +1774,55 @@ cdef class job:
1774
1774
1775
1775
return retList
1776
1776
1777
- def find_id (self , jobid ):
1778
- """ Retrieve job ID data.
1777
+ cdef _load_single_job(self , jobid):
1778
+ """
1779
+ Uses slurm_load_job to set up self._job_ptr for a single job given by the jobid.
1780
+ After calling this, the job pointer can be used in other methods
1781
+ to operate on the information of the job.
1779
1782
1780
- This method accepts both string and integer formats of the jobid. It
1781
- calls slurm_xlate_job_id() to convert the jobid appropriately.
1782
- This works for single jobs and job arrays.
1783
+ This method accepts both string and integer formats of the jobid. It
1784
+ calls slurm_xlate_job_id to convert the jobid appropriately.
1783
1785
1784
- :param str jobid: Job id key string to search
1785
- :returns: List of dictionary of values for given job id
1786
- :rtype: `list`
1786
+ Raises a ValueError if the jobid does not correspond to an existing job.
1787
+
1788
+ :param str jobid: The jobid
1789
+ :returns: void
1790
+ :rtype: None.
1787
1791
"""
1788
1792
cdef:
1789
1793
int apiError
1790
1794
int rc
1791
1795
1796
+ # jobid can be given as int or string
1792
1797
if isinstance (jobid, int ) or isinstance (jobid, long ):
1793
1798
jobid = str (jobid).encode(" UTF-8" )
1794
1799
else :
1795
1800
jobid = jobid.encode(" UTF-8" )
1796
-
1801
+ # convert jobid appropriately for slurm
1797
1802
jobid_xlate = slurm.slurm_xlate_job_id(jobid)
1803
+
1804
+ # load the job which sets the self._job_ptr pointer
1798
1805
rc = slurm.slurm_load_job(& self ._job_ptr, jobid_xlate, self ._ShowFlags)
1799
1806
1800
- if rc == slurm.SLURM_SUCCESS:
1801
- return list (self .get_job_ptr().values())
1802
- else :
1807
+ if rc != slurm.SLURM_SUCCESS:
1803
1808
apiError = slurm.slurm_get_errno()
1804
1809
raise ValueError (slurm.stringOrNone(slurm.slurm_strerror(apiError), ' ' ), apiError)
1805
1810
1811
+ def find_id (self , jobid ):
1812
+ """ Retrieve job ID data.
1813
+
1814
+ This method accepts both string and integer formats of the jobid.
1815
+ This works for single jobs and job arrays. It uses the internal
1816
+ helper _load_single_job to do slurm_load_job. If the job corresponding
1817
+ to the jobid does not exist, a ValueError will be raised.
1818
+
1819
+ :param str jobid: Job id key string to search
1820
+ :returns: List of dictionaries of values for the given job id
1821
+ :rtype: `list`
1822
+ """
1823
+ self ._load_single_job(jobid)
1824
+ return list (self .get_job_ptr().values())
1825
+
1806
1826
def find_user (self , user ):
1807
1827
""" Retrieve a user's job data.
1808
1828
@@ -2879,6 +2899,38 @@ cdef class job:
2879
2899
# return "Submitted batch job %s" % job_id
2880
2900
return job_id
2881
2901
2902
+ def wait_finished (self , jobid ):
2903
+ """
2904
+ Block until the job given by the jobid finishes.
2905
+ This works for single jobs, as well as job arrays.
2906
+ :param jobid: The job id of the slurm job.
2907
+ To reference a job with job array set, use the first/"master" jobid
2908
+ (the same as given by squeue)
2909
+ :returns: The exit code of the slurm job.
2910
+ :rtype: `int`
2911
+ """
2912
+ exit_status = - 9999
2913
+ complete = False
2914
+ while not complete:
2915
+ complete = True
2916
+ p_time.sleep(5 )
2917
+ self ._load_single_job(jobid)
2918
+ for i in range (0 , self ._job_ptr.record_count):
2919
+ self ._record = & self ._job_ptr.job_array[i]
2920
+ if IS_JOB_COMPLETED(self ._job_ptr.job_array[i]):
2921
+ exit_status_arrayjob = None
2922
+ if WIFEXITED(self ._record.exit_code):
2923
+ exit_status_arrayjob = WEXITSTATUS(self ._record.exit_code)
2924
+ else :
2925
+ exit_status_arrayjob = 1
2926
+ # set exit code to the highest of all jobs in job array
2927
+ exit_status = max ([exit_status, exit_status_arrayjob])
2928
+ else :
2929
+ # go on with the next iteration, until all jobs in the array are completed
2930
+ complete = False
2931
+ slurm.slurm_free_job_info_msg(self ._job_ptr)
2932
+ return exit_status
2933
+
2882
2934
2883
2935
def slurm_pid2jobid (uint32_t JobPID = 0 ):
2884
2936
""" Get the slurm job id from a process id.
0 commit comments