# Job Time Statistics

In [1]:
import pandas as pd
import subprocess
import sys
import datetime as dt
from io import StringIO

In [2]:
# Slurm job name whose accounting records are summarized below
# (passed to `sacct --name`).
job_name = 'umui-rerun'
# Start of the reporting window (passed to `sacct -S`).
start_date = '2020-02-01'

The `sacct` command lets us fetch accounting info for every job with a given name:

In [3]:
# Accounting columns to request from sacct, in output order.
sacct_fields = [
    'JobID', 'Account', 'State', 'AllocCPUS',
    'CpuTime', 'CpuTimeRAW', 'Elapsed', 'ElapsedRAW',
]

# -P asks for '|'-delimited output, which is parsed as CSV further down;
# -o takes the comma-separated column list.
cmd = ['sacct', '-P', '-o', ','.join(sacct_fields)]
# Restrict the report to the configured job name ...
cmd += ['--name', job_name]
# ... and to the window beginning at the configured start date.
cmd += ['-S', start_date]

In [4]:
# Run sacct, capturing stdout and stderr as UTF-8 text.
child = subprocess.run(cmd, capture_output=True, encoding='utf8')
if child.returncode:
    # Show sacct's own diagnostics before aborting the notebook run.
    print(child.stderr, file=sys.stderr)
    raise ValueError('child failed with code ' + str(child.returncode))

acct_text = child.stdout
# Parse the '|'-delimited report. JobID is pinned to str: if the output
# happens to contain only plain numeric job IDs (no `.batch`-style rows),
# pandas would otherwise infer an integer column and string operations on
# JobID would fail.
acct = pd.read_csv(StringIO(acct_text), sep='|', dtype={'JobID': str})
acct

Unnamed: 0,JobID,Account,State,AllocCPUS,CPUTime,CPUTimeRAW,Elapsed,ElapsedRaw
0,210506,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
1,210506.batch,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
2,210507,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
3,210507.batch,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
4,210508,mekstrand,FAILED,28,11-20:35:48,1024548,10:09:51,36591
...,...,...,...,...,...,...,...,...
62,210930,mekstrand,FAILED,28,00:07:00,420,00:00:15,15
63,210930.batch,mekstrand,FAILED,28,00:07:00,420,00:00:15,15
64,210931,mekstrand,FAILED,28,00:10:44,644,00:00:23,23
65,210975,mekstrand,COMPLETED,28,25-06:19:28,2182768,21:39:16,77956


Rows whose `JobID` ends in `.batch` are job steps that duplicate their parent job's totals, so remove them:

In [5]:
# Rows whose JobID contains a '.' are job steps (`.batch`, and on some
# clusters `.extern` or numbered steps). Their time is already covered by the
# parent job's row, so keep only job-level rows to avoid double counting.
# astype(str) keeps the filter working even if pandas parsed JobID as
# integers, which happens when the report contains no step rows at all.
is_step = acct['JobID'].astype(str).str.contains('.', regex=False)
jobs = acct[~is_step]
jobs

Unnamed: 0,JobID,Account,State,AllocCPUS,CPUTime,CPUTimeRAW,Elapsed,ElapsedRaw
0,210506,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
2,210507,mekstrand,FAILED,28,00:03:16,196,00:00:07,7
4,210508,mekstrand,FAILED,28,11-20:35:48,1024548,10:09:51,36591
6,210564,mekstrand,FAILED,28,04:03:36,14616,00:08:42,522
8,210594,mekstrand,FAILED,28,00:04:40,280,00:00:10,10
10,210595,mekstrand,FAILED,28,11-16:12:36,1008756,10:00:27,36027
12,210701,mekstrand,FAILED,28,00:04:40,280,00:00:10,10
14,210704,mekstrand,FAILED,28,00:09:48,588,00:00:21,21
16,210705,mekstrand,FAILED,28,00:04:12,252,00:00:09,9
18,210712,mekstrand,CANCELLED by 1051,28,06:39:00,23940,00:14:15,855


What is the CPU time?

In [6]:
# Add up the raw CPU time (seconds) of all jobs and wrap it in a timedelta
# so the total displays as days + seconds.
total_cpu_seconds = int(jobs['CPUTimeRAW'].sum())
cpu = dt.timedelta(seconds=total_cpu_seconds)
cpu

datetime.timedelta(days=56, seconds=60284)

How many hours?

In [7]:
# Express the total CPU time in hours (3600 seconds per hour).
cpu_hours = cpu.total_seconds() / 3600
cpu_hours

1360.7455555555555

What is the wall clock time?

In [8]:
# Add up the raw elapsed (wall clock) time in seconds of all jobs and wrap it
# in a timedelta so the total displays as days + seconds.
total_wall_seconds = int(jobs['ElapsedRaw'].sum())
wall = dt.timedelta(seconds=total_wall_seconds)
wall

datetime.timedelta(days=2, seconds=2153)

How many hours?

In [9]:
# Express the total wall clock time in hours (3600 seconds per hour).
wall_hours = wall.total_seconds() / 3600
wall_hours

48.598055555555554