Created
May 13, 2020 09:16
-
-
Save jerinphilip/2abe6cfd756071587d98ec80f7fa596d to your computer and use it in GitHub Desktop.
Command to detect non-blocked nodes in SLURM, i.e. nodes that still have free CPU, GPU, and memory on which a new allocation can be obtained.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess as sp | |
from collections import defaultdict | |
from pprint import pprint | |
import humanfriendly | |
def run(cmd):
    """Run *cmd* through the shell and return its standard output as text.

    The output bytes are decoded as UTF-8. ``check_output`` raises
    ``subprocess.CalledProcessError`` when the command exits non-zero.
    """
    raw_stdout = sp.check_output(cmd, shell=True)
    return str(raw_stdout, "utf-8")
# Keys that must all be present in a node record before a Node is built from it.
required = [
    "CfgTRES",
    "AllocTRES",
    "NodeName",
]

# Raw text from scontrol; the parsing loop further down treats every line
# of this output as the record of one node.
output = run("scontrol show nodes -o")
class TRES:
    """A (cpu, mem, gpu) resource triple that supports subtraction."""

    def __init__(self, cpu, mem, gpu):
        """Store the three resource quantities as given."""
        self.cpu = cpu
        self.mem = mem
        self.gpu = gpu

    @classmethod
    def build(cls, keyvals):
        """Parse a comma-separated ``key=value`` string into a TRES.

        Only the ``cpu``, ``mem`` and ``gres/gpu`` keys are read; a key that
        is absent from *keyvals* counts as ``'0'``. ``mem`` is converted
        with ``humanfriendly.parse_size``; the other two with ``int``.
        """
        parsed = {}
        for item in keyvals.split(','):
            if not item:
                # Empty fragments (e.g. from an empty input string) are skipped.
                continue
            name, value = item.split('=')
            parsed[name] = value

        cpu_count = int(parsed.get("cpu", '0'))
        mem_size = humanfriendly.parse_size(parsed.get("mem", '0'))
        gpu_count = int(parsed.get("gres/gpu", '0'))
        return cls(cpu_count, mem_size, gpu_count)

    def __repr__(self):
        """Render in ``cpu=..,mem=..,gres/gpu=..`` form with a readable mem size."""
        readable_mem = humanfriendly.format_size(self.mem)
        return 'cpu={},mem={},gres/gpu={}'.format(self.cpu, readable_mem, self.gpu)

    def __sub__(self, other):
        """Return the component-wise difference ``self - other`` as a new TRES."""
        cpu_left = self.cpu - other.cpu
        mem_left = self.mem - other.mem
        gpu_left = self.gpu - other.gpu
        return TRES(cpu_left, mem_left, gpu_left)
class Node:
    """One node record: its name plus configured and allocated resources."""

    def __init__(self, NodeName, CfgTRES, AllocTRES):
        """Build a node from the raw field values of one record.

        NodeName  -- node name, stored as-is.
        CfgTRES   -- configured-resources string, parsed with TRES.build.
        AllocTRES -- allocated-resources string, parsed with TRES.build.

        The parameter names mirror the record keys so that callers can pass
        the parsed fields directly as keyword arguments.
        """
        self.name = NodeName
        self.cfg = TRES.build(CfgTRES)
        self.alloc = TRES.build(AllocTRES)

    @property
    def free(self):
        """Resources still unallocated: configured minus allocated."""
        return self.cfg - self.alloc

    def __repr__(self):
        """Return a tab-separated line: flag, name, cfg, alloc and free."""
        # `gainable` is a property, so it must be read, not called; calling
        # it raised TypeError ('bool' object is not callable).
        flag = '[green]' if self.gainable else '[nopes]'
        return '{}: {}\t{}\t{}\t{}'.format(flag, self.name, self.cfg, self.alloc, self.free)

    @property
    def gainable(self):
        """True when none of the free gpu, cpu and mem amounts is zero."""
        # Compute the difference once instead of once per component.
        free = self.free
        return free.gpu != 0 and free.cpu != 0 and free.mem != 0
# Turn every line of the scontrol output into a Node when it carries all of
# the required fields; lines that do not are echoed unchanged.
nodes = []
wanted_keys = set(required)
for line in output.splitlines():
    fields = {}
    for token in line.split(' '):
        # Split on the first '=' only, so values keep any further '=' signs.
        key, _, value = token.partition("=")
        if key in required:
            fields[key] = value
    if set(fields) == wanted_keys:
        nodes.append(Node(**fields))
    else:
        print(line)


def _rank(entry):
    """Sort key: gainable first, then by free gpu, cpu and mem."""
    spare = entry.free
    return (entry.gainable, spare.gpu, spare.cpu, spare.mem)


# Descending order puts the gainable nodes with the most free resources first.
nodes = sorted(nodes, key=_rank, reverse=True)

gainable_nodes = [entry for entry in nodes if entry.gainable]
for node in gainable_nodes:
    print(node.name, node.free)

blocked_nodes = [entry for entry in nodes if not entry.gainable]
print('-'*10)
for node in blocked_nodes:
    print(node.name, node.free)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment