Trainer last summary fix + memory utils

Corentin 2021-02-25 02:18:02 +09:00
commit 50c395a07f
3 changed files with 54 additions and 3 deletions


@@ -28,11 +28,26 @@ def parameter_summary(network: torch.nn.Module) -> List[Tuple[str, Tuple[int], s
 def resource_usage() -> Tuple[int, str]:
     memory_peak = int(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
+    return memory_peak, gpu_used_memory()
+
+
+def gpu_used_memory() -> str:
     gpu_memory = subprocess.check_output(
-        'nvidia-smi --query-gpu=memory.used --format=csv,noheader', shell=True).decode()
+        'nvidia-smi --query-gpu=memory.used --format=csv,noheader', shell=True).decode().strip()
     if 'CUDA_VISIBLE_DEVICES' in os.environ:
         gpu_memory = gpu_memory.split('\n')[int(os.environ['CUDA_VISIBLE_DEVICES'])]
     else:
-        gpu_memory = ' '.join(gpu_memory.split('\n'))
-    return memory_peak, gpu_memory
+        gpu_memory = ','.join(gpu_memory.split('\n'))
+    return gpu_memory
+
+
+def gpu_total_memory() -> str:
+    gpu_memory = subprocess.check_output(
+        'nvidia-smi --query-gpu=memory.total --format=csv,noheader', shell=True).decode().strip()
+    if 'CUDA_VISIBLE_DEVICES' in os.environ:
+        gpu_memory = gpu_memory.split('\n')[int(os.environ['CUDA_VISIBLE_DEVICES'])]
+    else:
+        gpu_memory = ','.join(gpu_memory.split('\n'))
+    return gpu_memory
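
Not part of the commit: a minimal sketch of how the reworked helpers might be called from training code. The module name `memory_utils` and the print formatting are assumptions for illustration only.

    # Minimal usage sketch; `memory_utils` is an assumed module name, not from the commit.
    from memory_utils import resource_usage, gpu_total_memory

    # Peak RSS from getrusage (kilobytes on Linux, bytes on macOS) plus per-GPU used memory.
    peak_rss, gpu_used = resource_usage()
    print(f'peak RSS: {peak_rss}')
    print(f'GPU memory: {gpu_used} used / {gpu_total_memory()} total')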