Initial commit

This commit is contained in:
Corentin 2021-09-28 00:41:53 +09:00
commit fbf6898dd9
6 changed files with 220 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@@ -0,0 +1,4 @@
*.pyc
*.temp
output

41
benchmark.py Normal file
View file

@@ -0,0 +1,41 @@
from argparse import ArgumentParser
from pathlib import Path
from src.base import DataType
from src.torch.matmul import TorchMatmulBench
def main():
    """Run the Torch matmul benchmark once per supported data type.

    Command line:
        --output: directory where CSV results and PNG plots are written
                  (created if missing; default: ./output).
    """
    parser = ArgumentParser()
    parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
    arguments = parser.parse_args()
    output_path: Path = arguments.output
    # exist_ok avoids the check-then-create race of the original exists()/mkdir() pair.
    output_path.mkdir(parents=True, exist_ok=True)
    # Square and rectangular operand shapes; rectangular pairs appear in both
    # orders so the effect of operand orientation can be compared.
    shapes = [
        ((100, 100), (100, 100)),
        ((100, 200), (200, 100)),
        ((128, 128), (128, 128)),
        ((200, 100), (100, 200)),
        ((200, 200), (200, 200)),
        ((256, 256), (256, 256)),
        ((256, 512), (512, 256)),
        ((400, 400), (400, 400)),
        ((512, 256), (256, 512)),
        ((512, 512), (512, 512)),
        ((800, 800), (800, 800)),
        ((1000, 1000), (1000, 1000)),
        ((1200, 1200), (1200, 1200)),
    ]
    # Build the bench once: its constructor probes the available device and
    # prepares the output directory, so re-creating it per data type redoes
    # that work for no benefit.
    bench = TorchMatmulBench(output_path)
    for data_type in DataType:
        bench.run(shapes, 12, data_type)
    print('Benchmark done')


if __name__ == '__main__':
    main()

22
src/base.py Normal file
View file

@@ -0,0 +1,22 @@
from pathlib import Path
from enum import Enum
class Device(Enum):
    """Kind of compute device a benchmark ran on.

    The string value is embedded in output directory names
    (see Base.set_output_path).
    """

    CPU = 'cpu'
    GPU = 'gpu'
class DataType(Enum):
    """Floating-point element types the benchmarks can run with.

    The string value is used in output file names and log messages.
    """

    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'
class Base:
    """Common benchmark state: a base output directory plus a
    device-specific output directory derived from it.
    """

    def __init__(self, output_path: Path):
        # Keep the user-supplied root so a device subdirectory can be
        # derived later; until then, output goes straight to the root.
        self._base_output_path = output_path
        self.output_path = output_path

    def set_output_path(self, device: Device, device_name: str):
        """Point output_path at a '<device>_<name>' subdirectory of the root."""
        subdirectory = f'{device.value}_{device_name}'
        self.output_path = self._base_output_path / subdirectory

23
src/torch/base.py Normal file
View file

@@ -0,0 +1,23 @@
from pathlib import Path
import torch
from src.base import Base, Device
from src.utils import get_cpu_name, get_nvidia_name
class TorchBase(Base):
    """Torch-specific benchmark base: picks the compute device and derives
    the output directory from the detected hardware name.
    """

    def __init__(self, output_path: Path):
        super().__init__(output_path)
        # Prefer the first CUDA device when one is available, else the CPU.
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                # Fixed typo in the user-facing message ('WARINING' -> 'WARNING').
                print('WARNING : no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            # Let cuDNN autotune kernel choices for the fixed shapes we benchmark.
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())
        # exist_ok avoids the check-then-create race of exists()/mkdir().
        self.output_path.mkdir(parents=True, exist_ok=True)

112
src/torch/matmul.py Normal file
View file

@@ -0,0 +1,112 @@
import time
from src.base import DataType
from src.torch.base import TorchBase
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
class TorchMatmulBench(TorchBase):
    """Benchmark dense matrix multiplication with torch.

    For each operand-shape pair the benchmark calibrates a run length,
    times ``experiment_count`` batches of matmuls, then writes the raw
    timings to ``matmul_<dtype>.csv`` and a three-panel bar chart
    (Mop/matmul, ms/matmul, GFLOPS) to ``matmul_<dtype>.png`` inside
    ``self.output_path``.
    """

    # Backend-agnostic DataType -> torch dtype (replaces the if/elif ladder).
    _DTYPES = {
        DataType.FLOAT16: torch.float16,
        DataType.FLOAT32: torch.float32,
        DataType.FLOAT64: torch.float64,
    }

    def _synchronize(self):
        """Block until queued GPU work has finished; a no-op on CPU.

        torch CUDA ops execute asynchronously, so wall-clock timings taken
        without a synchronize would mostly measure kernel-launch overhead.
        """
        if self.device.type == 'cuda':
            torch.cuda.synchronize(self.device)

    def _annotate_bars(self, chart, values):
        """Write each bar's value just above it on the given barplot."""
        for patch, value in zip(chart.patches, values):
            chart.annotate(f'{value:.3f}',
                           (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
                           ha='center', va='center', fontsize=10, color='black',
                           xytext=(0, 5), textcoords='offset points')

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        """Run the matmul benchmark for every shape pair.

        experiment_args: ((rows1, cols1), (rows2, cols2)) operand shapes.
        experiment_count: number of timed repetitions per shape pair.
        data_type: element type to benchmark.
        Raises RuntimeError for a DataType with no torch equivalent.
        """
        sns.set_theme(style="ticks")
        try:
            dtype = self._DTYPES[data_type]
        except KeyError:
            raise RuntimeError(f'data_type {data_type.value} not implemented') from None
        # Fixed typo in the user-facing message ('Startin' -> 'Starting').
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')
        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            # The divisor presumably keeps products in a small range (e.g. to
            # avoid float16 overflow for long reductions) — TODO confirm intent.
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)
            # Warmup so one-time initialization cost is excluded from timings.
            for _ in range(20):
                _ = tensor_1 @ tensor_2
            self._synchronize()
            # Speed evaluation: count matmuls completed in ~0.2 s to pick a
            # per-run length that makes each timed run last ~target_time.
            counter = 0
            start_time = time.time()
            while time.time() - start_time < 0.2:
                _ = tensor_1 @ tensor_2
                self._synchronize()  # count executed ops, not queued launches
                counter += 1
            end_time = time.time()
            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            experiment_length = max(int(target_time * experiment_speed), 2)
            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                self._synchronize()  # include actual GPU execution in the timing
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
            # Mop per matmul, using 2*(k-1) ops per output element.
            # NOTE(review): the exact count for a k-length dot product is
            # 2k-1 (k multiplies, k-1 adds) — confirm which convention is meant.
            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')
        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')
        # Per-experiment aggregates used for the bar annotations.
        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        # Only the bottom subplot keeps its x tick labels.
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        self._annotate_bars(chart, max_data['Mop/matmul'])
        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        self._annotate_bars(chart, mean_data['ms/matmul'])
        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        self._annotate_bars(chart, mean_data['GFLOPS'])
        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')
        # Release the figure: run() is called once per data type, and unclosed
        # figures accumulate memory across calls.
        plt.close(figure)

18
src/utils.py Normal file
View file

@@ -0,0 +1,18 @@
import subprocess
def get_cpu_name() -> str:
    """Return an '<architecture>_<model>' identifier for the host CPU.

    Parses the output of ``lscpu`` (Linux only). Fields that are missing
    fall back to placeholder values so the result is always usable as a
    directory-name component.
    """
    raw_out = subprocess.check_output(['lscpu']).decode()
    architecture = 'unknown'  # fixed typo in fallback value (was 'unkown')
    model = 'noname'
    for out_line in raw_out.split('\n'):
        # Split on the first ':' only, so values that themselves contain a
        # colon stay intact; partition also never raises on colon-free lines.
        key, separator, value = out_line.partition(':')
        if not separator:
            continue
        key = key.strip()
        if key == 'Architecture':
            architecture = value.strip()
        elif key == 'Model name':
            model = value.strip()
    return f'{architecture}_{model}'
def get_nvidia_name() -> str:
    """Return the product name of the first NVIDIA GPU via ``nvidia-smi``."""
    command = ['nvidia-smi', '--format=csv,noheader', '--query-gpu=name']
    raw_output = subprocess.check_output(command)
    return raw_output.decode().strip()