Initial commit
This commit is contained in:
commit
fbf6898dd9
6 changed files with 220 additions and 0 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
*.pyc
|
||||
*.temp
|
||||
|
||||
output
|
||||
41
benchmark.py
Normal file
41
benchmark.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
from src.base import DataType
|
||||
from src.torch.matmul import TorchMatmulBench
|
||||
|
||||
|
||||
# Square-ish operand shape pairs of increasing size; each entry is
# ((rows1, cols1), (rows2, cols2)) for one matmul experiment.
MATMUL_SHAPE_PAIRS = [
    ((100, 100), (100, 100)),
    ((100, 200), (200, 100)),
    ((128, 128), (128, 128)),
    ((200, 100), (100, 200)),
    ((200, 200), (200, 200)),
    ((256, 256), (256, 256)),
    ((256, 512), (512, 256)),
    ((400, 400), (400, 400)),
    ((512, 256), (256, 512)),
    ((512, 512), (512, 512)),
    ((800, 800), (800, 800)),
    ((1000, 1000), (1000, 1000)),
    ((1200, 1200), (1200, 1200)),
]


def main():
    """Entry point: parse the CLI and run the matmul benchmark for every DataType.

    Command line:
        --output: directory where CSV/PNG results are written (default: output).
    """
    parser = ArgumentParser()
    parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
    arguments = parser.parse_args()

    output_path: Path = arguments.output

    # exist_ok avoids the race between a separate exists() check and mkdir().
    output_path.mkdir(parents=True, exist_ok=True)

    # Construct the bench once: device detection and per-device output
    # directory setup happen in its __init__ and do not depend on dtype.
    bench = TorchMatmulBench(output_path)
    for data_type in DataType:
        bench.run(MATMUL_SHAPE_PAIRS, 12, data_type)
    print('Benchmark done')


if __name__ == '__main__':
    main()
|
||||
22
src/base.py
Normal file
22
src/base.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from pathlib import Path
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Device(Enum):
    """Kind of compute device a benchmark ran on."""
    CPU = 'cpu'
    GPU = 'gpu'


class DataType(Enum):
    """Floating-point element types the benchmarks cover."""
    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'


class Base():
    """Shared benchmark state: tracks where result files get written."""

    def __init__(self, output_path: Path):
        # Keep the root directory around so a device-specific path can be
        # derived from it later via set_output_path().
        self._base_output_path = output_path
        self.output_path = output_path

    def set_output_path(self, device: Device, device_name: str):
        """Redirect output into a '<device>_<name>' subdirectory of the root."""
        subdirectory = f'{device.value}_{device_name}'
        self.output_path = self._base_output_path / subdirectory
|
||||
23
src/torch/base.py
Normal file
23
src/torch/base.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
|
||||
from src.base import Base, Device
|
||||
from src.utils import get_cpu_name, get_nvidia_name
|
||||
|
||||
|
||||
class TorchBase(Base):
    """Torch-specific benchmark base: selects the compute device and points
    output at a matching per-device directory.

    Uses the first CUDA device when available, otherwise the CPU.
    """

    def __init__(self, output_path: Path):
        """
        Args:
            output_path: root results directory; a '<device>_<name>'
                subdirectory is created beneath it.
        """
        super().__init__(output_path)

        # Query availability once and reuse the answer.
        cuda_available = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if cuda_available else 'cpu')
        if cuda_available:
            if torch.cuda.device_count() > 1:
                print('WARNING : no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            # Let cudnn autotune algorithms; shapes are fixed per experiment.
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())

        # exist_ok avoids the race between a separate exists() check and mkdir().
        self.output_path.mkdir(parents=True, exist_ok=True)
|
||||
112
src/torch/matmul.py
Normal file
112
src/torch/matmul.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import time
|
||||
|
||||
from src.base import DataType
|
||||
from src.torch.base import TorchBase
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import torch
|
||||
|
||||
|
||||
class TorchMatmulBench(TorchBase):
    """Benchmarks dense matrix multiplication (the ``@`` operator) in torch.

    For every pair of operand shapes the benchmark warms up, estimates the
    achievable rate, then times ``experiment_count`` repetitions and writes a
    tab-separated CSV plus a three-panel bar chart (Mop/matmul, ms/matmul,
    GFLOPS) into ``self.output_path``.
    """

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        """Run the benchmark and write matmul_<dtype>.csv / matmul_<dtype>.png.

        Args:
            experiment_args: shape pairs ((rows1, cols1), (rows2, cols2));
                each pair is one experiment.
            experiment_count: number of timed repetitions per experiment.
            data_type: element type used for both operands.

        Raises:
            RuntimeError: if ``data_type`` has no torch equivalent here.
        """
        sns.set_theme(style="ticks")

        # Map the project-level DataType onto the torch dtypes supported here.
        torch_dtypes = {
            DataType.FLOAT16: torch.float16,
            DataType.FLOAT32: torch.float32,
            DataType.FLOAT64: torch.float64,
        }
        dtype = torch_dtypes.get(data_type)
        if dtype is None:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')

        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            # NOTE(review): presumably scaled to keep result magnitudes small
            # (relevant for float16) — confirm the intended normalization.
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)

            # Warmup so lazy initialization / cudnn autotuning are not timed.
            for _ in range(20):
                _ = tensor_1 @ tensor_2
            # CUDA kernels launch asynchronously: without a synchronize the
            # wall-clock windows below would time launches, not execution.
            self._synchronize()

            # Speed estimation: count how many matmuls fit a short window.
            counter = 0
            start_time = time.time()
            while time.time() - start_time < 0.2:
                _ = tensor_1 @ tensor_2
                counter += 1
            self._synchronize()
            end_time = time.time()

            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            # At least 2 iterations per repetition so a repetition never
            # degenerates to a single timer-resolution-limited sample.
            experiment_length = max(int(target_time * experiment_speed), 2)

            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                self._synchronize()
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
            # NOTE(review): counts 2 * (cols1 - 1) operations per output cell
            # — confirm this is the intended FLOP model (2*m*n*k is the
            # conventional count for an m×k @ k×n product).
            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')

        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')

        # Mean over repetitions for the bar labels; max works for Mop/matmul
        # because it is constant within an experiment.
        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()

        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        # Only the bottom panel keeps its x tick labels.
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        self._annotate_bars(chart, max_data['Mop/matmul'])

        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        self._annotate_bars(chart, mean_data['ms/matmul'])

        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        self._annotate_bars(chart, mean_data['GFLOPS'])

        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')

    def _synchronize(self):
        """Block until pending CUDA work finishes so wall-clock timings are
        meaningful; no-op on CPU."""
        if self.device.type == 'cuda':
            torch.cuda.synchronize(self.device)

    def _annotate_bars(self, chart, values):
        """Write each value above the corresponding bar of a seaborn barplot."""
        for p, value in zip(chart.patches, values):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')
|
||||
18
src/utils.py
Normal file
18
src/utils.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import subprocess
|
||||
|
||||
|
||||
def get_cpu_name() -> str:
    """Return a '<architecture>_<model name>' identifier parsed from lscpu.

    Falls back to 'unknown' / 'noname' for fields lscpu does not report.

    Raises:
        subprocess.CalledProcessError: if lscpu exits non-zero.
        FileNotFoundError: if lscpu is not installed.
    """
    raw_out = subprocess.check_output(['lscpu']).decode()
    architecture = 'unknown'
    model = 'noname'
    for out_line in raw_out.split('\n'):
        # Split on the first ':' only, so a value containing ':' stays intact.
        line_info = out_line.strip().split(':', 1)
        if line_info[0].strip() == 'Architecture':
            architecture = line_info[1].strip()
        elif line_info[0].strip() == 'Model name':
            model = line_info[1].strip()
    return f'{architecture}_{model}'
|
||||
|
||||
|
||||
def get_nvidia_name() -> str:
    """Return the product name of the NVIDIA GPU as reported by nvidia-smi."""
    command = ['nvidia-smi', '--format=csv,noheader', '--query-gpu=name']
    raw_output = subprocess.check_output(command)
    return raw_output.decode().strip()
|
||||
Loading…
Add table
Add a link
Reference in a new issue