Initial commit
commit fbf6898dd9
6 changed files with 220 additions and 0 deletions

.gitignore (vendored): new file, 4 additions
@@ -0,0 +1,4 @@
*.pyc
*.temp

output

benchmark.py: new file, 41 additions
@@ -0,0 +1,41 @@
from argparse import ArgumentParser
from pathlib import Path

from src.base import DataType
from src.torch.matmul import TorchMatmulBench


def main():
    parser = ArgumentParser()
    parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
    arguments = parser.parse_args()

    output_path: Path = arguments.output

    if not output_path.exists():
        output_path.mkdir(parents=True)

    for data_type in DataType:
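        # Each entry pairs the operand shapes ((rows_A, cols_A), (rows_B, cols_B));
        # every pair is timed 12 times (the experiment_count argument) for this data type.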
        TorchMatmulBench(output_path).run(
            [
                ((100, 100), (100, 100)),
                ((100, 200), (200, 100)),
                ((128, 128), (128, 128)),
                ((200, 100), (100, 200)),
                ((200, 200), (200, 200)),
                ((256, 256), (256, 256)),
                ((256, 512), (512, 256)),
                ((400, 400), (400, 400)),
                ((512, 256), (256, 512)),
                ((512, 512), (512, 512)),
                ((800, 800), (800, 800)),
                ((1000, 1000), (1000, 1000)),
                ((1200, 1200), (1200, 1200)),
            ],
            12,
            data_type)
    print('Benchmark done')


if __name__ == '__main__':
    main()

src/base.py: new file, 22 additions
@@ -0,0 +1,22 @@
from pathlib import Path
from enum import Enum


class Device(Enum):
    CPU = 'cpu'
    GPU = 'gpu'


class DataType(Enum):
    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'


class Base():
    def __init__(self, output_path: Path):
        self._base_output_path = output_path
        self.output_path = output_path

    def set_output_path(self, device: Device, device_name: str):
        self.output_path = self._base_output_path / f'{device.value}_{device_name}'

src/torch/base.py: new file, 23 additions
@@ -0,0 +1,23 @@
from pathlib import Path

import torch

from src.base import Base, Device
from src.utils import get_cpu_name, get_nvidia_name


class TorchBase(Base):
    def __init__(self, output_path: Path):
        super().__init__(output_path)
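
        # Prefer the first CUDA device when one is available; otherwise fall back to the CPU.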
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())

        if not self.output_path.exists():
            self.output_path.mkdir(parents=True)

src/torch/matmul.py: new file, 112 additions
@@ -0,0 +1,112 @@
import time

from src.base import DataType
from src.torch.base import TorchBase

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch


class TorchMatmulBench(TorchBase):

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        sns.set_theme(style="ticks")

        dtype = None
        if data_type == DataType.FLOAT16:
            dtype = torch.float16
        elif data_type == DataType.FLOAT32:
            dtype = torch.float32
        elif data_type == DataType.FLOAT64:
            dtype = torch.float64
        else:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')

        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)

            # warmup
            for _ in range(20):
                _ = tensor_1 @ tensor_2

            # speed evaluation
            counter = 0
            start_time = time.time()
            while(time.time() - start_time < 0.2):
                _ = tensor_1 @ tensor_2
                counter += 1
            end_time = time.time()
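
            # Calibration: the 0.2 s throughput probe above is used to size each timed run
            # so that the experiment_count runs together take roughly 0.5 s.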
            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            experiment_length = max(int(target_time * experiment_speed), 2)

            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
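            # Millions of floating-point operations per matmul, counted as 2 * M * N * (K - 1)
            # for an M x K by K x N product (each output element costs K multiplies and K - 1 adds).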
            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')

        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')

        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
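
        # Three stacked panels sharing the x axis: work per matmul (Mop), time per matmul (ms),
        # and throughput (GFLOPS), one bar per experiment.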
        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        for p, value in zip(chart.patches, max_data['Mop/matmul']):
            chart.annotate(f'{value:0.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        for p, value in zip(chart.patches, mean_data['ms/matmul']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        for p, value in zip(chart.patches, mean_data['GFLOPS']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')

src/utils.py: new file, 18 additions
@@ -0,0 +1,18 @@
import subprocess


def get_cpu_name() -> str:
    raw_out = subprocess.check_output(['lscpu']).decode()
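    # lscpu prints "Field:   value" lines; pick out the Architecture and Model name fields.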
    architecture = 'unknown'
    model = 'noname'
    for out_line in raw_out.split('\n'):
        line_info = out_line.strip().split(':')
        if line_info[0].strip() == 'Architecture':
            architecture = line_info[1].strip()
        if line_info[0].strip() == 'Model name':
            model = line_info[1].strip()
    return f'{architecture}_{model}'


def get_nvidia_name() -> str:
    return subprocess.check_output(['nvidia-smi', '--format=csv,noheader', '--query-gpu=name']).decode().strip()