# commit fbf6898dd9771bdd904c6e1426e526367373aa1d
# Author: Corentin   Date: Tue Sep 28 00:41:53 2021 +0900
# Initial commit
#
# --- .gitignore (new file) ---
#   *.pyc
#   *.temp
#
#   output

# --- benchmark.py (new file) ---
from argparse import ArgumentParser
from pathlib import Path

from src.base import DataType
from src.torch.matmul import TorchMatmulBench

# Square and rectangular shape pairs, from 100x100 up to 1200x1200.
_MATMUL_SHAPES: list[tuple[tuple[int, int], tuple[int, int]]] = [
    ((100, 100), (100, 100)),
    ((100, 200), (200, 100)),
    ((128, 128), (128, 128)),
    ((200, 100), (100, 200)),
    ((200, 200), (200, 200)),
    ((256, 256), (256, 256)),
    ((256, 512), (512, 256)),
    ((400, 400), (400, 400)),
    ((512, 256), (256, 512)),
    ((512, 512), (512, 512)),
    ((800, 800), (800, 800)),
    ((1000, 1000), (1000, 1000)),
    ((1200, 1200), (1200, 1200)),
]


def main():
    """Run the Torch matmul benchmark once per supported data type.

    Results (CSV + PNG per data type) are written under --output
    (default: ./output).
    """
    parser = ArgumentParser()
    parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
    arguments = parser.parse_args()

    output_path: Path = arguments.output

    # exist_ok avoids the check-then-create race and makes re-runs safe
    output_path.mkdir(parents=True, exist_ok=True)

    for data_type in DataType:
        # 12 timed repetitions per shape pair
        TorchMatmulBench(output_path).run(_MATMUL_SHAPES, 12, data_type)
    print('Benchmark done')


if __name__ == '__main__':
    main()


# --- src/base.py (new file) ---
from pathlib import Path
from enum import Enum


class Device(Enum):
    """Kind of compute device a benchmark ran on (used in output dir names)."""
    CPU = 'cpu'
    GPU = 'gpu'


class DataType(Enum):
    """Floating-point precisions the benchmarks are run with."""
    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'


class Base:
    """Base class for benchmarks; manages the per-device output directory."""

    def __init__(self, output_path: Path):
        # Root output directory; kept so the path can be re-derived per device.
        self._base_output_path = output_path
        # Effective output directory; redirected by set_output_path().
        self.output_path = output_path

    def set_output_path(self, device: Device, device_name: str):
        """Point output_path at a subdirectory named '<device>_<device_name>'."""
        self.output_path = self._base_output_path / f'{device.value}_{device_name}'
# --- src/torch/base.py (new file) ---
from pathlib import Path

import torch

from src.base import Base, Device
from src.utils import get_cpu_name, get_nvidia_name


class TorchBase(Base):
    """Base class for torch benchmarks: selects the device and output dir."""

    def __init__(self, output_path: Path):
        super().__init__(output_path)

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                # fix: 'WARINING' typo in the original message
                print('WARNING : no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            # let cuDNN auto-tune kernels; benchmark shapes are fixed, so this helps
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())

        # exist_ok avoids the check-then-create race of the original code
        self.output_path.mkdir(parents=True, exist_ok=True)


# --- src/torch/matmul.py (new file) ---
import time

from src.base import DataType
from src.torch.base import TorchBase

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch

# Map benchmark data types to torch dtypes (replaces an if/elif chain).
_TORCH_DTYPES = {
    DataType.FLOAT16: torch.float16,
    DataType.FLOAT32: torch.float32,
    DataType.FLOAT64: torch.float64,
}


def _annotate_bars(chart, values):
    """Write each bar's value just above the bar (shared by all three panels)."""
    for patch, value in zip(chart.patches, values):
        chart.annotate(f'{value:.3f}',
                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
                       ha='center', va='center', fontsize=10, color='black',
                       xytext=(0, 5), textcoords='offset points')


class TorchMatmulBench(TorchBase):
    """Benchmark dense matmul and plot Mop/matmul, ms/matmul and GFLOPS."""

    def _sync(self):
        # CUDA kernels launch asynchronously; without a synchronize the
        # time.time() deltas would measure kernel *launch* time, not compute
        # time. No-op on CPU.
        if self.device.type == 'cuda':
            torch.cuda.synchronize(self.device)

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        """Time matmuls for every shape pair; save a CSV and a 3-panel PNG.

        experiment_args: list of ((rows1, cols1), (rows2, cols2)) shape pairs;
            cols1 must equal rows2.
        experiment_count: number of timed repetitions per shape pair.
        data_type: precision to benchmark at.
        Raises RuntimeError for an unsupported data_type.
        """
        sns.set_theme(style="ticks")

        dtype = _TORCH_DTYPES.get(data_type)
        if dtype is None:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        # fix: 'Startin' typo in the original message
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')

        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)

            # warmup (also lets cudnn.benchmark pick its kernels)
            for _ in range(20):
                _ = tensor_1 @ tensor_2
            self._sync()

            # speed evaluation: count how many matmuls complete in ~0.2s
            counter = 0
            start_time = time.time()
            while time.time() - start_time < 0.2:
                _ = tensor_1 @ tensor_2
                counter += 1
            self._sync()  # fix: include queued GPU work in the measured window
            end_time = time.time()

            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            # at least 2 iterations so a single slow call cannot dominate
            experiment_length = max(int(target_time * experiment_speed), 2)

            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                self._sync()  # fix: wait for the GPU before reading the clock
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
            # megaops per matmul: (m * n / 1e6) * 2 * (k - 1) — the author's
            # FLOP model; kept as-is so results stay comparable
            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')

        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')

        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()

        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        # only the bottom panel keeps its x tick labels
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data,
                            ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        _annotate_bars(chart, max_data['Mop/matmul'])

        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        _annotate_bars(chart, mean_data['ms/matmul'])

        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        _annotate_bars(chart, mean_data['GFLOPS'])

        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')
        # fix: free the figure — run() is called once per DataType, and
        # unclosed figures accumulate memory across the loop
        plt.close(figure)


# --- src/utils.py (new file) ---
import subprocess


def get_cpu_name() -> str:
    """Return '<architecture>_<model name>' as reported by lscpu (Linux only)."""
    raw_out = subprocess.check_output(['lscpu']).decode()
    architecture = 'unknown'  # fix: 'unkown' typo
    model = 'noname'
    for out_line in raw_out.split('\n'):
        # fix: split on the first ':' only, in case the value contains ':'
        line_info = out_line.strip().split(':', 1)
        if line_info[0].strip() == 'Architecture':
            architecture = line_info[1].strip()
        if line_info[0].strip() == 'Model name':
            model = line_info[1].strip()
    return f'{architecture}_{model}'


def get_nvidia_name() -> str:
    """Return the first GPU's name as reported by nvidia-smi."""
    return subprocess.check_output(['nvidia-smi', '--format=csv,noheader', '--query-gpu=name']).decode().strip()