Initial commit
commit fbf6898dd9
6 changed files with 220 additions and 0 deletions

.gitignore (vendored): new file, 4 additions
@@ -0,0 +1,4 @@
*.pyc
*.temp

output

benchmark.py: new file, 41 additions
@@ -0,0 +1,41 @@
from argparse import ArgumentParser
from pathlib import Path

from src.base import DataType
from src.torch.matmul import TorchMatmulBench


def main():
    parser = ArgumentParser()
    parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
    arguments = parser.parse_args()

    output_path: Path = arguments.output

    if not output_path.exists():
        output_path.mkdir(parents=True)

    for data_type in DataType:
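        # Each entry pairs the operand shapes ((rows_A, cols_A), (rows_B, cols_B));
        # every pair is timed 12 times (the experiment_count argument) for this data type.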
        TorchMatmulBench(output_path).run(
            [
                ((100, 100), (100, 100)),
                ((100, 200), (200, 100)),
                ((128, 128), (128, 128)),
                ((200, 100), (100, 200)),
                ((200, 200), (200, 200)),
                ((256, 256), (256, 256)),
                ((256, 512), (512, 256)),
                ((400, 400), (400, 400)),
                ((512, 256), (256, 512)),
                ((512, 512), (512, 512)),
                ((800, 800), (800, 800)),
                ((1000, 1000), (1000, 1000)),
                ((1200, 1200), (1200, 1200)),
            ],
            12,
            data_type)
    print('Benchmark done')


if __name__ == '__main__':
    main()

src/base.py: new file, 22 additions
@@ -0,0 +1,22 @@
from pathlib import Path
from enum import Enum


class Device(Enum):
    CPU = 'cpu'
    GPU = 'gpu'


class DataType(Enum):
    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'


class Base():
    def __init__(self, output_path: Path):
        self._base_output_path = output_path
        self.output_path = output_path

    def set_output_path(self, device: Device, device_name: str):
        self.output_path = self._base_output_path / f'{device.value}_{device_name}'

src/torch/base.py: new file, 23 additions
@@ -0,0 +1,23 @@
from pathlib import Path

import torch

from src.base import Base, Device
from src.utils import get_cpu_name, get_nvidia_name


class TorchBase(Base):
    def __init__(self, output_path: Path):
        super().__init__(output_path)
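
        # Prefer the first CUDA device when one is available; otherwise fall back to the CPU.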
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())

        if not self.output_path.exists():
            self.output_path.mkdir(parents=True)

src/torch/matmul.py: new file, 112 additions
@@ -0,0 +1,112 @@
import time

from src.base import DataType
from src.torch.base import TorchBase

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch


class TorchMatmulBench(TorchBase):

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        sns.set_theme(style="ticks")

        dtype = None
        if data_type == DataType.FLOAT16:
            dtype = torch.float16
        elif data_type == DataType.FLOAT32:
            dtype = torch.float32
        elif data_type == DataType.FLOAT64:
            dtype = torch.float64
        else:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')

        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)

            # warmup
            for _ in range(20):
                _ = tensor_1 @ tensor_2

            # speed evaluation
            counter = 0
            start_time = time.time()
            while(time.time() - start_time < 0.2):
                _ = tensor_1 @ tensor_2
                counter += 1
            end_time = time.time()
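
            # Calibration: the 0.2 s throughput probe above is used to size each timed run
            # so that the experiment_count runs together take roughly 0.5 s.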
            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            experiment_length = max(int(target_time * experiment_speed), 2)

            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
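            # Millions of floating-point operations per matmul, counted as 2 * M * N * (K - 1)
            # for an M x K by K x N product (each output element costs K multiplies and K - 1 adds).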
            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')

        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')

        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
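
        # Three stacked panels sharing the x axis: work per matmul (Mop), time per matmul (ms),
        # and throughput (GFLOPS), one bar per experiment.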
        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        for p, value in zip(chart.patches, max_data['Mop/matmul']):
            chart.annotate(f'{value:0.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        for p, value in zip(chart.patches, mean_data['ms/matmul']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        for p, value in zip(chart.patches, mean_data['GFLOPS']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')

        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')

src/utils.py: new file, 18 additions
@@ -0,0 +1,18 @@
import subprocess


def get_cpu_name() -> str:
    raw_out = subprocess.check_output(['lscpu']).decode()
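    # lscpu prints "Field:   value" lines; pick out the Architecture and Model name fields.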
    architecture = 'unknown'
    model = 'noname'
    for out_line in raw_out.split('\n'):
        line_info = out_line.strip().split(':')
        if line_info[0].strip() == 'Architecture':
            architecture = line_info[1].strip()
        if line_info[0].strip() == 'Model name':
            model = line_info[1].strip()
    return f'{architecture}_{model}'


def get_nvidia_name() -> str:
    return subprocess.check_output(['nvidia-smi', '--format=csv,noheader', '--query-gpu=name']).decode().strip()