diff --git a/benchmark.py b/benchmark.py
index c7f42ac..8aa5f1a 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -1,39 +1,99 @@
 from argparse import ArgumentParser
+import multiprocessing as mp
+import os
 from pathlib import Path
+from typing import Type

-from src.base import DataType
-from src.torch.matmul import TorchMatmulBench
+from src.base import BenchBase
+from src.common import DataType, Op, Platform
+
+
+def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
+                  bench_args, bench_count: int):
+    if platform == Platform.TF2:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+        from src.tf_2.ops import tf2_ops
+        if bench_op not in tf2_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    elif platform == Platform.TORCH:
+        from src.pytorch.ops import torch_ops
+        if bench_op not in torch_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    else:
+        print(f'Platform {platform.value} is not implemented yet')


 def main():
     parser = ArgumentParser()
     parser.add_argument('--output', type=Path, default=Path('output'),
                         help='Path to output files')
+    parser.add_argument('--count', type=int, default=30,
+                        help='Number of experiments per benchmark (for statistical analysis)')
+    parser.add_argument('--platform', nargs='*', type=Platform,
+                        help='List of platforms to benchmark [TF1, TF2, Torch] (defaults to all)')
+    parser.add_argument('--data', nargs='*', type=DataType,
+                        help='List of data types to benchmark [float16, float32, float64] (defaults to all)')
+    parser.add_argument('--op', nargs='*', type=Op,
+                        help='List of operations to benchmark [add, mul, div, matmul] (defaults to all)')
     arguments = parser.parse_args()

     output_path: Path = arguments.output
+    bench_count: int = arguments.count
+    platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
+    data: list[DataType] = arguments.data if arguments.data is not None else list(DataType)
+    bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op)

     if not output_path.exists():
         output_path.mkdir(parents=True)

-    for data_type in DataType:
-        TorchMatmulBench(output_path).run(
-            [
-                ((100, 100), (100, 100)),
-                ((100, 200), (200, 100)),
-                ((128, 128), (128, 128)),
-                ((200, 100), (100, 200)),
-                ((200, 200), (200, 200)),
-                ((256, 256), (256, 256)),
-                ((256, 512), (512, 256)),
-                ((400, 400), (400, 400)),
-                ((512, 256), (256, 512)),
-                ((512, 512), (512, 512)),
-                ((800, 800), (800, 800)),
-                ((1000, 1000), (1000, 1000)),
-                ((1200, 1200), (1200, 1200)),
-            ],
-            12,
-            data_type)
+    # One job tuple per (platform, data type, op) combination; each is passed to run_benchmark.
+    benchmarks: list[tuple] = []
+    element_wise_args = [
+        (100, 100),
+        (100, 200),
+        (128, 128),
+        (200, 100),
+        (200, 200),
+        (256, 256),
+        (256, 512),
+        (512, 256),
+        (400, 400),
+        (512, 512),
+        (800, 800),
+        (1024, 1024),
+        (1800, 1800)]
+    matmul_args = [
+        ((100, 100), (100, 100)),
+        ((100, 200), (200, 100)),
+        ((128, 128), (128, 128)),
+        ((200, 100), (100, 200)),
+        ((200, 200), (200, 200)),
+        ((256, 256), (256, 256)),
+        ((256, 512), (512, 256)),
+        ((400, 400), (400, 400)),
+        ((512, 256), (256, 512)),
+        ((512, 512), (512, 512)),
+        ((800, 800), (800, 800)),
+        ((1000, 1000), (1000, 1000)),
+        ((1200, 1200), (1200, 1200))]
+
+    for platform in platforms:
+        for data_type in data:
+            for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
+                if bench_op in bench_ops:
+                    benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count))
+            if Op.MATMUL in bench_ops:
+                benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count))
+
+    # Run each configuration sequentially, each in a fresh process.
+    for benchmark in benchmarks:
+        process = mp.Process(target=run_benchmark, args=benchmark)
+        process.start()
+        process.join()
+
     print('Benchmark done')
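
For reference, a single configuration can also be exercised without going through argument parsing. A minimal sketch, assuming the modules added in this diff are importable as src.* from the repository root; the CLI equivalent would be python benchmark.py --platform Torch --op matmul --data float32 --count 10:

    # Run one Torch matmul benchmark directly, bypassing the CLI.
    from pathlib import Path

    from benchmark import run_benchmark
    from src.common import DataType, Op, Platform

    run_benchmark(
        output_path=Path('output'),
        platform=Platform.TORCH,
        data_type=DataType.FLOAT32,
        bench_op=Op.MATMUL,
        bench_args=[((256, 256), (256, 256)), ((512, 512), (512, 512))],
        bench_count=10)
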
diff --git a/src/base.py b/src/base.py
index 94b4924..643a18f 100644
--- a/src/base.py
+++ b/src/base.py
@@ -1,22 +1,106 @@
 from pathlib import Path
-from enum import Enum
+import time
+
+import numpy as np
+import pandas as pd
+
+from src.common import DataKey, DataType, Device, Op, Platform
+from src.plot import plot_experiments
+from src.utils import get_cpu_name, get_nvidia_name


-class Device(Enum):
-    CPU = 'cpu'
-    GPU = 'gpu'
-
-
-class DataType(Enum):
-    FLOAT16 = 'float16'
-    FLOAT32 = 'float32'
-    FLOAT64 = 'float64'
-
-
-class Base():
-    def __init__(self, output_path: Path):
+class BenchBase():
+    def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device):
         self._base_output_path = output_path
         self.output_path = output_path
+        self.platform = platform
+        self.bench_op = bench_op
+        self.device_type = device_type
+        self.device = device
+        self.dtype = None

     def set_output_path(self, device: Device, device_name: str):
-        self.output_path = self._base_output_path / f'{device.value}_{device_name}'
+        self.output_path = (
+            self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value)
+
+    def get_dtype(self, data_type: DataType):
+        raise NotImplementedError()
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
+
+    def run(self, experiment_args, experiment_count: int, data_type: DataType):
+        self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
+
+        if not self.output_path.exists():
+            self.output_path.mkdir(parents=True)
+
+        dtype = self.get_dtype(data_type)
+
+        print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
+
+        experiment_names = []
+        experiment_lengths = []
+        experiment_times = []
+        experiment_mop = []
+        for args in experiment_args:
+            # warmup
+            for _ in range(4):
+                self.experiment(args, 5, dtype, self.device)
+
+            # speed evaluation
+            counter = 0
+            start_time = time.time()
+            while time.time() - start_time < 0.2:
+                self.experiment(args, 10, dtype, self.device)
+                counter += 10
+            end_time = time.time()
+
+            target_time = 1.0  # in s
+            experiment_speed = counter / (end_time - start_time)  # in op/s
+            experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
+            # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
+            #       f' => {experiment_speed:.03f}{self.bench_op.value}/s'
+            #       f', estimate {target_time:.03f}s with {experiment_length}x{experiment_count} exps')
+
+            run_times = []
+            for _ in range(experiment_count):
+                start_time = time.time()
+                self.experiment(args, experiment_length, dtype, self.device)
+                run_times.append(time.time() - start_time)
+            experiment_times += run_times
+            experiment_names += [self.name(args)] * experiment_count
+            experiment_lengths += [experiment_length] * experiment_count
+            experiment_mop += [self.mop(args)] * experiment_count
+
+            total_time = np.array(run_times, dtype=np.float64).sum()
+            total_gflop = self.mop(args) * experiment_length * experiment_count / 1000
+            print(f'Run {experiment_names[-1]} (x{experiment_length})'
+                  f' in {total_time:0.2f}s => {total_gflop / total_time:0.3f} GFLOPS')
+
+        data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type)
+        plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform)
+
+    def save_experiments(
+            self, experiment_names: list[str], experiment_times: list[float],
+            experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame:
+        key = DataKey(self.bench_op)
+        data = pd.DataFrame(
+            {
+                key.experiment: experiment_names,
+                key.time: experiment_times,
+                key.count: experiment_lengths,
+                key.speed: [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
+                key.mop: experiment_mop,
+                key.gflops: [(mop * l) / (t * 1000.0)
+                             for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
+            })
+        data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t')
+        return data
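
BenchBase now defines the whole backend contract: get_dtype, experiment, name and mop, plus the constructor arguments. To show the pattern, here is a minimal, purely hypothetical sketch of a NumPy backend; Platform.NUMPY does not exist in src/common.py and is only assumed here for illustration:

    # Hypothetical example only: Platform.NUMPY would have to be added to src/common.py first.
    from pathlib import Path

    import numpy as np

    from src.base import BenchBase
    from src.common import DataType, Device, Op, Platform


    class NumpyAddBench(BenchBase):
        def __init__(self, output_path: Path):
            # CPU only; NumPy needs no device handle, so None is passed through.
            super().__init__(output_path, Platform.NUMPY, Op.ADD, Device.CPU, None)

        def get_dtype(self, data_type: DataType) -> np.dtype:
            return np.dtype(data_type.value)  # 'float16' / 'float32' / 'float64'

        def experiment(self, experiment_args, length, dtype, _device):
            tensor_1 = np.ones(experiment_args, dtype=dtype)
            tensor_2 = np.ones(experiment_args, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 + tensor_2

        def name(self, experiment_args) -> str:
            return f'{experiment_args[0]}x{experiment_args[1]} + {experiment_args[0]}x{experiment_args[1]}'

        def mop(self, experiment_args) -> float:
            return experiment_args[0] * experiment_args[1] / 1000_000
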
diff --git a/src/common.py b/src/common.py
new file mode 100644
index 0000000..a487e9c
--- /dev/null
+++ b/src/common.py
@@ -0,0 +1,36 @@
+from enum import Enum
+
+
+class Device(Enum):
+    CPU = 'cpu'
+    GPU = 'gpu'
+
+
+class DataType(Enum):
+    FLOAT16 = 'float16'
+    FLOAT32 = 'float32'
+    FLOAT64 = 'float64'
+
+
+class Op(Enum):
+    NO_OP = 'noop'
+    ADD = 'add'
+    DIV = 'div'
+    MUL = 'mul'
+    MATMUL = 'matmul'
+
+
+class Platform(Enum):
+    TF1 = 'TF1'
+    TF2 = 'TF2'
+    TORCH = 'Torch'
+
+
+class DataKey():
+    def __init__(self, bench_op: Op):
+        self.experiment = 'experiment'
+        self.time = 'run times (s)'
+        self.count = 'count'
+        self.mop = f'Mop/{bench_op.value}'
+        self.speed = f'ms/{bench_op.value}'
+        self.gflops = 'GFLOPS'
diff --git a/src/plot.py b/src/plot.py
new file mode 100644
index 0000000..f4cf907
--- /dev/null
+++ b/src/plot.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+
+from src.common import DataKey, DataType, Op, Platform
+
+
+def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform):
+    key = DataKey(bench_op)
+    sum_data = data[[key.experiment, key.time, key.count]].groupby(
+        key.experiment, as_index=False, sort=False).sum()
+    mean_data = data[[key.experiment, key.speed]].groupby(
+        key.experiment, as_index=False, sort=False).mean()
+    max_data = data[[key.experiment, key.mop]].groupby(
+        key.experiment, as_index=False, sort=False).max()
+
+    sns.set_theme(style="ticks")
+    figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
+    figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
+    for axe in axes[:-1]:
+        axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+
+    chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
+    axes[0].set_yscale("log")
+    for patch, value in zip(chart.patches, max_data[key.mop]):
+        chart.annotate(f'{value:0.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
+    for patch, value in zip(chart.patches, mean_data[key.speed]):
+        chart.annotate(f'{value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
+    for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
+        chart.annotate(f'{(mop * count / 1000) / value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    plt.xticks(rotation=20)
+    plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
+    plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png')
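
Each run now leaves a tab-separated CSV next to the plot, with column names coming from DataKey. A quick post-processing sketch; the path below is an illustrative placeholder, the real one is built from device, platform and op by set_output_path:

    # Read one result file back and print the median GFLOPS per experiment.
    import pandas as pd

    from src.common import DataKey, Op

    key = DataKey(Op.MATMUL)
    data = pd.read_csv('output/cpu_my_cpu/Torch/matmul/matmul_float32.csv', sep='\t', index_col=0)
    print(data.groupby(key.experiment, sort=False)[key.gflops].median())
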
diff --git a/src/pytorch/add.py b/src/pytorch/add.py
new file mode 100644
index 0000000..08f6b84
--- /dev/null
+++ b/src/pytorch/add.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchAddBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/base.py b/src/pytorch/base.py
new file mode 100644
index 0000000..335042d
--- /dev/null
+++ b/src/pytorch/base.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+
+import torch
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TorchBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        if torch.cuda.is_available():
+            if torch.cuda.device_count() > 1:
+                print('WARNING: multi-GPU benchmarking is not implemented yet (only the first device is used)')
+            torch.backends.cudnn.benchmark = True
+            device_type = Device.GPU
+            device = torch.device('cuda:0')
+        else:
+            device_type = Device.CPU
+            device = torch.device('cpu')
+
+        super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> torch.dtype:
+        if data_type == DataType.FLOAT16:
+            return torch.float16
+        if data_type == DataType.FLOAT32:
+            return torch.float32
+        if data_type == DataType.FLOAT64:
+            return torch.float64
+        raise NotImplementedError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
diff --git a/src/pytorch/div.py b/src/pytorch/div.py
new file mode 100644
index 0000000..9a0b309
--- /dev/null
+++ b/src/pytorch/div.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchDivBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/matmul.py b/src/pytorch/matmul.py
new file mode 100644
index 0000000..c40c261
--- /dev/null
+++ b/src/pytorch/matmul.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMatmulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1, shape_2 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/mul.py b/src/pytorch/mul.py
new file mode 100644
index 0000000..7208a6d
--- /dev/null
+++ b/src/pytorch/mul.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/ops.py b/src/pytorch/ops.py
new file mode 100644
index 0000000..22e4e96
--- /dev/null
+++ b/src/pytorch/ops.py
@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.pytorch.add import TorchAddBench
+from src.pytorch.base import TorchBase
+from src.pytorch.div import TorchDivBench
+from src.pytorch.mul import TorchMulBench
+from src.pytorch.matmul import TorchMatmulBench
+
+
+torch_ops: dict[Op, Type[TorchBase]] = {
+    Op.ADD: TorchAddBench,
+    Op.MUL: TorchMulBench,
+    Op.DIV: TorchDivBench,
+    Op.MATMUL: TorchMatmulBench
+}
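
For readers checking the throughput numbers: mop() returns millions of floating-point operations per single execution of the op. The element-wise benches count one operation per output element (rows x cols / 1e6), and the matmul benches count 2*(K - 1) operations per output element of an (M, K) @ (K, N) product, the same formula carried over from the old TorchMatmulBench. A small worked check mirroring those formulas:

    # Worked check of the Mop figures produced by mop().
    def element_wise_mop(shape):
        rows, cols = shape
        return rows * cols / 1000_000  # one op per output element

    def matmul_mop(shape_1, shape_2):
        m, k = shape_1
        _, n = shape_2
        return (m * n / 1000_000) * 2 * (k - 1)  # (k - 1) adds and (k - 1) muls per output element

    print(element_wise_mop((512, 512)))        # 0.262144 Mop
    print(matmul_mop((512, 512), (512, 512)))  # 267.911168 Mop
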
diff --git a/src/tf_2/add.py b/src/tf_2/add.py
new file mode 100644
index 0000000..7850157
--- /dev/null
+++ b/src/tf_2/add.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFAddBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/base.py b/src/tf_2/base.py
new file mode 100644
index 0000000..808ee45
--- /dev/null
+++ b/src/tf_2/base.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TFBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        gpus = tf.config.list_physical_devices('GPU')
+        if gpus:
+            if len(gpus) > 1:
+                print('WARNING: multi-GPU benchmarking is not implemented yet (only the first device is used)')
+
+            tf.config.experimental.set_memory_growth(gpus[0], True)
+            tf.config.set_visible_devices(gpus[0], 'GPU')
+            # logical_gpus = tf.config.list_logical_devices('GPU')
+            device_type = Device.GPU
+            device = tf.device('/GPU:0')
+        else:
+            device_type = Device.CPU
+            device = tf.device('/CPU:0')
+
+        super().__init__(output_path, Platform.TF2, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> tf.DType:
+        if data_type == DataType.FLOAT16:
+            return tf.float16
+        if data_type == DataType.FLOAT32:
+            return tf.float32
+        if data_type == DataType.FLOAT64:
+            return tf.float64
+        raise RuntimeError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
diff --git a/src/tf_2/div.py b/src/tf_2/div.py
new file mode 100644
index 0000000..21dd9b4
--- /dev/null
+++ b/src/tf_2/div.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFDivBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/matmul.py b/src/tf_2/matmul.py
new file mode 100644
index 0000000..70308b3
--- /dev/null
+++ b/src/tf_2/matmul.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMatmulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1, shape_2 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_2, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/mul.py b/src/tf_2/mul.py
new file mode 100644
index 0000000..12ca880
--- /dev/null
+++ b/src/tf_2/mul.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/ops.py b/src/tf_2/ops.py
new file mode 100644
index 0000000..7c3d12a
--- /dev/null
+++ b/src/tf_2/ops.py
@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.tf_2.add import TFAddBench
+from src.tf_2.base import TFBase
+from src.tf_2.div import TFDivBench
+from src.tf_2.mul import TFMulBench
+from src.tf_2.matmul import TFMatmulBench
+
+
+tf2_ops: dict[Op, Type[TFBase]] = {
+    Op.ADD: TFAddBench,
+    Op.MUL: TFMulBench,
+    Op.DIV: TFDivBench,
+    Op.MATMUL: TFMatmulBench
+}
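
The two ops.py registries are what run_benchmark dispatches through, so a bench can also be constructed from them directly. A minimal sketch, assuming TensorFlow 2 is installed, doing by hand what run_benchmark does for Platform.TF2:

    # Direct use of the TF2 registry: look up the bench class for an op and run it.
    import os
    from pathlib import Path

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ logging, as run_benchmark does

    from src.common import DataType, Op
    from src.tf_2.ops import tf2_ops

    bench = tf2_ops[Op.ADD](Path('output'))
    bench.run([(256, 256), (512, 512)], 10, DataType.FLOAT32)
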
diff --git a/src/torch/base.py b/src/torch/base.py
deleted file mode 100644
index 6007243..0000000
--- a/src/torch/base.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from pathlib import Path
-
-import torch
-
-from src.base import Base, Device
-from src.utils import get_cpu_name, get_nvidia_name
-
-
-class TorchBase(Base):
-    def __init__(self, output_path: Path):
-        super().__init__(output_path)
-
-        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-        if torch.cuda.is_available():
-            if torch.cuda.device_count() > 1:
-                print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)')
-            self.set_output_path(Device.GPU, get_nvidia_name())
-            torch.backends.cudnn.benchmark = True
-        else:
-            self.set_output_path(Device.CPU, get_cpu_name())
-
-        if not self.output_path.exists():
-            self.output_path.mkdir(parents=True)
diff --git a/src/torch/matmul.py b/src/torch/matmul.py
deleted file mode 100644
index 2dd91fe..0000000
--- a/src/torch/matmul.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import time
-
-from src.base import DataType
-from src.torch.base import TorchBase
-
-import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns
-import torch
-
-
-class TorchMatmulBench(TorchBase):
-
-    def run(self,
-            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
-            experiment_count: int,
-            data_type: DataType):
-        sns.set_theme(style="ticks")
-
-        dtype = None
-        if data_type == DataType.FLOAT16:
-            dtype = torch.float16
-        elif data_type == DataType.FLOAT32:
-            dtype = torch.float32
-        elif data_type == DataType.FLOAT64:
-            dtype = torch.float64
-        else:
-            raise RuntimeError(f'data_type {data_type.value} not implemented')
-        print(f'Startin Torch Matmul Benchmark with data type: {data_type.value}')
-
-        experiment_names = []
-        experiment_lengths = []
-        experiment_times = []
-        experiment_mop = []
-        for shape_1, shape_2 in experiment_args:
-            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
-            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)
-
-            # warmup
-            for _ in range(20):
-                _ = tensor_1 @ tensor_2
-
-            # speed evalutaion
-            counter = 0
-            start_time = time.time()
-            while(time.time() - start_time < 0.2):
-                _ = tensor_1 @ tensor_2
-                counter += 1
-            end_time = time.time()
-
-            target_time = 0.5 / experiment_count  # in s
-            experiment_speed = counter / (end_time - start_time)  # in op/s
-            experiment_length = max(int(target_time * experiment_speed), 2)
-
-            run_times = []
-            for _ in range(experiment_count):
-                start_time = time.time()
-                for _ in range(experiment_length):
-                    _ = tensor_1 @ tensor_2
-                run_times.append(time.time() - start_time)
-            experiment_times += run_times
-            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
-            experiment_lengths += [experiment_length] * experiment_count
-            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
-            print(f'Run {experiment_names[-1]} (x{experiment_length})'
-                  f' in {experiment_times[-1] * 1000:0.1f}ms')
-
-        data = pd.DataFrame(
-            {
-                'run times (s)': experiment_times,
-                'count': experiment_lengths,
-                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
-                'Mop/matmul': experiment_mop,
-                'GFLOPS': [(mop * l) / (t * 1000.0)
-                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
-            },
-            index=pd.Index(experiment_names, name='experiment'))
-        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')
-
-        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
-        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
-
-        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
-        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
-        for axe in axes[:-1]:
-            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
-
-        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
-        axes[0].set_yscale("log")
-        for p, value in zip(chart.patches, max_data['Mop/matmul']):
-            chart.annotate(f'{value:0.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
-        for p, value in zip(chart.patches, mean_data['ms/matmul']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
-        for p, value in zip(chart.patches, mean_data['GFLOPS']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        plt.xticks(rotation=20)
-        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
-        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')