Implement TF2 and add, mul and div benchmark

Corentin 2021-09-28 02:59:53 +09:00
commit 4b2bcfe7e8
18 changed files with 649 additions and 171 deletions


@@ -1,39 +1,99 @@
 from argparse import ArgumentParser
+import multiprocessing as mp
+import os
 from pathlib import Path
+from typing import Type

-from src.base import DataType
-from src.torch.matmul import TorchMatmulBench
+from src.base import BenchBase
+from src.common import DataType, Op, Platform
+
+
+def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
+                  bench_args, bench_count: int):
+    if platform == Platform.TF2:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+        from src.tf_2.ops import tf2_ops
+        if bench_op not in tf2_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    elif platform == Platform.TORCH:
+        from src.pytorch.ops import torch_ops
+        if bench_op not in torch_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    else:
+        print(f'Platform {platform.value} is not implemented yet')


 def main():
     parser = ArgumentParser()
     parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
+    parser.add_argument('--count', type=int, default=30,
+                        help='Number of experiments per benchmark (for statistical analysis)')
+    parser.add_argument('--platform', nargs='*', type=Platform,
+                        help='List of platforms to benchmark [TF1, TF2, Torch] (else all are used)')
+    parser.add_argument('--data', nargs='*', type=DataType,
+                        help='List of data types to benchmark [float16, float32, float64] (else all are used)')
+    parser.add_argument('--op', nargs='*', type=Op,
+                        help='List of operations to benchmark [add, mul, div, matmul] (else all are used)')
     arguments = parser.parse_args()
     output_path: Path = arguments.output
+    bench_count: int = arguments.count
+    platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
+    data: list[DataType] = arguments.data if arguments.data is not None else list(DataType)
+    bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op)
     if not output_path.exists():
         output_path.mkdir(parents=True)
-    for data_type in DataType:
-        TorchMatmulBench(output_path).run(
-            [
-                ((100, 100), (100, 100)),
-                ((100, 200), (200, 100)),
-                ((128, 128), (128, 128)),
-                ((200, 100), (100, 200)),
-                ((200, 200), (200, 200)),
-                ((256, 256), (256, 256)),
-                ((256, 512), (512, 256)),
-                ((400, 400), (400, 400)),
-                ((512, 256), (256, 512)),
-                ((512, 512), (512, 512)),
-                ((800, 800), (800, 800)),
-                ((1000, 1000), (1000, 1000)),
-                ((1200, 1200), (1200, 1200)),
-            ],
-            12,
-            data_type)
+    benchmarks: list[tuple[Path, Platform, DataType, Op, list, int]] = []
+    element_wise_args = [
+        (100, 100),
+        (100, 200),
+        (128, 128),
+        (200, 100),
+        (200, 200),
+        (256, 256),
+        (256, 512),
+        (512, 256),
+        (400, 400),
+        (512, 512),
+        (800, 800),
+        (1024, 1024),
+        (1800, 1800)]
+    matmul_args = [
+        ((100, 100), (100, 100)),
+        ((100, 200), (200, 100)),
+        ((128, 128), (128, 128)),
+        ((200, 100), (100, 200)),
+        ((200, 200), (200, 200)),
+        ((256, 256), (256, 256)),
+        ((256, 512), (512, 256)),
+        ((400, 400), (400, 400)),
+        ((512, 256), (256, 512)),
+        ((512, 512), (512, 512)),
+        ((800, 800), (800, 800)),
+        ((1000, 1000), (1000, 1000)),
+        ((1200, 1200), (1200, 1200))]
+    for platform in platforms:
+        for data_type in data:
+            for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
+                if bench_op in bench_ops:
+                    benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count))
+            if Op.MATMUL in bench_ops:
+                benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count))
+    # Each benchmark runs in its own process, so framework and GPU state are
+    # fully released between runs.
+    for benchmark in benchmarks:
+        process = mp.Process(target=run_benchmark, args=benchmark)
+        process.start()
+        process.join()
     print('Benchmark done')
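
Note that the --platform, --data and --op flags work by argparse passing each raw string to the Enum constructor, so values must match the enum values exactly ('Torch', not 'torch'). A standalone sketch of that mapping, using the src.common module from this commit:

from src.common import DataType, Op, Platform

# argparse calls type=Platform on each string, i.e. Platform('Torch'):
print(Platform('Torch'))    # Platform.TORCH
print(DataType('float32'))  # DataType.FLOAT32
print(Op('matmul'))         # Op.MATMUL
# A string that is not an enum value ('torch') raises ValueError,
# which argparse reports as an invalid argument.

So a hypothetical invocation like `python <entry script> --platform Torch --data float32 --op matmul` runs only the Torch matmul bench, while an invocation with no flags benchmarks every platform, data type and operation.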

src/base.py (modified)

@@ -1,22 +1,106 @@
 from pathlib import Path
-from enum import Enum
+import time
+
+import numpy as np
+import pandas as pd
+
+from src.common import DataKey, DataType, Device, Op, Platform
+from src.plot import plot_experiments
+from src.utils import get_cpu_name, get_nvidia_name


-class Device(Enum):
-    CPU = 'cpu'
-    GPU = 'gpu'
-
-
-class DataType(Enum):
-    FLOAT16 = 'float16'
-    FLOAT32 = 'float32'
-    FLOAT64 = 'float64'
-
-
-class Base():
-    def __init__(self, output_path: Path):
+class BenchBase():
+    def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device):
         self._base_output_path = output_path
         self.output_path = output_path
+        self.platform = platform
+        self.bench_op = bench_op
+        self.device_type = device_type
+        self.device = device
+        self.dtype = None

     def set_output_path(self, device: Device, device_name: str):
-        self.output_path = self._base_output_path / f'{device.value}_{device_name}'
+        self.output_path = (
+            self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value)
+
+    def get_dtype(self, data_type: DataType):
+        raise NotImplementedError()
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
+
+    def run(self, experiment_args, experiment_count: int, data_type: DataType):
+        self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
+        if not self.output_path.exists():
+            self.output_path.mkdir(parents=True)
+        dtype = self.get_dtype(data_type)
+        print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
+        experiment_names = []
+        experiment_lengths = []
+        experiment_times = []
+        experiment_mop = []
+        for args in experiment_args:
+            # warmup
+            for _ in range(4):
+                self.experiment(args, 5, dtype, self.device)
+            # speed evaluation
+            counter = 0
+            start_time = time.time()
+            while time.time() - start_time < 0.2:
+                self.experiment(args, 10, dtype, self.device)
+                counter += 10
+            end_time = time.time()
+            target_time = 1.0  # in s
+            experiment_speed = counter / (end_time - start_time)  # in op/s
+            experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
+            # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
+            #       f' => {experiment_speed:.03f}{self.bench_op.value}/s'
+            #       f', estimate {target_time:.03f}s with {experiment_length}x{experiment_count} exps')
+            run_times = []
+            for _ in range(experiment_count):
+                start_time = time.time()
+                self.experiment(args, experiment_length, dtype, self.device)
+                run_times.append(time.time() - start_time)
+            experiment_times += run_times
+            experiment_names += [self.name(args)] * experiment_count
+            experiment_lengths += [experiment_length] * experiment_count
+            experiment_mop += [self.mop(args)] * experiment_count
+            total_time = np.array(run_times, dtype=np.float64).sum()
+            total_glop = self.mop(args) * experiment_length * experiment_count / 1000
+            print(f'Run {experiment_names[-1]} (x{experiment_length})'
+                  f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFLOPS')
+        data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type)
+        plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform)
+
+    def save_experiments(
+            self, experiment_names: list[str], experiment_times: list[float],
+            experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame:
+        key = DataKey(self.bench_op)
+        data = pd.DataFrame(
+            {
+                key.experiment: experiment_names,
+                key.time: experiment_times,
+                key.count: experiment_lengths,
+                key.speed: [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
+                key.mop: experiment_mop,
+                key.gflops: [(mop * l) / (t * 1000.0)
+                             for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
+            })
+        data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t')
+        return data
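
The calibration in run sizes each timed run so that the experiment_count runs together take roughly target_time = 1.0 s: a 0.2 s probe (in batches of 10 ops) estimates the op rate, and experiment_length is derived from it. A minimal standalone sketch of the same arithmetic, with made-up probe numbers:

# Hypothetical probe result: 1200 ops completed during the 0.2 s window.
counter, elapsed = 1200, 0.2
experiment_count = 30
target_time = 1.0                     # total measurement budget, in s
experiment_speed = counter / elapsed  # 6000 op/s
experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
print(experiment_length)              # 200 ops per timed run
# 30 runs x 200 ops at 6000 op/s is about 1.0 s of measurement, as intended;
# the max(..., 2) floor keeps very slow ops from rounding down to 0 or 1.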

src/common.py (new file, 36 lines)

@@ -0,0 +1,36 @@
from enum import Enum


class Device(Enum):
    CPU = 'cpu'
    GPU = 'gpu'


class DataType(Enum):
    FLOAT16 = 'float16'
    FLOAT32 = 'float32'
    FLOAT64 = 'float64'


class Op(Enum):
    NO_OP = 'noop'
    ADD = 'add'
    DIV = 'div'
    MUL = 'mul'
    MATMUL = 'matmul'


class Platform(Enum):
    TF1 = 'TF1'
    TF2 = 'TF2'
    TORCH = 'Torch'


class DataKey():
    def __init__(self, bench_op: Op):
        self.experiment = 'experiment'
        self.time = 'run times (s)'
        self.count = 'count'
        self.mop = f'Mop/{bench_op.value}'
        self.speed = f'ms/{bench_op.value}'
        self.gflops = 'GFLOPS'
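
DataKey centralizes the column labels shared by BenchBase.save_experiments and plot_experiments, with the op name baked into the per-op columns. For instance:

from src.common import DataKey, Op

key = DataKey(Op.MATMUL)
print(key.mop)     # 'Mop/matmul'
print(key.speed)   # 'ms/matmul'
print(key.gflops)  # 'GFLOPS'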

src/plot.py (new file, 51 lines)

@@ -0,0 +1,51 @@
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from src.common import DataKey, DataType, Op, Platform


def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform):
    key = DataKey(bench_op)
    sum_data = data[[key.experiment, key.time, key.count]].groupby(
        key.experiment, as_index=False, sort=False).sum()
    mean_data = data[[key.experiment, key.speed]].groupby(
        key.experiment, as_index=False, sort=False).mean()
    max_data = data[[key.experiment, key.mop]].groupby(
        key.experiment, as_index=False, sort=False).max()
    sns.set_theme(style="ticks")
    figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
    figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
    for axe in axes[:-1]:
        axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
    axes[0].set_yscale("log")
    for patch, value in zip(chart.patches, max_data[key.mop]):
        chart.annotate(f'{value:0.3f}',
                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                       textcoords='offset points')
    chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
    for patch, value in zip(chart.patches, mean_data[key.speed]):
        chart.annotate(f'{value:.3f}',
                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                       textcoords='offset points')
    chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
    for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
        chart.annotate(f'{(mop * count / 1000) / value:.3f}',
                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                       textcoords='offset points')
    plt.xticks(rotation=20)
    plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
    plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png')
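
The two lower panels pass the full per-run DataFrame to sns.barplot with estimator=np.median, so each bar is the median over the repeated runs rather than the default mean, which damps outlier runs. A toy illustration with hypothetical numbers:

import numpy as np
import pandas as pd
import seaborn as sns

# The single outlier run (5.0) barely moves the 'a' bar under the median.
toy = pd.DataFrame({'experiment': ['a'] * 3 + ['b'] * 3,
                    'GFLOPS': [1.0, 1.1, 5.0, 2.0, 2.1, 2.2]})
ax = sns.barplot(x='experiment', y='GFLOPS', data=toy, estimator=np.median)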

src/pytorch/add.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from pathlib import Path

import torch

from src.common import DataType, Op
from src.pytorch.base import TorchBase


class TorchAddBench(TorchBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.ADD)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
        shape_1 = experiment_args
        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        for _ in range(length):
            _ = tensor_1 + tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/pytorch/base.py (new file, 39 lines)

@@ -0,0 +1,39 @@
from pathlib import Path

import torch

from src.base import BenchBase
from src.common import DataType, Device, Op, Platform


class TorchBase(BenchBase):
    def __init__(self, output_path: Path, bench_op: Op):
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
            torch.backends.cudnn.benchmark = True
            device_type = Device.GPU
            device = torch.device('cuda:0')
        else:
            device_type = Device.CPU
            device = torch.device('cpu')
        super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)

    def get_dtype(self, data_type: DataType) -> torch.dtype:
        if data_type == DataType.FLOAT16:
            return torch.float16
        if data_type == DataType.FLOAT32:
            return torch.float32
        if data_type == DataType.FLOAT64:
            return torch.float64
        raise NotImplementedError(f'data_type {data_type.value} not implemented')

    def experiment(self, _experiment_args, _length, _dtype, _device):
        raise NotImplementedError()

    def name(self, _experiment_args) -> str:
        raise NotImplementedError()

    def mop(self, _experiment_args) -> float:
        raise NotImplementedError()

src/pytorch/div.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from pathlib import Path

import torch

from src.common import DataType, Op
from src.pytorch.base import TorchBase


class TorchDivBench(TorchBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.DIV)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
        shape_1 = experiment_args
        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        for _ in range(length):
            _ = tensor_1 / tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/pytorch/matmul.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from pathlib import Path

import torch

from src.common import DataType, Op
from src.pytorch.base import TorchBase


class TorchMatmulBench(TorchBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.MATMUL)

    def experiment(self, experiment_args: tuple[tuple[int, int], tuple[int, int]],
                   length: int, dtype: torch.dtype, device: torch.device):
        shape_1, shape_2 = experiment_args
        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False)
        for _ in range(length):
            _ = tensor_1 @ tensor_2

    def name(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> str:
        shape_1, shape_2 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'

    def mop(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> float:
        shape_1, shape_2 = experiment_args
        return (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)
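
The mop formula counts one multiply plus one add per accumulation step, roughly 2·M·N·(K-1) operations for an MxK @ KxN product, expressed in Mop. Worked through for the 512x512 @ 512x512 case:

shape_1, shape_2 = (512, 512), (512, 512)
mop = (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)
print(f'{mop:.1f} Mop')  # 267.9 Mop per matmul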

src/pytorch/mul.py (new file, 33 lines)

@@ -0,0 +1,33 @@
from pathlib import Path

import torch

from src.common import DataType, Op
from src.pytorch.base import TorchBase


class TorchMulBench(TorchBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.MUL)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
        shape_1 = experiment_args
        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
        for _ in range(length):
            _ = tensor_1 * tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/pytorch/ops.py (new file, 16 lines)

@@ -0,0 +1,16 @@
from typing import Type

from src.common import Op
from src.pytorch.add import TorchAddBench
from src.pytorch.base import TorchBase
from src.pytorch.div import TorchDivBench
from src.pytorch.mul import TorchMulBench
from src.pytorch.matmul import TorchMatmulBench

torch_ops: dict[Op, Type[TorchBase]] = {
    Op.ADD: TorchAddBench,
    Op.MUL: TorchMulBench,
    Op.DIV: TorchDivBench,
    Op.MATMUL: TorchMatmulBench
}
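
run_benchmark in the entry script resolves Op values through this table; the same dispatch can be done by hand, e.g. with a hypothetical single-shape run:

from pathlib import Path

from src.common import DataType, Op
from src.pytorch.ops import torch_ops

# Instantiate the add bench and time one element-wise shape, 30 runs.
bench = torch_ops[Op.ADD](Path('output'))
bench.run([(256, 256)], 30, DataType.FLOAT32)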

src/tf_2/add.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from pathlib import Path

import tensorflow as tf

from src.common import DataType, Op
from src.tf_2.base import TFBase


class TFAddBench(TFBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.ADD)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
        shape_1 = experiment_args
        with device:
            tensor_1 = tf.ones(shape_1, dtype=dtype)
            tensor_2 = tf.ones(shape_1, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 + tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/tf_2/base.py (new file, 43 lines)

@@ -0,0 +1,43 @@
from pathlib import Path

import tensorflow as tf

from src.base import BenchBase
from src.common import DataType, Device, Op, Platform


class TFBase(BenchBase):
    def __init__(self, output_path: Path, bench_op: Op):
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            if len(gpus) > 1:
                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
            tf.config.experimental.set_memory_growth(gpus[0], True)
            tf.config.set_visible_devices(gpus[0], 'GPU')
            # logical_gpus = tf.config.list_logical_devices('GPU')
            device_type = Device.GPU
            device = tf.device('/GPU:0')
        else:
            device_type = Device.CPU
            device = tf.device('/CPU:0')
        super().__init__(output_path, Platform.TF2, bench_op, device_type, device)

    def get_dtype(self, data_type: DataType) -> tf.DType:
        if data_type == DataType.FLOAT16:
            return tf.float16
        if data_type == DataType.FLOAT32:
            return tf.float32
        if data_type == DataType.FLOAT64:
            return tf.float64
        raise RuntimeError(f'data_type {data_type.value} not implemented')

    def experiment(self, _experiment_args, _length, _dtype, _device):
        raise NotImplementedError()

    def name(self, _experiment_args) -> str:
        raise NotImplementedError()

    def mop(self, _experiment_args) -> float:
        raise NotImplementedError()
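
Unlike the Torch benches, which pass device= to the tensor constructor, the TF2 benches rely on the with device: scope for placement. A quick placement check (assuming TF 2.x eager mode):

import tensorflow as tf

with tf.device('/CPU:0'):
    tensor = tf.ones((2, 2))
print(tensor.device)  # e.g. '/job:localhost/replica:0/task:0/device:CPU:0'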

src/tf_2/div.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from pathlib import Path

import tensorflow as tf

from src.common import DataType, Op
from src.tf_2.base import TFBase


class TFDivBench(TFBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.DIV)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
        shape_1 = experiment_args
        with device:
            tensor_1 = tf.ones(shape_1, dtype=dtype)
            tensor_2 = tf.ones(shape_1, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 / tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/tf_2/matmul.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from pathlib import Path

import tensorflow as tf

from src.common import DataType, Op
from src.tf_2.base import TFBase


class TFMatmulBench(TFBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.MATMUL)

    def experiment(self, experiment_args: tuple[tuple[int, int], tuple[int, int]],
                   length: int, dtype: tf.DType, device: tf.device):
        shape_1, shape_2 = experiment_args
        with device:
            tensor_1 = tf.ones(shape_1, dtype=dtype)
            tensor_2 = tf.ones(shape_2, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 @ tensor_2

    def name(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> str:
        shape_1, shape_2 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'

    def mop(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> float:
        shape_1, shape_2 = experiment_args
        return (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)

    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/tf_2/mul.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from pathlib import Path

import tensorflow as tf

from src.common import DataType, Op
from src.tf_2.base import TFBase


class TFMulBench(TFBase):
    def __init__(self, output_path: Path):
        super().__init__(output_path, Op.MUL)

    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
        shape_1 = experiment_args
        with device:
            tensor_1 = tf.ones(shape_1, dtype=dtype)
            tensor_2 = tf.ones(shape_1, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 * tensor_2

    def name(self, experiment_args: tuple[int, int]) -> str:
        shape_1 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'

    def mop(self, experiment_args: tuple[int, int]) -> float:
        shape_1 = experiment_args
        return shape_1[0] * shape_1[1] / 1_000_000

    def run(self,
            experiment_args: list[tuple[int, int]],
            experiment_count: int,
            data_type: DataType):
        super().run(experiment_args, experiment_count, data_type)

src/tf_2/ops.py (new file, 16 lines)

@@ -0,0 +1,16 @@
from typing import Type

from src.common import Op
from src.tf_2.add import TFAddBench
from src.tf_2.base import TFBase
from src.tf_2.div import TFDivBench
from src.tf_2.mul import TFMulBench
from src.tf_2.matmul import TFMatmulBench

tf2_ops: dict[Op, Type[TFBase]] = {
    Op.ADD: TFAddBench,
    Op.MUL: TFMulBench,
    Op.DIV: TFDivBench,
    Op.MATMUL: TFMatmulBench
}

src/torch/base.py (deleted)

@@ -1,23 +0,0 @@
from pathlib import Path

import torch

from src.base import Base, Device
from src.utils import get_cpu_name, get_nvidia_name


class TorchBase(Base):
    def __init__(self, output_path: Path):
        super().__init__(output_path)
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.is_available():
            if torch.cuda.device_count() > 1:
                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
            self.set_output_path(Device.GPU, get_nvidia_name())
            torch.backends.cudnn.benchmark = True
        else:
            self.set_output_path(Device.CPU, get_cpu_name())
        if not self.output_path.exists():
            self.output_path.mkdir(parents=True)

src/torch/matmul.py (deleted)

@@ -1,112 +0,0 @@
import time

from src.base import DataType
from src.torch.base import TorchBase

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch


class TorchMatmulBench(TorchBase):
    def run(self,
            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
            experiment_count: int,
            data_type: DataType):
        sns.set_theme(style="ticks")
        dtype = None
        if data_type == DataType.FLOAT16:
            dtype = torch.float16
        elif data_type == DataType.FLOAT32:
            dtype = torch.float32
        elif data_type == DataType.FLOAT64:
            dtype = torch.float64
        else:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        print(f'Starting Torch Matmul Benchmark with data type: {data_type.value}')
        experiment_names = []
        experiment_lengths = []
        experiment_times = []
        experiment_mop = []
        for shape_1, shape_2 in experiment_args:
            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)
            # warmup
            for _ in range(20):
                _ = tensor_1 @ tensor_2
            # speed evaluation
            counter = 0
            start_time = time.time()
            while time.time() - start_time < 0.2:
                _ = tensor_1 @ tensor_2
                counter += 1
            end_time = time.time()
            target_time = 0.5 / experiment_count  # in s
            experiment_speed = counter / (end_time - start_time)  # in op/s
            experiment_length = max(int(target_time * experiment_speed), 2)
            run_times = []
            for _ in range(experiment_count):
                start_time = time.time()
                for _ in range(experiment_length):
                    _ = tensor_1 @ tensor_2
                run_times.append(time.time() - start_time)
            experiment_times += run_times
            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
            experiment_lengths += [experiment_length] * experiment_count
            experiment_mop += [(shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)] * experiment_count
            print(f'Run {experiment_names[-1]} (x{experiment_length})'
                  f' in {experiment_times[-1] * 1000:0.1f}ms')
        data = pd.DataFrame(
            {
                'run times (s)': experiment_times,
                'count': experiment_lengths,
                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
                'Mop/matmul': experiment_mop,
                'GFLOPS': [(mop * l) / (t * 1000.0)
                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
            },
            index=pd.Index(experiment_names, name='experiment'))
        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')
        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
        for axe in axes[:-1]:
            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
        axes[0].set_yscale("log")
        for p, value in zip(chart.patches, max_data['Mop/matmul']):
            chart.annotate(f'{value:0.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')
        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
        for p, value in zip(chart.patches, mean_data['ms/matmul']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')
        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
        for p, value in zip(chart.patches, mean_data['GFLOPS']):
            chart.annotate(f'{value:.3f}',
                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                           textcoords='offset points')
        plt.xticks(rotation=20)
        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')