Implement TF2 and add, mul and div benchmark

parent fbf6898dd9
commit 4b2bcfe7e8

18 changed files with 649 additions and 171 deletions
benchmark.py (102 changed lines)

@@ -1,39 +1,99 @@
 from argparse import ArgumentParser
+import multiprocessing as mp
+import os
 from pathlib import Path
 
-from src.base import DataType
-from src.torch.matmul import TorchMatmulBench
+from src.common import DataType, Op, Platform
+
+
+def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
+                  bench_args, bench_count: int):
+    if platform == Platform.TF2:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+        from src.tf_2.ops import tf2_ops
+        if bench_op not in tf2_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    elif platform == Platform.TORCH:
+        from src.pytorch.ops import torch_ops
+        if bench_op not in torch_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    else:
+        print(f'Platform {platform.value} is not implemented yet')
 
 
 def main():
     parser = ArgumentParser()
     parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
+    parser.add_argument('--count', type=int, default=30,
+                        help='Number of experiments per benchmark (for statistical analysis)')
+    parser.add_argument('--platform', nargs='*', type=Platform,
+                        help='List of platforms to benchmark [TF1, TF2, Torch] (else all are used)')
+    parser.add_argument('--data', nargs='*', type=DataType,
+                        help='List of data types to benchmark [float16, float32, float64] (else all are used)')
+    parser.add_argument('--op', nargs='*', type=Op,
+                        help='List of operations to benchmark [add, mul, div, matmul] (else all are used)')
     arguments = parser.parse_args()
 
     output_path: Path = arguments.output
+    bench_count: int = arguments.count
+    platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
+    data: list[DataType] = arguments.data if arguments.data is not None else list(DataType)
+    bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op)
 
     if not output_path.exists():
         output_path.mkdir(parents=True)
 
-    for data_type in DataType:
-        TorchMatmulBench(output_path).run(
-            [
-                ((100, 100), (100, 100)),
-                ((100, 200), (200, 100)),
-                ((128, 128), (128, 128)),
-                ((200, 100), (100, 200)),
-                ((200, 200), (200, 200)),
-                ((256, 256), (256, 256)),
-                ((256, 512), (512, 256)),
-                ((400, 400), (400, 400)),
-                ((512, 256), (256, 512)),
-                ((512, 512), (512, 512)),
-                ((800, 800), (800, 800)),
-                ((1000, 1000), (1000, 1000)),
-                ((1200, 1200), (1200, 1200)),
-            ],
-            12,
-            data_type)
+    benchmarks: list[tuple] = []
+    element_wise_args = [
+        (100, 100),
+        (100, 200),
+        (128, 128),
+        (200, 100),
+        (200, 200),
+        (256, 256),
+        (256, 512),
+        (512, 256),
+        (400, 400),
+        (512, 512),
+        (800, 800),
+        (1024, 1024),
+        (1800, 1800)]
+    matmul_args = [
+        ((100, 100), (100, 100)),
+        ((100, 200), (200, 100)),
+        ((128, 128), (128, 128)),
+        ((200, 100), (100, 200)),
+        ((200, 200), (200, 200)),
+        ((256, 256), (256, 256)),
+        ((256, 512), (512, 256)),
+        ((400, 400), (400, 400)),
+        ((512, 256), (256, 512)),
+        ((512, 512), (512, 512)),
+        ((800, 800), (800, 800)),
+        ((1000, 1000), (1000, 1000)),
+        ((1200, 1200), (1200, 1200))]
+
+    for platform in platforms:
+        for data_type in data:
+            for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
+                if bench_op in bench_ops:
+                    benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count))
+            if Op.MATMUL in bench_ops:
+                benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count))
+
+    for benchmark in benchmarks:
+        process = mp.Process(target=run_benchmark, args=benchmark)
+        process.start()
+        process.join()
 
     print('Benchmark done')
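For reference, a typical invocation of the new CLI, run from the repository root, might look like this (illustrative, not part of the commit):

    python benchmark.py --count 30 --platform TF2 Torch --data float32 --op add mul div matmul

Since the arguments are declared with type=Platform, type=DataType and type=Op, argparse hands each token to the enum constructor, so the accepted spellings are exactly the enum values (TF1/TF2/Torch, float16/float32/float64, add/mul/div/matmul).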
src/base.py (114 changed lines)

@@ -1,22 +1,106 @@
 from pathlib import Path
-from enum import Enum
+import time
+
+import numpy as np
+import pandas as pd
+
+from src.common import DataKey, DataType, Device, Op, Platform
+from src.plot import plot_experiments
+from src.utils import get_cpu_name, get_nvidia_name
 
 
-class Device(Enum):
-    CPU = 'cpu'
-    GPU = 'gpu'
-
-
-class DataType(Enum):
-    FLOAT16 = 'float16'
-    FLOAT32 = 'float32'
-    FLOAT64 = 'float64'
-
-
-class Base():
-    def __init__(self, output_path: Path):
+class BenchBase():
+    def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device):
         self._base_output_path = output_path
         self.output_path = output_path
+        self.platform = platform
+        self.bench_op = bench_op
+        self.device_type = device_type
+        self.device = device
+        self.dtype = None
 
     def set_output_path(self, device: Device, device_name: str):
-        self.output_path = self._base_output_path / f'{device.value}_{device_name}'
+        self.output_path = (
+            self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value)
+
+    def get_dtype(self, data_type: DataType):
+        raise NotImplementedError()
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
+
+    def run(self, experiment_args, experiment_count: int, data_type: DataType):
+        self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
+
+        if not self.output_path.exists():
+            self.output_path.mkdir(parents=True)
+
+        dtype = self.get_dtype(data_type)
+
+        print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
+
+        experiment_names = []
+        experiment_lengths = []
+        experiment_times = []
+        experiment_mop = []
+        for args in experiment_args:
+            # warmup
+            for _ in range(4):
+                self.experiment(args, 5, dtype, self.device)
+
+            # speed evaluation
+            counter = 0
+            start_time = time.time()
+            while time.time() - start_time < 0.2:
+                self.experiment(args, 10, dtype, self.device)
+                counter += 10
+            end_time = time.time()
+
+            target_time = 1.0  # in s
+            experiment_speed = counter / (end_time - start_time)  # in op/s
+            experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
+            # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
+            #       f' => {experiment_speed:.03f}{self.bench_op.value}/s'
+            #       f', estimate {target_time:.03f}s with {experiment_length}x{experiment_count} exps')
+
+            run_times = []
+            for _ in range(experiment_count):
+                start_time = time.time()
+                self.experiment(args, experiment_length, dtype, self.device)
+                run_times.append(time.time() - start_time)
+            experiment_times += run_times
+            experiment_names += [self.name(args)] * experiment_count
+            experiment_lengths += [experiment_length] * experiment_count
+            experiment_mop += [self.mop(args)] * experiment_count
+
+            total_time = np.array(run_times, dtype=np.float64).sum()
+            total_glop = self.mop(args) * experiment_length * experiment_count / 1000
+            print(f'Run {experiment_names[-1]} (x{experiment_length})'
+                  f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFLOPS')
+
+        data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type)
+        plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform)
+
+    def save_experiments(
+            self, experiment_names: list[str], experiment_times: list[float],
+            experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame:
+        key = DataKey(self.bench_op)
+        data = pd.DataFrame(
+            {
+                key.experiment: experiment_names,
+                key.time: experiment_times,
+                key.count: experiment_lengths,
+                key.speed: [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
+                key.mop: experiment_mop,
+                key.gflops: [(mop * l) / (t * 1000.0)
+                             for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
+            })
+        data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t')
+        return data
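To make run()'s calibration concrete: the ~0.2 s probe estimates the op rate, and each of the experiment_count timed runs is then sized so that together they take roughly target_time. A sketch with made-up numbers:

    # probe: 4500 ops finished in 0.2 s -> 22500 op/s
    experiment_speed = 4500 / 0.2
    # 1.0 s budget / 30 runs * 22500 op/s -> 750 ops per timed run
    experiment_length = max(int(1.0 / 30 * 22500), 2)  # == 750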
src/common.py (new file, 36 lines)

@@ -0,0 +1,36 @@
+from enum import Enum
+
+
+class Device(Enum):
+    CPU = 'cpu'
+    GPU = 'gpu'
+
+
+class DataType(Enum):
+    FLOAT16 = 'float16'
+    FLOAT32 = 'float32'
+    FLOAT64 = 'float64'
+
+
+class Op(Enum):
+    NO_OP = 'noop'
+    ADD = 'add'
+    DIV = 'div'
+    MUL = 'mul'
+    MATMUL = 'matmul'
+
+
+class Platform(Enum):
+    TF1 = 'TF1'
+    TF2 = 'TF2'
+    TORCH = 'Torch'
+
+
+class DataKey():
+    def __init__(self, bench_op: Op):
+        self.experiment = 'experiment'
+        self.time = 'run times (s)'
+        self.count = 'count'
+        self.mop = f'Mop/{bench_op.value}'
+        self.speed = f'ms/{bench_op.value}'
+        self.gflops = 'GFLOPS'
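DataKey only centralizes the column names shared by the CSV writer in src/base.py and the plots in src/plot.py, e.g.:

    key = DataKey(Op.MATMUL)
    assert key.speed == 'ms/matmul'
    assert key.mop == 'Mop/matmul'
    assert key.gflops == 'GFLOPS'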
src/plot.py (new file, 51 lines)

@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+from src.common import DataKey, DataType, Op, Platform
+
+
+def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform):
+    key = DataKey(bench_op)
+    sum_data = data[[key.experiment, key.time, key.count]].groupby(
+        key.experiment, as_index=False, sort=False).sum()
+    mean_data = data[[key.experiment, key.speed]].groupby(
+        key.experiment, as_index=False, sort=False).mean()
+    max_data = data[[key.experiment, key.mop]].groupby(
+        key.experiment, as_index=False, sort=False).max()
+
+    sns.set_theme(style="ticks")
+    figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
+    figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
+    for axe in axes[:-1]:
+        axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+
+    chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
+    axes[0].set_yscale("log")
+    for patch, value in zip(chart.patches, max_data[key.mop]):
+        chart.annotate(f'{value:0.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
+    for patch, value in zip(chart.patches, mean_data[key.speed]):
+        chart.annotate(f'{value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
+    for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
+        chart.annotate(f'{(mop * count / 1000) / value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    plt.xticks(rotation=20)
+    plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
+    plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png')
src/pytorch/add.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchAddBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/pytorch/base.py (new file, 39 lines)

@@ -0,0 +1,39 @@
+from pathlib import Path
+
+import torch
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TorchBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        if torch.cuda.is_available():
+            if torch.cuda.device_count() > 1:
+                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
+            torch.backends.cudnn.benchmark = True
+            device_type = Device.GPU
+            device = torch.device('cuda:0')
+        else:
+            device_type = Device.CPU
+            device = torch.device('cpu')
+
+        super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> torch.dtype:
+        if data_type == DataType.FLOAT16:
+            return torch.float16
+        if data_type == DataType.FLOAT32:
+            return torch.float32
+        if data_type == DataType.FLOAT64:
+            return torch.float64
+        raise NotImplementedError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
src/pytorch/div.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchDivBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/pytorch/matmul.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMatmulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[tuple[int, int], tuple[int, int]],
+                   length: int, dtype: torch.dtype, device: torch.device):
+        shape_1, shape_2 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
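A note on the mop() formula: an m x k @ k x n product costs k multiplications and k - 1 additions for each of its m * n outputs, which the benchmark approximates as 2 * (k - 1) ops per output, hence (m * n / 1e6) * 2 * (k - 1) Mop. For example, 512x512 @ 512x512 gives 512 * 512 * 2 * 511 / 1e6 ≈ 267.9 Mop.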
src/pytorch/mul.py (new file, 33 lines)

@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/pytorch/ops.py (new file, 16 lines)

@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.pytorch.add import TorchAddBench
+from src.pytorch.base import TorchBase
+from src.pytorch.div import TorchDivBench
+from src.pytorch.mul import TorchMulBench
+from src.pytorch.matmul import TorchMatmulBench
+
+
+torch_ops: dict[Op, Type[TorchBase]] = {
+    Op.ADD: TorchAddBench,
+    Op.MUL: TorchMulBench,
+    Op.DIV: TorchDivBench,
+    Op.MATMUL: TorchMatmulBench
+}
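This table is what run_benchmark() in benchmark.py indexes into; used standalone it would look like this (illustrative):

    from pathlib import Path

    from src.common import DataType, Op
    from src.pytorch.ops import torch_ops

    torch_ops[Op.MATMUL](Path('output')).run(
        [((512, 512), (512, 512))], 30, DataType.FLOAT32)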
src/tf_2/add.py (new file, 34 lines)

@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFAddBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/tf_2/base.py (new file, 43 lines)

@@ -0,0 +1,43 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TFBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        gpus = tf.config.list_physical_devices('GPU')
+        if gpus:
+            if len(gpus) > 1:
+                print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
+
+            tf.config.experimental.set_memory_growth(gpus[0], True)
+            tf.config.set_visible_devices(gpus[0], 'GPU')
+            # logical_gpus = tf.config.list_logical_devices('GPU')
+            device_type = Device.GPU
+            device = tf.device('/GPU:0')
+        else:
+            device_type = Device.CPU
+            device = tf.device('/CPU:0')
+
+        super().__init__(output_path, Platform.TF2, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> tf.DType:
+        if data_type == DataType.FLOAT16:
+            return tf.float16
+        if data_type == DataType.FLOAT32:
+            return tf.float32
+        if data_type == DataType.FLOAT64:
+            return tf.float64
+        raise RuntimeError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
src/tf_2/div.py (new file, 34 lines)

@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFDivBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/tf_2/matmul.py (new file, 34 lines)

@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMatmulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[tuple[int, int], tuple[int, int]],
+                   length: int, dtype: tf.DType, device: tf.device):
+        shape_1, shape_2 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_2, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[tuple[int, int], tuple[int, int]]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/tf_2/mul.py (new file, 34 lines)

@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
src/tf_2/ops.py (new file, 16 lines)

@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.tf_2.add import TFAddBench
+from src.tf_2.base import TFBase
+from src.tf_2.div import TFDivBench
+from src.tf_2.mul import TFMulBench
+from src.tf_2.matmul import TFMatmulBench
+
+
+tf2_ops: dict[Op, Type[TFBase]] = {
+    Op.ADD: TFAddBench,
+    Op.MUL: TFMulBench,
+    Op.DIV: TFDivBench,
+    Op.MATMUL: TFMatmulBench
+}
src/torch/base.py (deleted, 23 lines)

@@ -1,23 +0,0 @@
-from pathlib import Path
-
-import torch
-
-from src.base import Base, Device
-from src.utils import get_cpu_name, get_nvidia_name
-
-
-class TorchBase(Base):
-    def __init__(self, output_path: Path):
-        super().__init__(output_path)
-
-        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-        if torch.cuda.is_available():
-            if torch.cuda.device_count() > 1:
-                print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)')
-            self.set_output_path(Device.GPU, get_nvidia_name())
-            torch.backends.cudnn.benchmark = True
-        else:
-            self.set_output_path(Device.CPU, get_cpu_name())
-
-        if not self.output_path.exists():
-            self.output_path.mkdir(parents=True)
src/torch/matmul.py (deleted, 112 lines)

@@ -1,112 +0,0 @@
-import time
-
-from src.base import DataType
-from src.torch.base import TorchBase
-
-import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns
-import torch
-
-
-class TorchMatmulBench(TorchBase):
-
-    def run(self,
-            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
-            experiment_count: int,
-            data_type: DataType):
-        sns.set_theme(style="ticks")
-
-        dtype = None
-        if data_type == DataType.FLOAT16:
-            dtype = torch.float16
-        elif data_type == DataType.FLOAT32:
-            dtype = torch.float32
-        elif data_type == DataType.FLOAT64:
-            dtype = torch.float64
-        else:
-            raise RuntimeError(f'data_type {data_type.value} not implemented')
-        print(f'Startin Torch Matmul Benchmark with data type: {data_type.value}')
-
-        experiment_names = []
-        experiment_lengths = []
-        experiment_times = []
-        experiment_mop = []
-        for shape_1, shape_2 in experiment_args:
-            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
-            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)
-
-            # warmup
-            for _ in range(20):
-                _ = tensor_1 @ tensor_2
-
-            # speed evalutaion
-            counter = 0
-            start_time = time.time()
-            while(time.time() - start_time < 0.2):
-                _ = tensor_1 @ tensor_2
-                counter += 1
-            end_time = time.time()
-
-            target_time = 0.5 / experiment_count  # in s
-            experiment_speed = counter / (end_time - start_time)  # in op/s
-            experiment_length = max(int(target_time * experiment_speed), 2)
-
-            run_times = []
-            for _ in range(experiment_count):
-                start_time = time.time()
-                for _ in range(experiment_length):
-                    _ = tensor_1 @ tensor_2
-                run_times.append(time.time() - start_time)
-            experiment_times += run_times
-            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
-            experiment_lengths += [experiment_length] * experiment_count
-            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
-            print(f'Run {experiment_names[-1]} (x{experiment_length})'
-                  f' in {experiment_times[-1] * 1000:0.1f}ms')
-
-        data = pd.DataFrame(
-            {
-                'run times (s)': experiment_times,
-                'count': experiment_lengths,
-                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
-                'Mop/matmul': experiment_mop,
-                'GFLOPS': [(mop * l) / (t * 1000.0)
-                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
-            },
-            index=pd.Index(experiment_names, name='experiment'))
-        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')
-
-        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
-        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
-
-        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
-        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
-        for axe in axes[:-1]:
-            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
-
-        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
-        axes[0].set_yscale("log")
-        for p, value in zip(chart.patches, max_data['Mop/matmul']):
-            chart.annotate(f'{value:0.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
-        for p, value in zip(chart.patches, mean_data['ms/matmul']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
-        for p, value in zip(chart.patches, mean_data['GFLOPS']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        plt.xticks(rotation=20)
-        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
-        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')