Jax implementation, code factorisation

* Compatibility with older Python versions (typing)
Corentin 2021-10-01 20:14:00 +09:00
commit 16b7239cd7
37 changed files with 1007 additions and 293 deletions


@@ -2,28 +2,46 @@ from argparse import ArgumentParser
import multiprocessing as mp
import os
from pathlib import Path
from typing import Type
import sys
from typing import List, Type
from config.benchmark import Config
from src.base import BenchBase
from src.common import DataType, Op, Platform
from src.plot import compare
def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
bench_args, bench_count: int):
if platform == Platform.TF2:
if platform == Platform.JAX:
from src.jax.ops import jax_ops
if bench_op not in jax_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
jax_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
elif platform == Platform.TF2:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from src.tf_2.ops import tf2_ops
if bench_op not in tf2_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
tf2_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
elif platform == Platform.TF2_V1:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from src.tf_2_v1.ops import tf2v1_ops
if bench_op not in tf2v1_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
tf2v1_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
elif platform == Platform.TORCH:
from src.pytorch.ops import torch_ops
if bench_op not in torch_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
torch_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
else:
print(f'Platform {platform.value} is not implemented yet')
@@ -32,6 +50,8 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be
def main():
parser = ArgumentParser()
parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
parser.add_argument('--no-benchmark', action='store_true', default=False, help='Avoid running benchmarks')
parser.add_argument('--no-compare', action='store_true', default=False, help='Avoid running platform comparison')
parser.add_argument('--count', type=int, default=30,
help='Number of experiments per benchmark (for statistical analysis)')
parser.add_argument('--platform', nargs='*', type=Platform,
@@ -39,62 +59,56 @@ def main():
parser.add_argument('--data', nargs='*', type=DataType,
help='List of data types to benchmark [float16, float32, float64] (else all are used)')
parser.add_argument('--op', nargs='*', type=Op,
help='List of operation to benchmark [add, mul, div, matmul] (else all are used)')
help='List of operations to benchmark (add, mul, div, matmul, etc.) (else all are used)')
parser.add_argument('--list-op', action='store_true',
help='List all possible operations to benchmark (no further action will be done)')
parser.add_argument(
'--experiment-time', type=float,
help=f'Change time (in s) per experiment (default={Config.EXPERIMENT_TIME:0.3f}s)')
arguments = parser.parse_args()
if arguments.list_op:
print(', '.join([op.value for op in Op]))
sys.exit(0)
output_path: Path = arguments.output
no_benchmark: bool = arguments.no_benchmark
no_compare: bool = arguments.no_compare
bench_count: int = arguments.count
platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
data: list[DataType] = arguments.data if arguments.data is not None else list(DataType)
bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op)
platforms: List[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
data: List[DataType] = arguments.data if arguments.data is not None else list(DataType)
bench_ops: List[Op] = arguments.op if arguments.op is not None else list(Op)
if arguments.experiment_time:
Config.EXPERIMENT_TIME = arguments.experiment_time
if not output_path.exists():
output_path.mkdir(parents=True)
benchmarks: list[dict[Op, Type[BenchBase]]] = []
element_wise_args = [
(100, 100),
(100, 200),
(128, 128),
(200, 100),
(200, 200),
(256, 256),
(256, 512),
(512, 256),
(400, 400),
(512, 512),
(800, 800),
(1024, 1024),
(1800, 1800)]
matmul_args = [
((100, 100), (100, 100)),
((100, 200), (200, 100)),
((128, 128), (128, 128)),
((200, 100), (100, 200)),
((200, 200), (200, 200)),
((256, 256), (256, 256)),
((256, 512), (512, 256)),
((400, 400), (400, 400)),
((512, 256), (256, 512)),
((512, 512), (512, 512)),
((800, 800), (800, 800)),
((1000, 1000), (1000, 1000)),
((1200, 1200), (1200, 1200))]
if not no_benchmark:
benchmarks: List[dict[Op, Type[BenchBase]]] = []
for platform in platforms:
for data_type in data:
for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
if bench_op in bench_ops:
benchmarks.append((output_path, platform, data_type, bench_op,
Config.ELEMENT_WISE_ARGS, bench_count))
for bench_op in [Op.MATMUL, Op.NN_MATMUL]:
if bench_op in bench_ops:
benchmarks.append((output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count))
if Op.NN_DENSE in bench_ops:
benchmarks.append((output_path, platform, data_type, Op.NN_DENSE, Config.NN_1D_ARGS, bench_count))
for platform in platforms:
for data_type in data:
for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
if bench_op in bench_ops:
benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count))
if Op.MATMUL in bench_ops:
benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count))
if benchmarks:
for benchmark in benchmarks:
process = mp.Process(target=run_benchmark, args=benchmark)
process.start()
process.join()
print('Benchmark done')
for benchmark in benchmarks:
process = mp.Process(target=run_benchmark, args=benchmark)
process.start()
process.join()
print('Benchmark done')
if not no_compare:
compare(output_path)
print('Compare done')
if __name__ == '__main__':
    main()
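For reference, each tuple queued in benchmarks mirrors run_benchmark's signature, so a single entry is equivalent to a direct call like the sketch below (values illustrative); running each one in its own mp.Process presumably keeps one framework's device state from leaking into the next benchmark.

from pathlib import Path
from config.benchmark import Config
from src.common import DataType, Op, Platform

# one queued benchmark, invoked directly (a sketch, not project code)
run_benchmark(Path('output'), Platform.JAX, DataType.FLOAT32, Op.MATMUL,
              Config.MATMUL_ARGS, bench_count=30)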

config/benchmark.py Normal file

@@ -0,0 +1,41 @@
class Config:
EXPERIMENT_TIME = 1.0
ELEMENT_WISE_ARGS = [
(100, 100),
(100, 200),
(128, 128),
(200, 100),
(200, 200),
(256, 256),
(256, 512),
(512, 256),
(400, 400),
(512, 512),
(800, 800),
(1024, 1024),
(1800, 1800)]
MATMUL_ARGS = [
((100, 100), (100, 100)),
((100, 200), (200, 100)),
((128, 128), (128, 128)),
((200, 100), (100, 200)),
((200, 200), (200, 200)),
((256, 256), (256, 256)),
((256, 512), (512, 256)),
((400, 400), (400, 400)),
((512, 256), (256, 512)),
((512, 512), (512, 512)),
((800, 800), (800, 800)),
((1000, 1000), (1000, 1000)),
((1200, 1200), (1200, 1200))]
NN_1D_ARGS = [
(1, 16), (16, 16), (64, 16),
(1, 64), (16, 64),
(1, 150), (16, 150),
(1, 256), (16, 256),
(1, 400), (16, 400), (64, 400),
(1, 512), (16, 512), (64, 512),
(1, 800), (16, 800), (64, 800),
(1, 1024), (16, 1024),
(1, 2000), (16, 2000), (64, 2000),
(1, 4000), (16, 4000), (64, 4000)]

requirements.txt Normal file

@@ -0,0 +1,4 @@
matplotlib
seaborn
tensorflow
torch
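A hedged note on this file: the JAX path added by this commit also needs jax and jaxlib at runtime, and src/base.py imports numpy and pandas; none of these are pinned here, so presumably they are expected to be installed separately.

# assumed additional runtime dependencies, not listed in this file:
# jax, jaxlib, numpy, pandas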

src/base.py

@@ -1,69 +1,71 @@
from pathlib import Path
import time
from typing import List
import numpy as np
import pandas as pd
from config.benchmark import Config
from src.common import DataKey, DataType, Device, Op, Platform
from src.plot import plot_experiments
from src.op_info import op_infos
from src.utils import get_cpu_name, get_nvidia_name
class BenchBase():
def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device):
def __init__(self, output_path: Path, platform: Platform, bench_op: Op,
device_type: Device, device,
data_type: DataType, dtype):
self._base_output_path = output_path
self.output_path = output_path
self.platform = platform
self.bench_op = bench_op
self.device_type = device_type
self.device = device
self.dtype = None
self.device_name = get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name()
self.data_type = data_type
self.dtype = dtype
self.info = op_infos[bench_op]
def set_output_path(self, device: Device, device_name: str):
self.output_path = (
self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value)
self._base_output_path / f'{self.device_type.value}_{self.device_name}'
/ self.platform.value / self.bench_op.value) # noqa
def get_dtype(self, data_type: DataType):
def pre_experiment(self, _experiment_args):
pass
def experiment(self):
raise NotImplementedError()
def experiment(self, _experiment_args, _length, _dtype, _device):
raise NotImplementedError()
def name(self, _experiment_args) -> str:
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
raise NotImplementedError()
def run(self, experiment_args, experiment_count: int, data_type: DataType):
self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
def post_experiment(self):
pass
def run(self, experiment_args, experiment_count: int):
if not self.output_path.exists():
self.output_path.mkdir(parents=True)
dtype = self.get_dtype(data_type)
print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark'
f' with data type: {self.data_type.value}')
experiment_names = []
experiment_lengths = []
experiment_times = []
experiment_mop = []
for args in experiment_args:
self.pre_experiment(args)
# warmup
for _ in range(4):
self.experiment(args, 5, dtype, self.device)
for _ in range(20):
self.experiment()
# speed evaluation
counter = 0
start_time = time.time()
while time.time() - start_time < 0.2:
self.experiment(args, 10, dtype, self.device)
counter += 10
while (time.time() - start_time) < (Config.EXPERIMENT_TIME / 5):
self.experiment()
counter += 1
end_time = time.time()
target_time = 1.0 # in s
target_time = Config.EXPERIMENT_TIME # in s
experiment_speed = counter / (end_time - start_time) # in op/s
experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
# print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
@@ -73,24 +75,28 @@ class BenchBase():
run_times = []
for _ in range(experiment_count):
start_time = time.time()
self.experiment(args, experiment_length, dtype, self.device)
for _ in range(experiment_length):
self.experiment()
run_times.append(time.time() - start_time)
experiment_times += run_times
experiment_names += [self.name(args)] * experiment_count
experiment_names += [self.info.name(args)] * experiment_count
experiment_lengths += [experiment_length] * experiment_count
experiment_mop += [self.mop(args)] * experiment_count
experiment_mop += [self.info.mop(args)] * experiment_count
total_time = np.array(run_times, dtype=np.float64).sum()
total_glop = self.mop(args) * experiment_length * experiment_count / 1000
total_glop = self.info.mop(args) * experiment_length * experiment_count / 1000
print(f'Run {experiment_names[-1]} (x{experiment_length})'
f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFLOPS')
self.post_experiment()
data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type)
plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform)
data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop)
# Avoid circular import
from src.plot import plot_experiments # pylint: disable=import-outside-toplevel
plot_experiments(self, data)
def save_experiments(
self, experiment_names: list[str], experiment_times: list[float],
experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame:
self, experiment_names: List[str], experiment_times: List[float],
experiment_lengths: List[int], experiment_mop: List[float]) -> pd.DataFrame:
key = DataKey(self.bench_op)
data = pd.DataFrame(
{
@@ -102,5 +108,5 @@ class BenchBase():
key.gflops: [(mop * l) / (t * 1000.0)
for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
})
data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t')
data.to_csv(self.output_path / f'{self.bench_op.value}_{self.data_type.value}.csv', sep='\t')
return data
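To make the calibration step in run() concrete, here is the arithmetic with illustrative numbers, assuming the default Config.EXPERIMENT_TIME of 1.0 s and an experiment_count of 30:

# probe: call experiment() in a loop for EXPERIMENT_TIME / 5 = 0.2 s
experiment_speed = 5000.0  # e.g. 1000 calls completed in 0.2 s -> 5000 op/s
experiment_length = max(int(1.0 / 30 * experiment_speed), 2)  # -> 166
# each of the 30 timed runs then loops experiment() 166 times,
# so the whole measurement takes roughly EXPERIMENT_TIME in total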

src/common.py

@@ -13,16 +13,19 @@ class DataType(Enum):
class Op(Enum):
NO_OP = 'noop'
ADD = 'add'
DIV = 'div'
MUL = 'mul'
MATMUL = 'matmul'
NN_MATMUL = 'nn_matmul'
NN_DENSE = 'nn_dense'
class Platform(Enum):
TF1 = 'TF1'
JAX = 'jax'
# TF1 = 'TF1'
TF2 = 'TF2'
TF2_V1 = 'TF2_V1'
TORCH = 'Torch'

src/jax/add.py Normal file

@@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxAddBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD, data_type)
self.tensor_1: jnp.DeviceArray = None
self.tensor_2: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready()
def experiment(self):
self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready()
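The block_until_ready() calls are what make the timings meaningful: JAX dispatches work asynchronously, so without them a timed loop would mostly measure enqueueing rather than the add itself. In miniature:

import jax.numpy as jnp

tensor_1 = jnp.ones((512, 512))
tensor_2 = jnp.ones((512, 512))
result = jnp.add(tensor_1, tensor_2)  # returns immediately; the device may still be computing
result.block_until_ready()            # blocks until the result is actually materialised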

src/jax/base.py Normal file

@@ -0,0 +1,34 @@
from pathlib import Path
import jax.numpy as jnp
import jax
from src.base import BenchBase
from src.common import DataType, Device, Op, Platform
class JaxBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
gpu_devices = jax.devices('gpu')
if gpu_devices:
if len(gpu_devices) > 1:
print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
device_type = Device.GPU
device = gpu_devices[0]
else:
device_type = Device.CPU
device = jax.devices('cpu')[0]
if data_type == DataType.FLOAT16:
dtype = jnp.float16
elif data_type == DataType.FLOAT32:
dtype = jnp.float32
elif data_type == DataType.FLOAT64:
dtype = jnp.float64
else:
raise NotImplementedError(f'data_type {data_type.value} not implemented')
super().__init__(output_path, Platform.JAX, bench_op, device_type, device, data_type, dtype)
def experiment(self):
raise NotImplementedError()
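One caveat this base class does not handle (a standard JAX default, not something visible in the diff): requests for float64 fall back to float32 unless 64-bit mode is enabled before arrays are created, e.g.:

import jax

# without this, jnp.ones(..., dtype=jnp.float64) comes back as float32
jax.config.update('jax_enable_x64', True)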

src/jax/div.py Normal file

@@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxDivBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV, data_type)
self.tensor_1: jnp.DeviceArray = None
self.tensor_2: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready()
def experiment(self):
self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready()

src/jax/matmul.py Normal file

@@ -0,0 +1,28 @@
from pathlib import Path
from typing import List, Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxMatmulBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL, data_type)
self.tensor_1: jnp.DeviceArray = None
self.tensor_2: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype))
self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready()
def experiment(self):
self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready()
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/jax/mul.py Normal file

@@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxMulBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL, data_type)
self.tensor_1: jnp.DeviceArray = None
self.tensor_2: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_result = jnp.multiply(self.tensor_1, self.tensor_2).block_until_ready()
def experiment(self):
self.tensor_result = jnp.multiply(self.tensor_1, self.tensor_2).block_until_ready()

src/jax/nn_dense.py Normal file

@@ -0,0 +1,32 @@
from pathlib import Path
from typing import Callable, List, Tuple
from jax import device_put, jit, random
from jax.experimental import stax
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxNNDenseBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_DENSE, data_type)
self.tensor: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
self.network: Callable = None
self.params = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
batch_size, dimension = experiment_args
self.tensor = device_put(jnp.ones((batch_size, dimension), dtype=self.dtype))
network_init, self.network = stax.Dense(dimension)
_, self.params = network_init(random.PRNGKey(1), (batch_size, dimension))
self.network = jit(self.network)
self.tensor_result = self.network(self.params, self.tensor)
def experiment(self):
self.tensor_result = self.network(self.params, self.tensor)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/jax/nn_matmul.py Normal file

@@ -0,0 +1,33 @@
from pathlib import Path
from typing import List, Tuple
from jax import device_put, jit
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
def matmul(tensor_1: jnp.DeviceArray, tensor_2: jnp.DeviceArray) -> jnp.DeviceArray:
return tensor_1 @ tensor_2
class JaxNNMatmulBench(JaxBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_MATMUL, data_type)
self.tensor_1: jnp.DeviceArray = None
self.tensor_2: jnp.DeviceArray = None
self.tensor_result: jnp.DeviceArray = None
self.network = jit(matmul)
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype))
self.tensor_result = self.network(self.tensor_1, self.tensor_2)
def experiment(self):
self.tensor_result = self.network(self.tensor_1, self.tensor_2)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)
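jit(matmul) is traced and compiled the first time it is called with a new shape/dtype signature, which is why pre_experiment runs the network once before timing begins. A sketch of that behaviour:

from jax import jit
import jax.numpy as jnp

def matmul(tensor_1, tensor_2):
    return tensor_1 @ tensor_2

fast_matmul = jit(matmul)
tensor_1 = jnp.ones((512, 256))
tensor_2 = jnp.ones((256, 512))
_ = fast_matmul(tensor_1, tensor_2)  # first call: trace + XLA compilation for these shapes
_ = fast_matmul(tensor_1, tensor_2)  # later calls reuse the cached executable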

src/jax/ops.py Normal file

@@ -0,0 +1,20 @@
from typing import Type
from src.common import Op
from src.jax.add import JaxAddBench
from src.jax.base import JaxBase
from src.jax.div import JaxDivBench
from src.jax.mul import JaxMulBench
from src.jax.matmul import JaxMatmulBench
from src.jax.nn_dense import JaxNNDenseBench
from src.jax.nn_matmul import JaxNNMatmulBench
jax_ops: dict[Op, Type[JaxBase]] = {
Op.ADD: JaxAddBench,
Op.MUL: JaxMulBench,
Op.DIV: JaxDivBench,
Op.MATMUL: JaxMatmulBench,
Op.NN_MATMUL: JaxNNMatmulBench,
Op.NN_DENSE: JaxNNDenseBench
}
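run_benchmark dispatches through this registry; a minimal sketch of the lookup it performs:

from pathlib import Path
from config.benchmark import Config
from src.common import DataType, Op

bench_cls = jax_ops[Op.MATMUL]  # -> JaxMatmulBench
bench_cls(Path('output'), DataType.FLOAT32).run(Config.MATMUL_ARGS, 30)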

src/op_info.py Normal file

@@ -0,0 +1,85 @@
from typing import Dict, List, Type, Tuple
from src.common import Op
class _BaseInfo():
@staticmethod
def name(experiment_args) -> str:
raise NotImplementedError()
@staticmethod
def mop(experiment_args) -> float:
raise NotImplementedError()
class AddInfo(_BaseInfo):
@staticmethod
def name(experiment_args: Tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
@staticmethod
def mop(experiment_args: Tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1_000_000
class DivInfo(_BaseInfo):
@staticmethod
def name(experiment_args: Tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
@staticmethod
def mop(experiment_args: Tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1_000_000
class MulInfo(_BaseInfo):
@staticmethod
def name(experiment_args: Tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
@staticmethod
def mop(experiment_args: Tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1_000_000
class MatmulInfo(_BaseInfo):
@staticmethod
def name(experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]) -> str:
shape_1, shape_2 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
@staticmethod
def mop(experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]) -> float:
shape_1, shape_2 = experiment_args
return (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)
class DenseInfo(_BaseInfo):
@staticmethod
def name(experiment_args: Tuple[int, int]) -> str:
batch_size, dimension = experiment_args
return f'Dense(({batch_size}x{dimension}))'
@staticmethod
def mop(experiment_args: Tuple[int, int]) -> float:
batch_size, dimension = experiment_args
return batch_size * (
((dimension * dimension / 1_000_000) * 2 * (dimension - 1)) + (
dimension / 1_000_000))
op_infos: Dict[Op, Type[_BaseInfo]] = {
Op.ADD: AddInfo,
Op.DIV: DivInfo,
Op.MUL: MulInfo,
Op.MATMUL: MatmulInfo,
Op.NN_MATMUL: MatmulInfo,
Op.NN_DENSE: DenseInfo
}
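As a worked example, the matmul cost model charges 2 * (k - 1) operations per output element of an m x k @ k x n product, expressed in millions of operations (Mop):

# MatmulInfo.mop(((512, 256), (256, 512)))
# = (512 * 512 / 1_000_000) * 2 * (256 - 1)
# = 0.262144 * 510
# ~= 133.7 Mop per matmul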

src/plot.py

@@ -1,4 +1,7 @@
from pathlib import Path
import math
import multiprocessing as mp
import os
import numpy as np
import matplotlib.pyplot as plt
@@ -6,11 +9,20 @@ import pandas as pd
import seaborn as sns
from src.base import BenchBase
from src.common import DataKey, DataType, Op, Platform
def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform):
key = DataKey(bench_op)
class CompKey:
def __init__(self):
self.data_type = 'data_type'
self.device = 'device'
self.bench_op = 'op'
self.platform = 'platform'
def plot_experiments(bench: BenchBase, data: pd.DataFrame):
key = DataKey(bench.bench_op)
sum_data = data[[key.experiment, key.time, key.count]].groupby(
key.experiment, as_index=False, sort=False).sum()
mean_data = data[[key.experiment, key.speed]].groupby(
@@ -20,12 +32,12 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
sns.set_theme(style="ticks")
figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
for axe in axes[:-1]:
axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
axes[0].set_yscale("log")
if max_data[key.mop].max() > max_data[key.mop].min() * 100:
axes[0].set_yscale("log")
for patch, value in zip(chart.patches, max_data[key.mop]):
chart.annotate(f'{value:0.3f}',
(patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
@@ -33,6 +45,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points')
chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
if data[key.speed].max() > data[key.speed].min() * 100:
axes[1].set_yscale("log")
for patch, value in zip(chart.patches, mean_data[key.speed]):
chart.annotate(f'{value:.3f}',
(patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
@@ -40,6 +54,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points')
chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
if data[key.gflops].max() > data[key.gflops].min() * 100:
axes[2].set_yscale("log")
for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
chart.annotate(f'{(mop * count / 1000) / value:.3f}',
(patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
@@ -47,5 +63,62 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points')
plt.xticks(rotation=20)
plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png')
plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.91, right=0.99, bottom=0.1, left=0.05)
figure.suptitle(f'{bench.platform.value} {bench.bench_op.value} ({bench.data_type.value})', fontsize=16)
axes[0].set_title(f'{bench.device_name}', fontsize=12)
plt.savefig(bench.output_path / f'{bench.bench_op.value}_{bench.data_type.value}.png')
def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path):
op_data = all_data[(all_data[comp_key.bench_op] == bench_op) & (all_data[comp_key.device] == device)]
platform_list = op_data[comp_key.platform].unique()
if len(platform_list) <= 1:
return
key = DataKey(Op(bench_op))
sns.set_theme(style="ticks")
for data_type in op_data[comp_key.data_type].unique():
data = op_data[op_data[comp_key.data_type] == data_type]
graph = sns.catplot(x=key.experiment, y=key.gflops, hue=comp_key.platform, data=data,
kind='bar', estimator=np.median, height=8, aspect=1.4)
if data[key.gflops].max() > data[key.gflops].min() * 100:
graph.set(yscale="log")
plt.xticks(rotation=70, fontsize=8)
plt.subplots_adjust(top=0.92, bottom=0.25)
plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16)
plt.title(f'{device}', fontsize=12)
plt.savefig(output_path / device / f'{bench_op}_{data_type}.png')
def compare(output_path: Path):
all_data: pd.DataFrame = None
comp_key = CompKey()
for data_path in output_path.rglob('*.csv'):
if len(data_path.parents) <= 4:
print(f'Warning: cannot parse data at path {data_path} (subfolders missing)')
data_type = DataType(data_path.stem.split('_')[-1])
bench_op = Op(data_path.parents[0].name)
platform = Platform(data_path.parents[1].name)
device_name = data_path.parents[2].name
current_data = pd.read_csv(data_path, sep='\t')
current_data[comp_key.data_type] = data_type.value
current_data[comp_key.bench_op] = bench_op.value
current_data[comp_key.platform] = platform.value
current_data[comp_key.device] = device_name
if all_data is None:
all_data = current_data
else:
all_data = all_data.append(current_data, ignore_index=True, verify_integrity=True)
# Compare between platforms
comp_args = []
for device in all_data[comp_key.device].unique():
for bench_op in all_data[comp_key.bench_op].unique():
comp_args.append((all_data, comp_key, device, bench_op, output_path))
with mp.Pool(processes=math.ceil(os.cpu_count() * 0.8)) as pool:
pool.starmap(_draw_comparison, comp_args)
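compare() recovers all its metadata from the CSV paths, which set_output_path in src/base.py lays out as <output>/<device>_<device_name>/<platform>/<op>/<op>_<dtype>.csv. With a hypothetical file (the device name is illustrative):

from pathlib import Path

data_path = Path('output/gpu_GeForce RTX 3090/jax/matmul/matmul_float32.csv')
data_path.stem.split('_')[-1]  # -> 'float32' (data type)
data_path.parents[0].name      # -> 'matmul' (op)
data_path.parents[1].name      # -> 'jax' (platform)
data_path.parents[2].name      # -> 'gpu_GeForce RTX 3090' (device folder)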

src/pytorch/add.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import torch
@@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchAddBench(TorchBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.ADD)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 + self.tensor_2
for _ in range(length):
_ = tensor_1 + tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 + self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/base.py

@@ -7,7 +7,7 @@ from src.common import DataType, Device, Op, Platform
class TorchBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op):
def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
if torch.cuda.is_available():
if torch.cuda.device_count() > 1:
print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
@@ -18,22 +18,16 @@ class TorchBase(BenchBase):
device_type = Device.CPU
device = torch.device('cpu')
super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)
def get_dtype(self, data_type: DataType) -> torch.dtype:
if data_type == DataType.FLOAT16:
return torch.float16
if data_type == DataType.FLOAT32:
return torch.float32
if data_type == DataType.FLOAT64:
return torch.float64
raise NotImplementedError(f'data_type {data_type.value} not implemented')
dtype = torch.float16
elif data_type == DataType.FLOAT32:
dtype = torch.float32
elif data_type == DataType.FLOAT64:
dtype = torch.float64
else:
raise NotImplementedError(f'data_type {data_type.value} not implemented')
def experiment(self, _experiment_args, _length, _dtype, _device):
raise NotImplementedError()
super().__init__(output_path, Platform.TORCH, bench_op, device_type, device, data_type, dtype)
def name(self, _experiment_args) -> str:
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
def experiment(self):
raise NotImplementedError()
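A timing caveat worth keeping in mind when reading the GPU numbers (general PyTorch behaviour, not addressed in this diff): CUDA kernels launch asynchronously, so wall-clock measurements are normally bracketed with an explicit synchronisation:

import time
import torch

tensor_1 = torch.ones((512, 512), device='cuda')
tensor_2 = torch.ones((512, 512), device='cuda')
start = time.time()
result = tensor_1 @ tensor_2  # launches the kernel and returns immediately
torch.cuda.synchronize()      # wait for the kernel to actually finish
elapsed = time.time() - start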

src/pytorch/div.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import torch
@@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchDivBench(TorchBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.DIV)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 / self.tensor_2
for _ in range(length):
_ = tensor_1 / tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 / self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/matmul.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import torch
@@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchMatmulBench(TorchBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.MATMUL)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False)
self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_2 = torch.ones(shape_2, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 @ self.tensor_2
for _ in range(length):
_ = tensor_1 @ tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 @ self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1, shape_2 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1, shape_2 = experiment_args
return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
def run(self,
experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/mul.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import torch
@@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchMulBench(TorchBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.MUL)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 * self.tensor_2
for _ in range(length):
_ = tensor_1 * tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 * self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/nn_dense.py Normal file

@@ -0,0 +1,36 @@
from pathlib import Path
from typing import List, Tuple
import torch
from src.common import DataType, Op
from src.pytorch.base import TorchBase
class DenseNetwork(torch.nn.Module):
def __init__(self, input_dim: int, dtype: torch.dtype):
super().__init__()
self.dense = torch.nn.Linear(input_dim, input_dim, dtype=dtype)
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
return self.dense(input_data)
class TorchNNDenseBench(TorchBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_DENSE, data_type)
self.tensor: torch.Tensor = None
self.tensor_result: torch.Tensor = None
self.network: torch.nn.Module = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
batch_size, dimension = experiment_args
self.tensor = torch.ones((batch_size, dimension), dtype=self.dtype, device=self.device, requires_grad=False)
self.network = DenseNetwork(dimension, self.dtype).to(self.device)
self.tensor_result = self.network(self.tensor)
def experiment(self):
self.tensor_result = self.network(self.tensor)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/nn_matmul.py Normal file

@@ -0,0 +1,34 @@
from pathlib import Path
from typing import List, Tuple
import torch
from src.common import DataType, Op
from src.pytorch.base import TorchBase
class MatMulNetwork(torch.nn.Module):
def forward(self, input_1: torch.Tensor, input_2: torch.Tensor) -> torch.Tensor:
return input_1 @ input_2
class TorchNNMatmulBench(TorchBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_MATMUL, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
self.network: torch.nn.Module = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_2 = torch.ones(shape_2, dtype=self.dtype, device=self.device, requires_grad=False)
self.network = MatMulNetwork()
self.tensor_result = self.network(self.tensor_1, self.tensor_2)
def experiment(self):
self.tensor_result = self.network(self.tensor_1, self.tensor_2)
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/pytorch/ops.py

@@ -6,11 +6,15 @@ from src.pytorch.base import TorchBase
from src.pytorch.div import TorchDivBench
from src.pytorch.mul import TorchMulBench
from src.pytorch.matmul import TorchMatmulBench
from src.pytorch.nn_dense import TorchNNDenseBench
from src.pytorch.nn_matmul import TorchNNMatmulBench
torch_ops: dict[Op, Type[TorchBase]] = {
Op.ADD: TorchAddBench,
Op.MUL: TorchMulBench,
Op.DIV: TorchDivBench,
Op.MATMUL: TorchMatmulBench
Op.MATMUL: TorchMatmulBench,
Op.NN_MATMUL: TorchNNMatmulBench,
Op.NN_DENSE: TorchNNDenseBench
}

src/tf_2/add.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
@@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFAddBench(TFBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.ADD)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
with device:
tensor_1 = tf.ones(shape_1, dtype=dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype)
with self.device:
self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 + self.tensor_2
for _ in range(length):
_ = tensor_1 + tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 + self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2/base.py

@@ -7,13 +7,13 @@ from src.common import DataType, Device, Op, Platform
class TFBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op):
def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
gpus = tf.config.list_physical_devices('GPU')
if gpus:
if len(gpus) > 1:
print('WARNING: no multiple CUDA device benchmark implemented yet (only using first)')
tf.config.experimental.set_memory_growth(gpus[0], True)
# tf.config.experimental.set_memory_growth(gpus[0], True)
tf.config.set_visible_devices(gpus[0], 'GPU')
# logical_gpus = tf.config.list_logical_devices('GPU')
device_type = Device.GPU
@@ -22,22 +22,16 @@ class TFBase(BenchBase):
device_type = Device.CPU
device = tf.device('/CPU:0')
super().__init__(output_path, Platform.TF2, bench_op, device_type, device)
def get_dtype(self, data_type: DataType) -> tf.DType:
if data_type == DataType.FLOAT16:
return tf.float16
if data_type == DataType.FLOAT32:
return tf.float32
if data_type == DataType.FLOAT64:
return tf.float64
raise RuntimeError(f'data_type {data_type.value} not implemented')
dtype = tf.float16
elif data_type == DataType.FLOAT32:
dtype = tf.float32
elif data_type == DataType.FLOAT64:
dtype = tf.float64
else:
raise RuntimeError(f'data_type {data_type.value} not implemented')
def experiment(self, _experiment_args, _length, _dtype, _device):
raise NotImplementedError()
super().__init__(output_path, Platform.TF2, bench_op, device_type, device, data_type, dtype)
def name(self, _experiment_args) -> str:
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
def experiment(self):
raise NotImplementedError()

src/tf_2/div.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
@@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFDivBench(TFBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.DIV)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
with device:
tensor_1 = tf.ones(shape_1, dtype=dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype)
with self.device:
self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 / self.tensor_2
for _ in range(length):
_ = tensor_1 / tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 / self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2/matmul.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
@@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFMatmulBench(TFBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.MATMUL)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
with device:
tensor_1 = tf.ones(shape_1, dtype=dtype)
tensor_2 = tf.ones(shape_2, dtype=dtype)
with self.device:
self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_2 = tf.ones(shape_2, dtype=self.dtype)
self.tensor_result = self.tensor_1 @ self.tensor_2
for _ in range(length):
_ = tensor_1 @ tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 @ self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1, shape_2 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1, shape_2 = experiment_args
return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
def run(self,
experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2/mul.py

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
@@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFMulBench(TFBase):
def __init__(self, output_path: Path):
super().__init__(output_path, Op.MUL)
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args
with device:
tensor_1 = tf.ones(shape_1, dtype=dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype)
with self.device:
self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 * self.tensor_2
for _ in range(length):
_ = tensor_1 * tensor_2
def experiment(self):
self.tensor_result = self.tensor_1 * self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str:
shape_1 = experiment_args
return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2/nn_dense.py Normal file

@@ -0,0 +1,35 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
from src.common import DataType, Op
from src.tf_2.base import TFBase
class DenseModel(tf.keras.Model):
def __init__(self, input_dim: int, dtype: tf.DType):
super().__init__()
self.dense = tf.keras.layers.Dense(input_dim, dtype=dtype)
def call(self, input_tensor: tf.Tensor) -> tf.Tensor:
return self.dense(input_tensor)
class TFNNDenseBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_DENSE, data_type)
self.tensor: tf.Tensor = None
self.network: tf.keras.Model = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
batch_size, dimension = experiment_args
with self.device:
self.tensor = tf.ones((batch_size, dimension), dtype=self.dtype)
self.network = DenseModel(dimension, self.dtype)
def experiment(self):
self.network(self.tensor)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2/nn_matmul.py Normal file

@@ -0,0 +1,34 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
from src.common import DataType, Op
from src.tf_2.base import TFBase
class MatmulModel(tf.keras.Model):
def call(self, tensor_1: tf.Tensor, tensor_2: tf.Tensor) -> tf.Tensor:
return tf.matmul(tensor_1, tensor_2)
class TFNNMatmulBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.NN_MATMUL, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
self.network: tf.keras.Model = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args
with self.device:
self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_2 = tf.ones(shape_2, dtype=self.dtype)
self.network = MatmulModel()
def experiment(self):
self.tensor_result = self.network(self.tensor_1, self.tensor_2)
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)
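Note that a Keras model invoked directly like this runs eagerly; compiling the call into a graph is usually done with tf.function (a hedged aside, not something this diff does):

network = MatmulModel()
compiled = tf.function(network)
result = compiled(tensor_1, tensor_2)  # first call traces; later calls run the compiled graph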

src/tf_2/ops.py

@@ -6,11 +6,15 @@ from src.tf_2.base import TFBase
from src.tf_2.div import TFDivBench
from src.tf_2.mul import TFMulBench
from src.tf_2.matmul import TFMatmulBench
from src.tf_2.nn_dense import TFNNDenseBench
from src.tf_2.nn_matmul import TFNNMatmulBench
tf2_ops: dict[Op, Type[TFBase]] = {
Op.ADD: TFAddBench,
Op.MUL: TFMulBench,
Op.DIV: TFDivBench,
Op.MATMUL: TFMatmulBench
Op.MATMUL: TFMatmulBench,
Op.NN_MATMUL: TFNNMatmulBench,
Op.NN_DENSE: TFNNDenseBench
}

src/tf_2_v1/add.py Normal file

@@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFAddBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD, data_type)
self.add_op = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
super().pre_experiment(experiment_args)
shape_1 = experiment_args
tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
self.add_op = tensor_1 + tensor_2
self.session.run(tf.initializers.global_variables())
def experiment(self):
self.session.run(self.add_op)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2_v1/base.py Normal file

@@ -0,0 +1,43 @@
from pathlib import Path
import tensorflow.compat.v1 as tf
from src.base import BenchBase
from src.common import DataType, Device, Op, Platform
class TFBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
if data_type == DataType.FLOAT16:
dtype = tf.float16
elif data_type == DataType.FLOAT32:
dtype = tf.float32
elif data_type == DataType.FLOAT64:
dtype = tf.float64
else:
raise RuntimeError(f'data_type {data_type.value} not implemented')
super().__init__(output_path, Platform.TF2_V1, bench_op, Device.GPU, None, data_type, dtype)
self.session: tf.Session = None
def pre_experiment(self, _experiment_args):
tf.disable_v2_behavior()
# tf.disable_eager_execution()
# gpu_options = tf.GPUOptions(allow_growth=True)
# session_config = tf.ConfigProto(gpu_options=gpu_options)
# self.session = tf.Session(config=session_config)
self.session = tf.Session()
self.session.as_default()
def post_experiment(self):
self.session.close()
tf.reset_default_graph()
def experiment(self):
raise NotImplementedError()
def name(self, _experiment_args) -> str:
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
raise NotImplementedError()
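The TF1-compat pattern these benchmarks rely on, in miniature (a sketch, not project code): the graph is built once, then each experiment() is a session.run of the prebuilt op. It also shows why post_experiment() calls tf.reset_default_graph(): the next pre_experiment() re-creates variables under the same names ('tensor_1', 'tensor_2'), which would otherwise collide.

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
tensor_1 = tf.get_variable('tensor_1', shape=(2, 2), dtype=tf.float32,
                           initializer=tf.initializers.ones, trainable=False)
tensor_2 = tf.get_variable('tensor_2', shape=(2, 2), dtype=tf.float32,
                           initializer=tf.initializers.ones, trainable=False)
add_op = tensor_1 + tensor_2  # graph construction, done once per argument set
with tf.Session() as session:
    session.run(tf.initializers.global_variables())
    session.run(add_op)  # executing the prebuilt graph, done per timed iteration
tf.reset_default_graph()  # lets the variable names be reused next time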

src/tf_2_v1/div.py Normal file

@@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFDivBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV, data_type)
self.div_op = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
super().pre_experiment(experiment_args)
shape_1 = experiment_args
tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
self.div_op = tensor_1 / tensor_2
self.session.run(tf.initializers.global_variables())
def experiment(self):
self.session.run(self.div_op)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2_v1/matmul.py Normal file

@@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFMatmulBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL, data_type)
self.matmul_op = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
super().pre_experiment(experiment_args)
shape_1, shape_2 = experiment_args
tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
tensor_2 = tf.get_variable('tensor_2', shape=shape_2, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
self.matmul_op = tensor_1 @ tensor_2
self.session.run(tf.initializers.global_variables())
def experiment(self):
self.session.run(self.matmul_op)
def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2_v1/mul.py Normal file

@@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFMulBench(TFBase):
def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL, data_type)
self.mul_op = None
def pre_experiment(self, experiment_args: Tuple[int, int]):
super().pre_experiment(experiment_args)
shape_1 = experiment_args
tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
initializer=tf.initializers.ones, trainable=False)
self.mul_op = tensor_1 * tensor_2
self.session.run(tf.initializers.global_variables())
def experiment(self):
self.session.run(self.mul_op)
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
super().run(experiment_args, experiment_count)

src/tf_2_v1/ops.py Normal file

@@ -0,0 +1,16 @@
from typing import Type
from src.common import Op
from src.tf_2_v1.add import TFAddBench
from src.tf_2_v1.base import TFBase
from src.tf_2_v1.div import TFDivBench
from src.tf_2_v1.mul import TFMulBench
from src.tf_2_v1.matmul import TFMatmulBench
tf2v1_ops: dict[Op, Type[TFBase]] = {
Op.ADD: TFAddBench,
Op.MUL: TFMulBench,
Op.DIV: TFDivBench,
Op.MATMUL: TFMatmulBench
}