Jax implementation, code factorisation

* Compatibility for older python version (typing)
This commit is contained in:
Corentin 2021-10-01 20:14:00 +09:00
commit 16b7239cd7
37 changed files with 1007 additions and 293 deletions

View file

@ -2,28 +2,46 @@ from argparse import ArgumentParser
import multiprocessing as mp import multiprocessing as mp
import os import os
from pathlib import Path from pathlib import Path
from typing import Type import sys
from typing import List, Type
from config.benchmark import Config
from src.base import BenchBase from src.base import BenchBase
from src.common import DataType, Op, Platform from src.common import DataType, Op, Platform
from src.plot import compare
def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op, def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
bench_args, bench_count: int): bench_args, bench_count: int):
if platform == Platform.TF2: if platform == Platform.JAX:
from src.jax.ops import jax_ops
if bench_op not in jax_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
jax_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
elif platform == Platform.TF2:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from src.tf_2.ops import tf2_ops from src.tf_2.ops import tf2_ops
if bench_op not in tf2_ops: if bench_op not in tf2_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else: else:
tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type) tf2_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print()
elif platform == Platform.TF2_V1:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from src.tf_2_v1.ops import tf2v1_ops
if bench_op not in tf2v1_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else:
tf2v1_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print() print()
elif platform == Platform.TORCH: elif platform == Platform.TORCH:
from src.pytorch.ops import torch_ops from src.pytorch.ops import torch_ops
if bench_op not in torch_ops: if bench_op not in torch_ops:
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
else: else:
torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type) torch_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
print() print()
else: else:
print(f'Platform {platform.value} is not implemented yet') print(f'Platform {platform.value} is not implemented yet')
@ -32,6 +50,8 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be
def main(): def main():
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files') parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
parser.add_argument('--no-benchmark', action='store_true', default=False, help='Avoid running benchmarks')
parser.add_argument('--no-compare', action='store_true', default=False, help='Avoid running platform comparison')
parser.add_argument('--count', type=int, default=30, parser.add_argument('--count', type=int, default=30,
help='Number of experiments per benchmark (for statistical analysis)') help='Number of experiments per benchmark (for statistical analysis)')
parser.add_argument('--platform', nargs='*', type=Platform, parser.add_argument('--platform', nargs='*', type=Platform,
@ -39,63 +59,57 @@ def main():
parser.add_argument('--data', nargs='*', type=DataType, parser.add_argument('--data', nargs='*', type=DataType,
help='List of data type to benchmark [float16, float32, float64] (else all are used)') help='List of data type to benchmark [float16, float32, float64] (else all are used)')
parser.add_argument('--op', nargs='*', type=Op, parser.add_argument('--op', nargs='*', type=Op,
help='List of operation to benchmark [add, mul, div, matmul] (else all are used)') help='List of operation to benchmark (add, mul, div, matmul, etc) (else all are used)')
parser.add_argument('--list-op', action='store_true',
help='List all possible operation to benchmark (no further action will be done)')
parser.add_argument(
'--experiment-time', type=float,
help=f'Change time (in s) per experiment (default={Config.EXPERIMENT_TIME:0.3f}s)')
arguments = parser.parse_args() arguments = parser.parse_args()
if arguments.list_op:
print(', '.join([op.value for op in Op]))
sys.exit(0)
output_path: Path = arguments.output output_path: Path = arguments.output
no_benchmark: bool = arguments.no_benchmark
no_compare: bool = arguments.no_compare
bench_count: int = arguments.count bench_count: int = arguments.count
platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform) platforms: List[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
data: list[DataType] = arguments.data if arguments.data is not None else list(DataType) data: List[DataType] = arguments.data if arguments.data is not None else list(DataType)
bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op) bench_ops: List[Op] = arguments.op if arguments.op is not None else list(Op)
if arguments.experiment_time:
Config.EXPERIMENT_TIME = arguments.experiment_time
if not output_path.exists(): if not output_path.exists():
output_path.mkdir(parents=True) output_path.mkdir(parents=True)
benchmarks: list[dict[Op, Type[BenchBase]]] = [] if not no_benchmark:
element_wise_args = [ benchmarks: List[dict[Op, Type[BenchBase]]] = []
(100, 100),
(100, 200),
(128, 128),
(200, 100),
(200, 200),
(256, 256),
(256, 512),
(512, 256),
(400, 400),
(512, 512),
(800, 800),
(1024, 1024),
(1800, 1800)]
matmul_args = [
((100, 100), (100, 100)),
((100, 200), (200, 100)),
((128, 128), (128, 128)),
((200, 100), (100, 200)),
((200, 200), (200, 200)),
((256, 256), (256, 256)),
((256, 512), (512, 256)),
((400, 400), (400, 400)),
((512, 256), (256, 512)),
((512, 512), (512, 512)),
((800, 800), (800, 800)),
((1000, 1000), (1000, 1000)),
((1200, 1200), (1200, 1200))]
for platform in platforms: for platform in platforms:
for data_type in data: for data_type in data:
for bench_op in [Op.ADD, Op.MUL, Op.DIV]: for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
if bench_op in bench_ops: if bench_op in bench_ops:
benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count)) benchmarks.append((output_path, platform, data_type, bench_op,
if Op.MATMUL in bench_ops: Config.ELEMENT_WISE_ARGS, bench_count))
benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count)) for bench_op in [Op.MATMUL, Op.NN_MATMUL]:
if bench_op in bench_ops:
benchmarks.append((output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count))
if Op.NN_DENSE in bench_ops:
benchmarks.append((output_path, platform, data_type, Op.NN_DENSE, Config.NN_1D_ARGS, bench_count))
if benchmarks:
for benchmark in benchmarks: for benchmark in benchmarks:
process = mp.Process(target=run_benchmark, args=benchmark) process = mp.Process(target=run_benchmark, args=benchmark)
process.start() process.start()
process.join() process.join()
print('Benchmark done') print('Benchmark done')
if not no_compare:
compare(output_path)
print('Compare done')
if __name__ == '__main__': if __name__ == '__main__':
main() main()

41
config/benchmark.py Normal file
View file

@ -0,0 +1,41 @@
class Config:
    """Benchmark configuration shared by main.py and the per-platform benches."""

    # Target wall-clock time (in seconds) each experiment should run for;
    # overridable from the CLI via --experiment-time.
    EXPERIMENT_TIME = 1.0

    # (rows, cols) tensor shapes for the element-wise benches (add / mul / div).
    ELEMENT_WISE_ARGS = [
        (100, 100),
        (100, 200),
        (128, 128),
        (200, 100),
        (200, 200),
        (256, 256),
        (256, 512),
        (512, 256),
        (400, 400),
        (512, 512),
        (800, 800),
        (1024, 1024),
        (1800, 1800)]

    # ((m, k), (k, n)) shape pairs for the matmul benches.
    MATMUL_ARGS = [
        ((100, 100), (100, 100)),
        ((100, 200), (200, 100)),
        ((128, 128), (128, 128)),
        ((200, 100), (100, 200)),
        ((200, 200), (200, 200)),
        ((256, 256), (256, 256)),
        ((256, 512), (512, 256)),
        ((400, 400), (400, 400)),
        ((512, 256), (256, 512)),
        ((512, 512), (512, 512)),
        ((800, 800), (800, 800)),
        ((1000, 1000), (1000, 1000)),
        ((1200, 1200), (1200, 1200))]

    # (batch_size, dimension) input sizes for the single Dense-layer bench.
    NN_1D_ARGS = [
        (1, 16), (16, 16), (64, 16),
        (1, 64), (16, 64),
        (1, 150), (16, 150),
        (1, 256), (16, 256),
        (1, 400), (16, 400), (64, 400),
        (1, 512), (16, 512), (64, 512),
        (1, 800), (16, 800), (64, 800),
        (1, 1024), (16, 1024),
        (1, 2000), (16, 2000), (64, 2000),
        (1, 4000), (16, 4000), (64, 4000)]

8
requirements.txt Normal file
View file

@ -0,0 +1,8 @@
jax
jaxlib
matplotlib
numpy
pandas
seaborn
tensorflow
torch

View file

@ -1,69 +1,71 @@
from pathlib import Path from pathlib import Path
import time import time
from typing import List
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from config.benchmark import Config
from src.common import DataKey, DataType, Device, Op, Platform from src.common import DataKey, DataType, Device, Op, Platform
from src.plot import plot_experiments from src.op_info import op_infos
from src.utils import get_cpu_name, get_nvidia_name from src.utils import get_cpu_name, get_nvidia_name
class BenchBase(): class BenchBase():
def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device): def __init__(self, output_path: Path, platform: Platform, bench_op: Op,
device_type: Device, device,
data_type: DataType, dtype):
self._base_output_path = output_path self._base_output_path = output_path
self.output_path = output_path
self.platform = platform self.platform = platform
self.bench_op = bench_op self.bench_op = bench_op
self.device_type = device_type self.device_type = device_type
self.device = device self.device = device
self.dtype = None self.device_name = get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name()
self.data_type = data_type
self.dtype = dtype
self.info = op_infos[bench_op]
def set_output_path(self, device: Device, device_name: str):
self.output_path = ( self.output_path = (
self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value) self._base_output_path / f'{self.device_type.value}_{self.device_name}'
/ self.platform.value / self.bench_op.value) # noqa
def get_dtype(self, data_type: DataType): def pre_experiment(self, _experiment_args):
pass
def experiment(self):
raise NotImplementedError() raise NotImplementedError()
def experiment(self, _experiment_args, _length, _dtype, _device): def post_experiment(self):
raise NotImplementedError() pass
def name(self, _experiment_args) -> str:
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
raise NotImplementedError()
def run(self, experiment_args, experiment_count: int, data_type: DataType):
self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
def run(self, experiment_args, experiment_count: int):
if not self.output_path.exists(): if not self.output_path.exists():
self.output_path.mkdir(parents=True) self.output_path.mkdir(parents=True)
dtype = self.get_dtype(data_type) print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark'
f' with data type: {self.data_type.value}')
print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
experiment_names = [] experiment_names = []
experiment_lengths = [] experiment_lengths = []
experiment_times = [] experiment_times = []
experiment_mop = [] experiment_mop = []
for args in experiment_args: for args in experiment_args:
self.pre_experiment(args)
# warmup # warmup
for _ in range(4): for _ in range(20):
self.experiment(args, 5, dtype, self.device) self.experiment()
# speed evaluation # speed evaluation
counter = 0 counter = 0
start_time = time.time() start_time = time.time()
while time.time() - start_time < 0.2: while (time.time() - start_time) < (Config.EXPERIMENT_TIME / 5):
self.experiment(args, 10, dtype, self.device) self.experiment()
counter += 10 counter += 1
end_time = time.time() end_time = time.time()
target_time = 1.0 # in s target_time = Config.EXPERIMENT_TIME # in s
experiment_speed = counter / (end_time - start_time) # in op/s experiment_speed = counter / (end_time - start_time) # in op/s
experiment_length = max(int(target_time / experiment_count * experiment_speed), 2) experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
# print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s' # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
@ -73,24 +75,28 @@ class BenchBase():
run_times = [] run_times = []
for _ in range(experiment_count): for _ in range(experiment_count):
start_time = time.time() start_time = time.time()
self.experiment(args, experiment_length, dtype, self.device) for _ in range(experiment_length):
self.experiment()
run_times.append(time.time() - start_time) run_times.append(time.time() - start_time)
experiment_times += run_times experiment_times += run_times
experiment_names += [self.name(args)] * experiment_count experiment_names += [self.info.name(args)] * experiment_count
experiment_lengths += [experiment_length] * experiment_count experiment_lengths += [experiment_length] * experiment_count
experiment_mop += [self.mop(args)] * experiment_count experiment_mop += [self.info.mop(args)] * experiment_count
total_time = np.array(run_times, dtype=np.float64).sum() total_time = np.array(run_times, dtype=np.float64).sum()
total_glop = self.mop(args) * experiment_length * experiment_count / 1000 total_glop = self.info.mop(args) * experiment_length * experiment_count / 1000
print(f'Run {experiment_names[-1]} (x{experiment_length})' print(f'Run {experiment_names[-1]} (x{experiment_length})'
f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFOPS') f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFOPS')
self.post_experiment()
data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type) data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop)
plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform) # Avoid circular import
from src.plot import plot_experiments # pylint: disable=import-outside-toplevel
plot_experiments(self, data)
def save_experiments( def save_experiments(
self, experiment_names: list[str], experiment_times: list[float], self, experiment_names: List[str], experiment_times: List[float],
experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame: experiment_lengths: List[int], experiment_mop: List[float]) -> pd.DataFrame:
key = DataKey(self.bench_op) key = DataKey(self.bench_op)
data = pd.DataFrame( data = pd.DataFrame(
{ {
@ -102,5 +108,5 @@ class BenchBase():
key.gflops: [(mop * l) / (t * 1000.0) key.gflops: [(mop * l) / (t * 1000.0)
for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)] for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
}) })
data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t') data.to_csv(self.output_path / f'{self.bench_op.value}_{self.data_type.value}.csv', sep='\t')
return data return data

View file

@ -13,16 +13,19 @@ class DataType(Enum):
class Op(Enum): class Op(Enum):
NO_OP = 'noop'
ADD = 'add' ADD = 'add'
DIV = 'div' DIV = 'div'
MUL = 'mul' MUL = 'mul'
MATMUL = 'matmul' MATMUL = 'matmul'
NN_MATMUL = 'nn_matmul'
NN_DENSE = 'nn_dense'
class Platform(Enum): class Platform(Enum):
TF1 = 'TF1' JAX = 'jax'
# TF1 = 'TF1'
TF2 = 'TF2' TF2 = 'TF2'
TF2_V1 = 'TF2_V1'
TORCH = 'Torch' TORCH = 'Torch'

25
src/jax/add.py Normal file
View file

@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxAddBench(JaxBase):
    """Element-wise addition benchmark on the JAX platform."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.ADD, data_type)
        # NOTE(review): annotations use jnp.ndarray — jnp.DeviceArray was
        # removed in newer JAX releases.
        self.tensor_1: jnp.ndarray = None
        self.tensor_2: jnp.ndarray = None
        self.tensor_result: jnp.ndarray = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Place both operands on the device and run one warm-up add."""
        shape_1 = experiment_args
        self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        # block_until_ready() forces JAX's async dispatch to finish so the
        # caller's timing covers the actual computation.
        self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready()

    def experiment(self):
        # One timed iteration: the add plus device synchronization.
        self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready()

34
src/jax/base.py Normal file
View file

@ -0,0 +1,34 @@
from pathlib import Path
import jax.numpy as jnp
import jax
from src.base import BenchBase
from src.common import DataType, Device, Op, Platform
class JaxBase(BenchBase):
    """Common setup for all JAX benchmarks: picks the device and resolves
    the jnp dtype, then defers to BenchBase for timing/plotting.
    """

    # Maps project data types to their jnp equivalents.
    _DTYPES = {
        DataType.FLOAT16: jnp.float16,
        DataType.FLOAT32: jnp.float32,
        DataType.FLOAT64: jnp.float64,
    }

    def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
        # Newer JAX raises RuntimeError from jax.devices() when the requested
        # backend is unavailable instead of returning an empty list; treat
        # both cases as "no GPU" and fall back to CPU.
        try:
            gpu_devices = jax.devices('gpu')
        except RuntimeError:
            gpu_devices = []
        if gpu_devices:
            if len(gpu_devices) > 1:
                # Fixed typo in the original message ("WARINING").
                print('WARNING : no multiple CUDA device benchmark implemented yet (only using first)')
            device_type = Device.GPU
            device = gpu_devices[0]
        else:
            device_type = Device.CPU
            device = jax.devices('cpu')[0]
        dtype = self._DTYPES.get(data_type)
        if dtype is None:
            raise NotImplementedError(f'data_type {data_type.value} not implemented')
        super().__init__(output_path, Platform.JAX, bench_op, device_type, device, data_type, dtype)

    def experiment(self):
        """One timed iteration; concrete benches must override."""
        raise NotImplementedError()

25
src/jax/div.py Normal file
View file

@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxDivBench(JaxBase):
    """Element-wise division benchmark on the JAX platform."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        # NOTE(review): annotations use jnp.ndarray — jnp.DeviceArray was
        # removed in newer JAX releases.
        self.tensor_1: jnp.ndarray = None
        self.tensor_2: jnp.ndarray = None
        self.tensor_result: jnp.ndarray = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Place both operands on the device and run one warm-up divide."""
        shape_1 = experiment_args
        self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        # block_until_ready() forces JAX's async dispatch to finish so the
        # caller's timing covers the actual computation.
        self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready()

    def experiment(self):
        # One timed iteration: the divide plus device synchronization.
        self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready()

28
src/jax/matmul.py Normal file
View file

@ -0,0 +1,28 @@
from pathlib import Path
from typing import List, Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxMatmulBench(JaxBase):
    """Raw (un-jitted) jnp.matmul benchmark; compare with JaxNNMatmulBench."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MATMUL, data_type)
        # NOTE(review): annotations use jnp.ndarray — jnp.DeviceArray was
        # removed in newer JAX releases.
        self.tensor_1: jnp.ndarray = None
        self.tensor_2: jnp.ndarray = None
        self.tensor_result: jnp.ndarray = None

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Place both operands on the device and run one warm-up product.

        ``experiment_args`` is an ((m, k), (k, n)) shape pair (see
        Config.MATMUL_ARGS); the original annotation (Tuple[int, int]) was wrong.
        """
        shape_1, shape_2 = experiment_args
        self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype))
        # block_until_ready() forces JAX's async dispatch to finish so the
        # caller's timing covers the actual computation.
        self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready()

    def experiment(self):
        # One timed iteration: the matmul plus device synchronization.
        self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready()

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        """Typed wrapper over BenchBase.run; args are ((m, k), (k, n)) pairs."""
        super().run(experiment_args, experiment_count)

25
src/jax/mul.py Normal file
View file

@ -0,0 +1,25 @@
from pathlib import Path
from typing import Tuple
from jax import device_put
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxMulBench(JaxBase):
    """Element-wise multiplication benchmark on the JAX platform."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        self.tensor_1: jnp.DeviceArray = None
        self.tensor_2: jnp.DeviceArray = None
        self.tensor_result: jnp.DeviceArray = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Place both operands on the device and run one warm-up multiply."""
        # Element-wise op: both operands share the same shape.
        shape = experiment_args
        self.tensor_1, self.tensor_2 = (
            device_put(jnp.ones(shape, dtype=self.dtype)) for _ in range(2))
        self.tensor_result = jnp.multiply(self.tensor_1, self.tensor_2).block_until_ready()

    def experiment(self):
        """One timed multiply, synchronized so the caller's timing is meaningful."""
        product = jnp.multiply(self.tensor_1, self.tensor_2)
        self.tensor_result = product.block_until_ready()

32
src/jax/nn_dense.py Normal file
View file

@ -0,0 +1,32 @@
from pathlib import Path
from typing import Callable, List, Tuple
from jax import device_put, jit, random
from jax.experimental import stax
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
class JaxNNDenseBench(JaxBase):
    """Benchmark of a single jit-compiled Dense (fully-connected) layer."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        # NOTE(review): annotations use jnp.ndarray — jnp.DeviceArray was
        # removed in newer JAX releases.
        self.tensor: jnp.ndarray = None         # layer input, shape (batch, dim)
        self.tensor_result: jnp.ndarray = None  # layer output
        self.network: Callable = None           # jit-compiled apply function
        self.params = None                      # layer parameters from stax init

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the input, init the Dense layer, jit it, run one warm-up pass."""
        batch_size, dimension = experiment_args
        self.tensor = device_put(jnp.ones((batch_size, dimension), dtype=self.dtype))
        # NOTE(review): jax.experimental.stax moved to
        # jax.example_libraries.stax in newer JAX — confirm the pinned version.
        network_init, self.network = stax.Dense(dimension)
        _, self.params = network_init(random.PRNGKey(1), (batch_size, dimension))
        self.network = jit(self.network)
        # Warm-up call triggers jit compilation so timed runs measure execution only.
        self.tensor_result = self.network(self.params, self.tensor)

    def experiment(self):
        # NOTE(review): unlike the element-wise benches there is no
        # block_until_ready() here; async dispatch may make timings
        # optimistic — confirm this is intended.
        self.tensor_result = self.network(self.params, self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        """Typed wrapper over BenchBase.run; args are (batch, dim) pairs."""
        super().run(experiment_args, experiment_count)

33
src/jax/nn_matmul.py Normal file
View file

@ -0,0 +1,33 @@
from pathlib import Path
from typing import List, Tuple
from jax import device_put, jit
import jax.numpy as jnp
from src.common import DataType, Op
from src.jax.base import JaxBase
def matmul(tensor_1: jnp.ndarray, tensor_2: jnp.ndarray) -> jnp.ndarray:
    """Return the matrix product ``tensor_1 @ tensor_2``.

    Defined at module level so JaxNNMatmulBench can jit-compile it once.
    The annotations use ``jnp.ndarray`` instead of the original
    ``jnp.DeviceArray``: function annotations are evaluated at def time and
    ``DeviceArray`` was removed from newer JAX releases, which would make the
    module fail to import.
    """
    return tensor_1 @ tensor_2
class JaxNNMatmulBench(JaxBase):
    """Benchmark of the jit-compiled matmul wrapper (contrast with JaxMatmulBench)."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_MATMUL, data_type)
        # NOTE(review): annotations use jnp.ndarray — jnp.DeviceArray was
        # removed in newer JAX releases.
        self.tensor_1: jnp.ndarray = None
        self.tensor_2: jnp.ndarray = None
        self.tensor_result: jnp.ndarray = None
        # jit once at construction; compilation happens on the first call.
        self.network = jit(matmul)

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Allocate operands and run one warm-up call (triggers jit compile).

        ``experiment_args`` is an ((m, k), (k, n)) shape pair (main.py feeds
        Config.MATMUL_ARGS to NN_MATMUL); the original Tuple[int, int]
        annotation was wrong.
        """
        shape_1, shape_2 = experiment_args
        self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype))
        self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype))
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def experiment(self):
        # NOTE(review): no block_until_ready() — async dispatch may return
        # before the product is computed; confirm timings stay comparable
        # with JaxMatmulBench.
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        """Typed wrapper over BenchBase.run; args are ((m, k), (k, n)) pairs."""
        super().run(experiment_args, experiment_count)

20
src/jax/ops.py Normal file
View file

@ -0,0 +1,20 @@
from typing import Dict, Type

from src.common import Op
from src.jax.add import JaxAddBench
from src.jax.base import JaxBase
from src.jax.div import JaxDivBench
from src.jax.matmul import JaxMatmulBench
from src.jax.mul import JaxMulBench
from src.jax.nn_dense import JaxNNDenseBench
from src.jax.nn_matmul import JaxNNMatmulBench
# Registry of the JAX benchmark implementations, keyed by operation.
# Uses typing.Dict instead of the Python 3.9+ ``dict[...]`` subscript: this
# module-level annotation is evaluated at import time, so the builtin-generic
# form breaks on older interpreters — which this commit explicitly targets
# ("Compatibility for older python version (typing)") and which every other
# module (op_info.py, main.py) already follows with List/Dict.
jax_ops: Dict[Op, Type[JaxBase]] = {
    Op.ADD: JaxAddBench,
    Op.MUL: JaxMulBench,
    Op.DIV: JaxDivBench,
    Op.MATMUL: JaxMatmulBench,
    Op.NN_MATMUL: JaxNNMatmulBench,
    Op.NN_DENSE: JaxNNDenseBench
}

85
src/op_info.py Normal file
View file

@ -0,0 +1,85 @@
from typing import Dict, List, Type, Tuple
from src.common import Op
class _BaseInfo():
    """Interface for per-op metadata: a display label and a mega-op count.

    Concrete subclasses override both static methods; instances are never
    created (the op_infos registry maps Op values to the classes themselves).
    """

    @staticmethod
    def name(experiment_args) -> str:
        """Human-readable label for one experiment configuration."""
        raise NotImplementedError()

    @staticmethod
    def mop(experiment_args) -> float:
        """Mega-operations performed by a single run of the op."""
        raise NotImplementedError()
class AddInfo(_BaseInfo):
    """Label and mega-op count for the element-wise add benchmark."""

    @staticmethod
    def name(experiment_args: Tuple[int, int]) -> str:
        """E.g. (100, 200) -> '100x200 + 100x200'."""
        rows, cols = experiment_args
        return f'{rows}x{cols} + {rows}x{cols}'

    @staticmethod
    def mop(experiment_args: Tuple[int, int]) -> float:
        """One addition per output element, expressed in mega-ops."""
        rows, cols = experiment_args
        return rows * cols / 1_000_000
class DivInfo(_BaseInfo):
    """Label and mega-op count for the element-wise divide benchmark."""

    @staticmethod
    def name(experiment_args: Tuple[int, int]) -> str:
        """E.g. (100, 200) -> '100x200 / 100x200'."""
        rows, cols = experiment_args
        return f'{rows}x{cols} / {rows}x{cols}'

    @staticmethod
    def mop(experiment_args: Tuple[int, int]) -> float:
        """One division per output element, expressed in mega-ops."""
        rows, cols = experiment_args
        return rows * cols / 1_000_000
class MulInfo(_BaseInfo):
    """Label and mega-op count for the element-wise multiply benchmark."""

    @staticmethod
    def name(experiment_args: Tuple[int, int]) -> str:
        """E.g. (100, 200) -> '100x200 * 100x200'."""
        rows, cols = experiment_args
        return f'{rows}x{cols} * {rows}x{cols}'

    @staticmethod
    def mop(experiment_args: Tuple[int, int]) -> float:
        """One multiplication per output element, expressed in mega-ops."""
        rows, cols = experiment_args
        return rows * cols / 1_000_000
class MatmulInfo(_BaseInfo):
    """Label and mega-op count for an (m, k) @ (k, n) matrix product.

    Shared by Op.MATMUL and Op.NN_MATMUL (see op_infos), which compute the
    same product. The annotations are fixed here: ``experiment_args`` is a
    single ((m, k), (k, n)) shape pair, not a List of them as the original
    annotation claimed.
    """

    @staticmethod
    def name(experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]) -> str:
        """E.g. ((100, 200), (200, 100)) -> '100x200 @ 200x100'."""
        shape_1, shape_2 = experiment_args
        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'

    @staticmethod
    def mop(experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]) -> float:
        """Mega-ops: m*n output elements, 2*(k-1) ops counted per element.

        NOTE(review): the textbook count is m*n*(2k-1) (k multiplies,
        k-1 adds); this uses 2*(k-1) — confirm the convention is intended.
        Kept unchanged so results stay comparable with existing CSVs.
        """
        shape_1, shape_2 = experiment_args
        return (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1)
class DenseInfo(_BaseInfo):
    """Label and mega-op count for a Dense layer on a (batch, dim) input."""

    @staticmethod
    def name(experiment_args: Tuple[int, int]) -> str:
        """E.g. (16, 512) -> 'Dense((16x512))'."""
        batch_size, dimension = experiment_args
        return f'Dense(({batch_size}x{dimension}))'

    @staticmethod
    def mop(experiment_args: Tuple[int, int]) -> float:
        """Mega-ops of ``x @ W + b`` with x: (batch, dim) and W: (dim, dim).

        Matches MatmulInfo's convention for the (batch, dim) @ (dim, dim)
        product — batch*dim output elements at 2*(dim-1) ops each — plus one
        add per output element for the bias.

        Bug fix: the original counted dimension*dimension output elements for
        the matmul term and then multiplied by batch_size again, inflating
        the result by a factor of ``dimension`` relative to MatmulInfo.
        """
        batch_size, dimension = experiment_args
        matmul_mop = (batch_size * dimension / 1_000_000) * 2 * (dimension - 1)
        bias_mop = batch_size * dimension / 1_000_000
        return matmul_mop + bias_mop
# Registry mapping each benchmarked Op to its metadata class (display label
# and mega-op count). MATMUL and NN_MATMUL share MatmulInfo because they
# perform the same matrix product.
op_infos: Dict[Op, Type[_BaseInfo]] = {
    Op.ADD: AddInfo,
    Op.DIV: DivInfo,
    Op.MUL: MulInfo,
    Op.MATMUL: MatmulInfo,
    Op.NN_MATMUL: MatmulInfo,
    Op.NN_DENSE: DenseInfo
}

View file

@ -1,4 +1,7 @@
from pathlib import Path from pathlib import Path
import math
import multiprocessing as mp
import os
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@ -6,11 +9,20 @@ import pandas as pd
import seaborn as sns import seaborn as sns
from src.base import BenchBase
from src.common import DataKey, DataType, Op, Platform from src.common import DataKey, DataType, Op, Platform
def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform): class CompKey:
key = DataKey(bench_op) def __init__(self):
self.data_type = 'data_type'
self.device = 'device'
self.bench_op = 'op'
self.platform = 'platform'
def plot_experiments(bench: BenchBase, data: pd.DataFrame):
key = DataKey(bench.bench_op)
sum_data = data[[key.experiment, key.time, key.count]].groupby( sum_data = data[[key.experiment, key.time, key.count]].groupby(
key.experiment, as_index=False, sort=False).sum() key.experiment, as_index=False, sort=False).sum()
mean_data = data[[key.experiment, key.speed]].groupby( mean_data = data[[key.experiment, key.speed]].groupby(
@ -20,11 +32,11 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
sns.set_theme(style="ticks") sns.set_theme(style="ticks")
figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12)) figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
for axe in axes[:-1]: for axe in axes[:-1]:
axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique()) chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
if max_data[key.mop].max() > max_data[key.mop].min() * 100:
axes[0].set_yscale("log") axes[0].set_yscale("log")
for patch, value in zip(chart.patches, max_data[key.mop]): for patch, value in zip(chart.patches, max_data[key.mop]):
chart.annotate(f'{value:0.3f}', chart.annotate(f'{value:0.3f}',
@ -33,6 +45,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points') textcoords='offset points')
chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1]) chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
if data[key.speed].max() > data[key.speed].min() * 100:
axes[1].set_yscale("log")
for patch, value in zip(chart.patches, mean_data[key.speed]): for patch, value in zip(chart.patches, mean_data[key.speed]):
chart.annotate(f'{value:.3f}', chart.annotate(f'{value:.3f}',
(patch.get_x() + patch.get_width() / 2.0, patch.get_height()), (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
@ -40,6 +54,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points') textcoords='offset points')
chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2]) chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
if data[key.gflops].max() > data[key.gflops].min() * 100:
axes[2].set_yscale("log")
for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]): for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
chart.annotate(f'{(mop * count / 1000) / value:.3f}', chart.annotate(f'{(mop * count / 1000) / value:.3f}',
(patch.get_x() + patch.get_width() / 2.0, patch.get_height()), (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
@ -47,5 +63,62 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType,
textcoords='offset points') textcoords='offset points')
plt.xticks(rotation=20) plt.xticks(rotation=20)
plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05) plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.91, right=0.99, bottom=0.1, left=0.05)
plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png') figure.suptitle(f'{bench.platform.value} {bench.bench_op.value} ({bench.data_type.value})', fontsize=16)
axes[0].set_title(f'{bench.device_name}', fontsize=12)
plt.savefig(bench.output_path / f'{bench.bench_op.value}_{bench.data_type.value}.png')
def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path):
op_data = all_data[(all_data[comp_key.bench_op] == bench_op) & (all_data[comp_key.device] == device)]
platform_list = op_data[comp_key.platform].unique()
if len(platform_list) <= 1:
return
key = DataKey(Op(bench_op))
sns.set_theme(style="ticks")
for data_type in op_data[comp_key.data_type].unique():
data = op_data[op_data[comp_key.data_type] == data_type]
graph = sns.catplot(x=key.experiment, y=key.gflops, hue=comp_key.platform, data=data,
kind='bar', estimator=np.median, height=8, aspect=1.4)
if data[key.gflops].max() > data[key.gflops].min() * 100:
graph.set(yscale="log")
plt.xticks(rotation=70, fontsize=8)
plt.subplots_adjust(top=0.92, bottom=0.25)
plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16)
plt.title(f'{device}', fontsize=12)
plt.savefig(output_path / device / f'{bench_op}_{data_type}.png')
def compare(output_path: Path):
all_data: pd.DataFrame = None
comp_key = CompKey()
for data_path in output_path.rglob('*.csv'):
if len(data_path.parents) <= 4:
print(f'Warning: cannot parse data at path {data_path} (subfolders missing)')
data_type = DataType(data_path.stem.split('_')[-1])
bench_op = Op(data_path.parents[0].name)
platform = Platform(data_path.parents[1].name)
device_name = data_path.parents[2].name
current_data = pd.read_csv(data_path, sep='\t')
current_data[comp_key.data_type] = data_type.value
current_data[comp_key.bench_op] = bench_op.value
current_data[comp_key.platform] = platform.value
current_data[comp_key.device] = device_name
if all_data is None:
all_data = current_data
else:
all_data = all_data.append(current_data, ignore_index=True, verify_integrity=True)
# Compare between platforms
comp_args = []
for device in all_data[comp_key.device].unique():
for bench_op in all_data[comp_key.bench_op].unique():
comp_args.append((all_data, comp_key, device, bench_op, output_path))
with mp.Pool(processes=math.ceil(os.cpu_count() * 0.8)) as pool:
pool.starmap(_draw_comparison, comp_args)

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import torch import torch
@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchAddBench(TorchBase): class TorchAddBench(TorchBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD) super().__init__(output_path, Op.ADD, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 + self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 + tensor_2 self.tensor_result = self.tensor_1 + self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -7,7 +7,7 @@ from src.common import DataType, Device, Op, Platform
class TorchBase(BenchBase): class TorchBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op): def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
if torch.cuda.is_available(): if torch.cuda.is_available():
if torch.cuda.device_count() > 1: if torch.cuda.device_count() > 1:
print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)') print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)')
@ -18,22 +18,16 @@ class TorchBase(BenchBase):
device_type = Device.CPU device_type = Device.CPU
device = torch.device('cpu') device = torch.device('cpu')
super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)
def get_dtype(self, data_type: DataType) -> torch.dtype:
if data_type == DataType.FLOAT16: if data_type == DataType.FLOAT16:
return torch.float16 dtype = torch.float16
if data_type == DataType.FLOAT32: elif data_type == DataType.FLOAT32:
return torch.float32 dtype = torch.float32
if data_type == DataType.FLOAT64: elif data_type == DataType.FLOAT64:
return torch.float64 dtype = torch.float64
else:
raise NotImplementedError(f'data_type {data_type.value} not implemented') raise NotImplementedError(f'data_type {data_type.value} not implemented')
def experiment(self, _experiment_args, _length, _dtype, _device): super().__init__(output_path, Platform.TORCH, bench_op, device_type, device, data_type, dtype)
raise NotImplementedError()
def name(self, _experiment_args) -> str: def experiment(self):
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
raise NotImplementedError() raise NotImplementedError()

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import torch import torch
@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchDivBench(TorchBase): class TorchDivBench(TorchBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV) super().__init__(output_path, Op.DIV, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 / self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 / tensor_2 self.tensor_result = self.tensor_1 / self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import torch import torch
@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchMatmulBench(TorchBase): class TorchMatmulBench(TorchBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL) super().__init__(output_path, Op.MATMUL, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args shape_1, shape_2 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False) self.tensor_2 = torch.ones(shape_2, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 @ self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 @ tensor_2 self.tensor_result = self.tensor_1 @ self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
shape_1, shape_2 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1, shape_2 = experiment_args
return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
def run(self,
experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import torch import torch
@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase
class TorchMulBench(TorchBase): class TorchMulBench(TorchBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL) super().__init__(output_path, Op.MUL, data_type)
self.tensor_1: torch.Tensor = None
self.tensor_2: torch.Tensor = None
self.tensor_result: torch.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
self.tensor_result = self.tensor_1 * self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 * tensor_2 self.tensor_result = self.tensor_1 * self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

36
src/pytorch/nn_dense.py Normal file
View file

@ -0,0 +1,36 @@
from pathlib import Path
from typing import List, Tuple
import torch
from src.common import DataType, Op
from src.pytorch.base import TorchBase
class DenseNetwork(torch.nn.Module):
    """A single fully-connected layer mapping ``input_dim`` features back to ``input_dim``."""

    def __init__(self, input_dim: int, dtype: torch.dtype):
        super().__init__()
        # Square layer: the benchmark keeps input and output width identical.
        self.dense = torch.nn.Linear(in_features=input_dim, out_features=input_dim, dtype=dtype)

    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
        """Apply the dense layer to ``input_data``."""
        return self.dense(input_data)
class TorchNNDenseBench(TorchBase):
    """Benchmark of a single dense (fully connected) layer forward pass in PyTorch."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        # Populated by pre_experiment so that experiment() needs no arguments.
        self.tensor: torch.Tensor = None
        self.tensor_result: torch.Tensor = None
        self.network: torch.nn.Module = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the input batch and the network, then run one warm-up forward pass."""
        batch_size, dimension = experiment_args
        input_shape = (batch_size, dimension)
        self.tensor = torch.ones(input_shape, dtype=self.dtype, device=self.device, requires_grad=False)
        self.network = DenseNetwork(dimension, self.dtype).to(self.device)
        # Warm-up call so timed runs do not include one-time initialisation cost.
        self.tensor_result = self.network(self.tensor)

    def experiment(self):
        """One timed forward pass through the dense network."""
        self.tensor_result = self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)

34
src/pytorch/nn_matmul.py Normal file
View file

@ -0,0 +1,34 @@
from pathlib import Path
from typing import List, Tuple
import torch
from src.common import DataType, Op
from src.pytorch.base import TorchBase
class MatMulNetwork(torch.nn.Module):
    """Parameter-less module that returns the matrix product of its two inputs."""

    def forward(self, input_1: torch.Tensor, input_2: torch.Tensor) -> torch.Tensor:
        # Same operation as ``input_1 @ input_2``, spelled via the explicit API.
        return torch.matmul(input_1, input_2)
class TorchNNMatmulBench(TorchBase):
    """Benchmark of a matrix product executed through an nn.Module call in PyTorch."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_MATMUL, data_type)
        # Populated by pre_experiment so that experiment() needs no arguments.
        self.tensor_1: torch.Tensor = None
        self.tensor_2: torch.Tensor = None
        self.tensor_result: torch.Tensor = None
        self.network: torch.nn.Module = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operands, build the module, and run one warm-up pass."""
        shape_left, shape_right = experiment_args

        def ones(shape):
            # All operands share dtype/device and never need gradients.
            return torch.ones(shape, dtype=self.dtype, device=self.device, requires_grad=False)

        self.tensor_1 = ones(shape_left)
        self.tensor_2 = ones(shape_right)
        self.network = MatMulNetwork()
        # Warm-up call so timed runs do not include one-time initialisation cost.
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def experiment(self):
        """One timed matrix product through the module."""
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)

View file

@ -6,11 +6,15 @@ from src.pytorch.base import TorchBase
from src.pytorch.div import TorchDivBench from src.pytorch.div import TorchDivBench
from src.pytorch.mul import TorchMulBench from src.pytorch.mul import TorchMulBench
from src.pytorch.matmul import TorchMatmulBench from src.pytorch.matmul import TorchMatmulBench
from src.pytorch.nn_dense import TorchNNDenseBench
from src.pytorch.nn_matmul import TorchNNMatmulBench
torch_ops: dict[Op, Type[TorchBase]] = { torch_ops: dict[Op, Type[TorchBase]] = {
Op.ADD: TorchAddBench, Op.ADD: TorchAddBench,
Op.MUL: TorchMulBench, Op.MUL: TorchMulBench,
Op.DIV: TorchDivBench, Op.DIV: TorchDivBench,
Op.MATMUL: TorchMatmulBench Op.MATMUL: TorchMatmulBench,
Op.NN_MATMUL: TorchNNMatmulBench,
Op.NN_DENSE: TorchNNDenseBench
} }

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import tensorflow as tf import tensorflow as tf
@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFAddBench(TFBase): class TFAddBench(TFBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.ADD) super().__init__(output_path, Op.ADD, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
with device: with self.device:
tensor_1 = tf.ones(shape_1, dtype=dtype) self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype) self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 + self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 + tensor_2 self.tensor_result = self.tensor_1 + self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -7,13 +7,13 @@ from src.common import DataType, Device, Op, Platform
class TFBase(BenchBase): class TFBase(BenchBase):
def __init__(self, output_path: Path, bench_op: Op): def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
gpus = tf.config.list_physical_devices('GPU') gpus = tf.config.list_physical_devices('GPU')
if gpus: if gpus:
if len(gpus) > 1: if len(gpus) > 1:
print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)') print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)')
tf.config.experimental.set_memory_growth(gpus[0], True) # tf.config.experimental.set_memory_growth(gpus[0], True)
tf.config.set_visible_devices(gpus[0], 'GPU') tf.config.set_visible_devices(gpus[0], 'GPU')
# logical_gpus = tf.config.list_logical_devices('GPU') # logical_gpus = tf.config.list_logical_devices('GPU')
device_type = Device.GPU device_type = Device.GPU
@ -22,22 +22,16 @@ class TFBase(BenchBase):
device_type = Device.CPU device_type = Device.CPU
device = tf.device('/CPU:0') device = tf.device('/CPU:0')
super().__init__(output_path, Platform.TF2, bench_op, device_type, device)
def get_dtype(self, data_type: DataType) -> tf.DType:
if data_type == DataType.FLOAT16: if data_type == DataType.FLOAT16:
return tf.float16 dtype = tf.float16
if data_type == DataType.FLOAT32: elif data_type == DataType.FLOAT32:
return tf.float32 dtype = tf.float32
if data_type == DataType.FLOAT64: elif data_type == DataType.FLOAT64:
return tf.float64 dtype = tf.float64
else:
raise RuntimeError(f'data_type {data_type.value} not implemented') raise RuntimeError(f'data_type {data_type.value} not implemented')
def experiment(self, _experiment_args, _length, _dtype, _device): super().__init__(output_path, Platform.TF2, bench_op, device_type, device, data_type, dtype)
raise NotImplementedError()
def name(self, _experiment_args) -> str: def experiment(self):
raise NotImplementedError()
def mop(self, _experiment_args) -> float:
raise NotImplementedError() raise NotImplementedError()

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import tensorflow as tf import tensorflow as tf
@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFDivBench(TFBase): class TFDivBench(TFBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.DIV) super().__init__(output_path, Op.DIV, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
with device: with self.device:
tensor_1 = tf.ones(shape_1, dtype=dtype) self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype) self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 / self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 / tensor_2 self.tensor_result = self.tensor_1 / self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import tensorflow as tf import tensorflow as tf
@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFMatmulBench(TFBase): class TFMatmulBench(TFBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MATMUL) super().__init__(output_path, Op.MATMUL, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1, shape_2 = experiment_args shape_1, shape_2 = experiment_args
with device: with self.device:
tensor_1 = tf.ones(shape_1, dtype=dtype) self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
tensor_2 = tf.ones(shape_2, dtype=dtype) self.tensor_2 = tf.ones(shape_2, dtype=self.dtype)
self.tensor_result = self.tensor_1 @ self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 @ tensor_2 self.tensor_result = self.tensor_1 @ self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
shape_1, shape_2 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1, shape_2 = experiment_args
return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
def run(self,
experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

View file

@ -1,4 +1,5 @@
from pathlib import Path from pathlib import Path
from typing import List, Tuple
import tensorflow as tf import tensorflow as tf
@ -7,28 +8,21 @@ from src.tf_2.base import TFBase
class TFMulBench(TFBase): class TFMulBench(TFBase):
def __init__(self, output_path: Path): def __init__(self, output_path: Path, data_type: DataType):
super().__init__(output_path, Op.MUL) super().__init__(output_path, Op.MUL, data_type)
self.tensor_1: tf.Tensor = None
self.tensor_2: tf.Tensor = None
self.tensor_result: tf.Tensor = None
def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device): def pre_experiment(self, experiment_args: Tuple[int, int]):
shape_1 = experiment_args shape_1 = experiment_args
with device: with self.device:
tensor_1 = tf.ones(shape_1, dtype=dtype) self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
tensor_2 = tf.ones(shape_1, dtype=dtype) self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
self.tensor_result = self.tensor_1 * self.tensor_2
for _ in range(length): def experiment(self):
_ = tensor_1 * tensor_2 self.tensor_result = self.tensor_1 * self.tensor_2
def name(self, experiment_args: tuple[int, int]) -> str: def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
shape_1 = experiment_args super().run(experiment_args, experiment_count)
return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
def mop(self, experiment_args: tuple[int, int]) -> float:
shape_1 = experiment_args
return shape_1[0] * shape_1[1] / 1000_000
def run(self,
experiment_args: list[tuple[int, int]],
experiment_count: int,
data_type: DataType):
super().run(experiment_args, experiment_count, data_type)

35
src/tf_2/nn_dense.py Normal file
View file

@ -0,0 +1,35 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
from src.common import DataType, Op
from src.tf_2.base import TFBase
class DenseModel(tf.keras.Model):
    """Keras model with one dense layer mapping ``input_dim`` features to ``input_dim`` units."""

    def __init__(self, input_dim: int, dtype: tf.DType = tf.float32):
        # Fix: the parameter was declared as ``dtype=tf.DType`` — the *class*
        # tf.DType used as a default value where a type annotation was intended
        # (cf. the PyTorch twin, ``dtype: torch.dtype``). A caller omitting the
        # argument would have passed the class itself into Dense(dtype=...).
        super().__init__()
        self.dense = tf.keras.layers.Dense(input_dim, dtype=dtype)

    def call(self, input_tensor: tf.Tensor) -> tf.Tensor:
        """Apply the dense layer to ``input_tensor``."""
        return self.dense(input_tensor)
class TFNNDenseBench(TFBase):
    """Benchmark of a single dense layer forward pass in TensorFlow 2 (Keras)."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        self.tensor: tf.Tensor = None
        # Fix: keep the forward-pass output, for consistency with the other
        # NN benchmarks (e.g. TFNNMatmulBench) and so the result is referenced.
        self.tensor_result: tf.Tensor = None
        self.network: tf.keras.Model = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Create the input batch and the model on the benchmark device."""
        batch_size, dimension = experiment_args
        with self.device:
            self.tensor = tf.ones((batch_size, dimension), dtype=self.dtype)
            self.network = DenseModel(dimension, self.dtype)
            # Warm-up call: builds the layer weights up front so the timed runs
            # do not include one-time layer construction (mirrors the PyTorch
            # benchmark, which also runs one untimed forward pass here).
            self.tensor_result = self.network(self.tensor)

    def experiment(self):
        """One timed forward pass; the output is stored so it cannot be discarded."""
        self.tensor_result = self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)

34
src/tf_2/nn_matmul.py Normal file
View file

@ -0,0 +1,34 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow as tf
from src.common import DataType, Op
from src.tf_2.base import TFBase
class MatmulModel(tf.keras.Model):
    """Parameter-less Keras model computing the matrix product of its two inputs."""

    def call(self, tensor_1: tf.Tensor, tensor_2: tf.Tensor) -> tf.Tensor:
        """Return ``tensor_1`` matrix-multiplied by ``tensor_2``."""
        product = tf.matmul(tensor_1, tensor_2)
        return product
class TFNNMatmulBench(TFBase):
    """Benchmark of a matrix product executed through a Keras model call in TF2."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_MATMUL, data_type)
        # Populated by pre_experiment so that experiment() needs no arguments.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None
        self.network: tf.keras.Model = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operands on the benchmark device and build the model."""
        shape_left, shape_right = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_left, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_right, dtype=self.dtype)
            self.network = MatmulModel()

    def experiment(self):
        """One timed model call computing the matrix product."""
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)

View file

@ -6,11 +6,15 @@ from src.tf_2.base import TFBase
from src.tf_2.div import TFDivBench from src.tf_2.div import TFDivBench
from src.tf_2.mul import TFMulBench from src.tf_2.mul import TFMulBench
from src.tf_2.matmul import TFMatmulBench from src.tf_2.matmul import TFMatmulBench
from src.tf_2.nn_dense import TFNNDenseBench
from src.tf_2.nn_matmul import TFNNMatmulBench
tf2_ops: dict[Op, Type[TFBase]] = { tf2_ops: dict[Op, Type[TFBase]] = {
Op.ADD: TFAddBench, Op.ADD: TFAddBench,
Op.MUL: TFMulBench, Op.MUL: TFMulBench,
Op.DIV: TFDivBench, Op.DIV: TFDivBench,
Op.MATMUL: TFMatmulBench Op.MATMUL: TFMatmulBench,
Op.NN_MATMUL: TFNNMatmulBench,
Op.NN_DENSE: TFNNDenseBench
} }

30
src/tf_2_v1/add.py Normal file
View file

@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFAddBench(TFBase):
    """TF1-style (graph/session) benchmark of element-wise tensor addition."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.ADD, data_type)
        # Graph node evaluated by each timed session.run call.
        self.add_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the addition graph for this shape and initialise its variables."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        lhs = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        rhs = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        self.add_op = tf.add(lhs, rhs)
        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed evaluation of the addition node."""
        self.session.run(self.add_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)

43
src/tf_2_v1/base.py Normal file
View file

@ -0,0 +1,43 @@
from pathlib import Path
import tensorflow.compat.v1 as tf
from src.base import BenchBase
from src.common import DataType, Device, Op, Platform
class TFBase(BenchBase):
    """Shared setup for TensorFlow 1.x-style (graph/session) benchmarks."""

    def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
        # Map the framework-agnostic data type onto the TF dtype used by all ops.
        if data_type == DataType.FLOAT16:
            dtype = tf.float16
        elif data_type == DataType.FLOAT32:
            dtype = tf.float32
        elif data_type == DataType.FLOAT64:
            dtype = tf.float64
        else:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        # NOTE(review): device is hard-coded to GPU with no tf.device handle —
        # TF1 sessions place ops themselves; confirm this is intended on
        # CPU-only hosts.
        super().__init__(output_path, Platform.TF2_V1, bench_op, Device.GPU, None, data_type, dtype)
        self.session: tf.Session = None

    def pre_experiment(self, _experiment_args):
        """Open a fresh session; subclasses build their graph after calling this."""
        tf.disable_v2_behavior()
        # tf.disable_eager_execution()
        # gpu_options = tf.GPUOptions(allow_growth=True)
        # session_config = tf.ConfigProto(gpu_options=gpu_options)
        # self.session = tf.Session(config=session_config)
        self.session = tf.Session()
        # Fix: removed ``self.session.as_default()`` — it returns a context
        # manager and installs nothing unless entered via ``with``, so the bare
        # call was a no-op. All evaluations go through self.session.run anyway.

    def post_experiment(self):
        """Release the session and clear the graph so experiments stay independent."""
        self.session.close()
        tf.reset_default_graph()

    def experiment(self):
        raise NotImplementedError()

    def name(self, _experiment_args) -> str:
        raise NotImplementedError()

    def mop(self, _experiment_args) -> float:
        raise NotImplementedError()

30
src/tf_2_v1/div.py Normal file
View file

@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFDivBench(TFBase):
    """TF1-style (graph/session) benchmark of element-wise tensor division."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        # Graph node evaluated by each timed session.run call.
        self.div_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the division graph for this shape and initialise its variables."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        lhs = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        rhs = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        self.div_op = tf.math.truediv(lhs, rhs)
        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed evaluation of the division node."""
        self.session.run(self.div_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)

30
src/tf_2_v1/matmul.py Normal file
View file

@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFMatmulBench(TFBase):
    """TF1-style (graph/session) benchmark of a matrix product."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MATMUL, data_type)
        # Graph node evaluated by each timed session.run call.
        self.matmul_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the matmul graph for these shapes and initialise its variables."""
        super().pre_experiment(experiment_args)
        shape_left, shape_right = experiment_args
        lhs = tf.get_variable('tensor_1', shape=shape_left, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        rhs = tf.get_variable('tensor_2', shape=shape_right, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        self.matmul_op = tf.matmul(lhs, rhs)
        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed evaluation of the matmul node."""
        self.session.run(self.matmul_op)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)

30
src/tf_2_v1/mul.py Normal file
View file

@ -0,0 +1,30 @@
from pathlib import Path
from typing import List, Tuple
import tensorflow.compat.v1 as tf
from src.common import DataType, Op
from src.tf_2_v1.base import TFBase
class TFMulBench(TFBase):
    """TF1-style (graph/session) benchmark of element-wise tensor multiplication."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        # Graph node evaluated by each timed session.run call.
        self.mul_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the multiplication graph for this shape and initialise its variables."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        lhs = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        rhs = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        self.mul_op = tf.multiply(lhs, rhs)
        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed evaluation of the multiplication node."""
        self.session.run(self.mul_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)

16
src/tf_2_v1/ops.py Normal file
View file

@ -0,0 +1,16 @@
from typing import Dict, Type

from src.common import Op
from src.tf_2_v1.add import TFAddBench
from src.tf_2_v1.base import TFBase
from src.tf_2_v1.div import TFDivBench
from src.tf_2_v1.mul import TFMulBench
from src.tf_2_v1.matmul import TFMatmulBench

# Registry mapping each benchmark operation to its TF1-compat implementation.
# ``Dict[...]`` (typing) instead of ``dict[...]``: variable annotations are
# evaluated at runtime, and subscripting builtins requires Python >= 3.9,
# while this codebase targets older interpreters (see the List/Tuple usage
# throughout the other modules).
tf2v1_ops: Dict[Op, Type[TFBase]] = {
    Op.ADD: TFAddBench,
    Op.MUL: TFMulBench,
    Op.DIV: TFDivBench,
    Op.MATMUL: TFMatmulBench
}