diff --git a/benchmark.py b/benchmark.py index 8aa5f1a..39f7783 100644 --- a/benchmark.py +++ b/benchmark.py @@ -2,28 +2,46 @@ from argparse import ArgumentParser import multiprocessing as mp import os from pathlib import Path -from typing import Type +import sys +from typing import List, Type +from config.benchmark import Config from src.base import BenchBase from src.common import DataType, Op, Platform +from src.plot import compare def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op, bench_args, bench_count: int): - if platform == Platform.TF2: + if platform == Platform.JAX: + from src.jax.ops import jax_ops + if bench_op not in jax_ops: + print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') + else: + jax_ops[bench_op](output_path, data_type).run(bench_args, bench_count) + print() + elif platform == Platform.TF2: os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' from src.tf_2.ops import tf2_ops if bench_op not in tf2_ops: print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') else: - tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type) + tf2_ops[bench_op](output_path, data_type).run(bench_args, bench_count) + print() + elif platform == Platform.TF2_V1: + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + from src.tf_2_v1.ops import tf2v1_ops + if bench_op not in tf2v1_ops: + print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') + else: + tf2v1_ops[bench_op](output_path, data_type).run(bench_args, bench_count) print() elif platform == Platform.TORCH: from src.pytorch.ops import torch_ops if bench_op not in torch_ops: print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') else: - torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type) + torch_ops[bench_op](output_path, data_type).run(bench_args, bench_count) print() else: print(f'Platform {platform.value} is not implemented yet') @@ -32,6 +50,8 @@ def 
run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be def main(): parser = ArgumentParser() parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files') + parser.add_argument('--no-benchmark', action='store_true', default=False, help='Avoid running benchmarks') + parser.add_argument('--no-compare', action='store_true', default=False, help='Avoid running platform comparaison') parser.add_argument('--count', type=int, default=30, help='Number of experiments per benchmark (for stastistical analysis)') parser.add_argument('--platform', nargs='*', type=Platform, @@ -39,62 +59,56 @@ def main(): parser.add_argument('--data', nargs='*', type=DataType, help='List of data type to benchmark [float16, float32, float64] (else all are used)') parser.add_argument('--op', nargs='*', type=Op, - help='List of operation to benchmark [add, mul, div, matmul] (else all are used)') + help='List of operation to benchmark (add, mul, div, matmul, etc) (else all are used)') + parser.add_argument('--list-op', action='store_true', + help='List all possible operation to benchmark (no further action will be done)') + parser.add_argument( + '--experiment-time', type=float, + help=f'Change time (in s) per experiment (default={Config.EXPERIMENT_TIME:0.3f}s)') arguments = parser.parse_args() + if arguments.list_op: + print(', '.join([op.value for op in Op])) + sys.exit(0) + output_path: Path = arguments.output + no_benchmark: bool = arguments.no_benchmark + no_compare: bool = arguments.no_compare bench_count: int = arguments.count - platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform) - data: list[DataType] = arguments.data if arguments.data is not None else list(DataType) - bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op) + platforms: List[Platform] = arguments.platform if arguments.platform is not None else list(Platform) + data: List[DataType] = arguments.data if 
arguments.data is not None else list(DataType) + bench_ops: List[Op] = arguments.op if arguments.op is not None else list(Op) + + if arguments.experiment_time: + Config.EXPERIMENT_TIME = arguments.experiment_time if not output_path.exists(): output_path.mkdir(parents=True) - benchmarks: list[dict[Op, Type[BenchBase]]] = [] - element_wise_args = [ - (100, 100), - (100, 200), - (128, 128), - (200, 100), - (200, 200), - (256, 256), - (256, 512), - (512, 256), - (400, 400), - (512, 512), - (800, 800), - (1024, 1024), - (1800, 1800)] - matmul_args = [ - ((100, 100), (100, 100)), - ((100, 200), (200, 100)), - ((128, 128), (128, 128)), - ((200, 100), (100, 200)), - ((200, 200), (200, 200)), - ((256, 256), (256, 256)), - ((256, 512), (512, 256)), - ((400, 400), (400, 400)), - ((512, 256), (256, 512)), - ((512, 512), (512, 512)), - ((800, 800), (800, 800)), - ((1000, 1000), (1000, 1000)), - ((1200, 1200), (1200, 1200))] + if not no_benchmark: + benchmarks: List[dict[Op, Type[BenchBase]]] = [] + for platform in platforms: + for data_type in data: + for bench_op in [Op.ADD, Op.MUL, Op.DIV]: + if bench_op in bench_ops: + benchmarks.append((output_path, platform, data_type, bench_op, + Config.ELEMENT_WISE_ARGS, bench_count)) + for bench_op in [Op.MATMUL, Op.NN_MATMUL]: + if bench_op in bench_ops: + benchmarks.append((output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count)) + if Op.NN_DENSE in bench_ops: + benchmarks.append((output_path, platform, data_type, Op.NN_DENSE, Config.NN_1D_ARGS, bench_count)) - for platform in platforms: - for data_type in data: - for bench_op in [Op.ADD, Op.MUL, Op.DIV]: - if bench_op in bench_ops: - benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count)) - if Op.MATMUL in bench_ops: - benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count)) + if benchmarks: + for benchmark in benchmarks: + process = mp.Process(target=run_benchmark, args=benchmark) + 
process.start() + process.join() + print('Benchmark done') - for benchmark in benchmarks: - process = mp.Process(target=run_benchmark, args=benchmark) - process.start() - process.join() - - print('Benchmark done') + if not no_compare: + compare(output_path) + print('Compare done') if __name__ == '__main__': diff --git a/config/benchmark.py b/config/benchmark.py new file mode 100644 index 0000000..a072a34 --- /dev/null +++ b/config/benchmark.py @@ -0,0 +1,41 @@ +class Config: + EXPERIMENT_TIME = 1.0 + ELEMENT_WISE_ARGS = [ + (100, 100), + (100, 200), + (128, 128), + (200, 100), + (200, 200), + (256, 256), + (256, 512), + (512, 256), + (400, 400), + (512, 512), + (800, 800), + (1024, 1024), + (1800, 1800)] + MATMUL_ARGS = [ + ((100, 100), (100, 100)), + ((100, 200), (200, 100)), + ((128, 128), (128, 128)), + ((200, 100), (100, 200)), + ((200, 200), (200, 200)), + ((256, 256), (256, 256)), + ((256, 512), (512, 256)), + ((400, 400), (400, 400)), + ((512, 256), (256, 512)), + ((512, 512), (512, 512)), + ((800, 800), (800, 800)), + ((1000, 1000), (1000, 1000)), + ((1200, 1200), (1200, 1200))] + NN_1D_ARGS = [ + (1, 16), (16, 16), (64, 16), + (1, 64), (16, 64), + (1, 150), (16, 150), + (1, 256), (16, 256), + (1, 400), (16, 400), (64, 400), + (1, 512), (16, 512), (64, 512), + (1, 800), (16, 800), (64, 800), + (1, 1024), (16, 1024), + (1, 2000), (16, 2000), (64, 2000), + (1, 4000), (16, 4000), (64, 4000)] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..bed91a2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +matplotlib +seaborn +tensorflow +torch diff --git a/src/base.py b/src/base.py index 643a18f..7dc36f3 100644 --- a/src/base.py +++ b/src/base.py @@ -1,69 +1,71 @@ from pathlib import Path import time +from typing import List import numpy as np import pandas as pd +from config.benchmark import Config from src.common import DataKey, DataType, Device, Op, Platform -from src.plot import plot_experiments +from src.op_info import 
op_infos from src.utils import get_cpu_name, get_nvidia_name class BenchBase(): - def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device): + def __init__(self, output_path: Path, platform: Platform, bench_op: Op, + device_type: Device, device, + data_type: DataType, dtype): self._base_output_path = output_path - self.output_path = output_path self.platform = platform self.bench_op = bench_op self.device_type = device_type self.device = device - self.dtype = None + self.device_name = get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name() + self.data_type = data_type + self.dtype = dtype + self.info = op_infos[bench_op] - def set_output_path(self, device: Device, device_name: str): self.output_path = ( - self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value) + self._base_output_path / f'{self.device_type.value}_{self.device_name}' + / self.platform.value / self.bench_op.value) # noqa - def get_dtype(self, data_type: DataType): + def pre_experiment(self, _experiment_args): + pass + + def experiment(self): raise NotImplementedError() - def experiment(self, _experiment_args, _length, _dtype, _device): - raise NotImplementedError() - - def name(self, _experiment_args) -> str: - raise NotImplementedError() - - def mop(self, _experiment_args) -> float: - raise NotImplementedError() - - def run(self, experiment_args, experiment_count: int, data_type: DataType): - self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name()) + def post_experiment(self): + pass + def run(self, experiment_args, experiment_count: int): if not self.output_path.exists(): self.output_path.mkdir(parents=True) - dtype = self.get_dtype(data_type) - - print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}') + print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark' + f' with 
data type: {self.data_type.value}') experiment_names = [] experiment_lengths = [] experiment_times = [] experiment_mop = [] for args in experiment_args: + self.pre_experiment(args) + # warmup - for _ in range(4): - self.experiment(args, 5, dtype, self.device) + for _ in range(20): + self.experiment() # speed evalutaion counter = 0 start_time = time.time() - while time.time() - start_time < 0.2: - self.experiment(args, 10, dtype, self.device) - counter += 10 + while (time.time() - start_time) < (Config.EXPERIMENT_TIME / 5): + self.experiment() + counter += 1 end_time = time.time() - target_time = 1.0 # in s + target_time = Config.EXPERIMENT_TIME # in s experiment_speed = counter / (end_time - start_time) # in op/s experiment_length = max(int(target_time / experiment_count * experiment_speed), 2) # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s' @@ -73,24 +75,28 @@ class BenchBase(): run_times = [] for _ in range(experiment_count): start_time = time.time() - self.experiment(args, experiment_length, dtype, self.device) + for _ in range(experiment_length): + self.experiment() run_times.append(time.time() - start_time) experiment_times += run_times - experiment_names += [self.name(args)] * experiment_count + experiment_names += [self.info.name(args)] * experiment_count experiment_lengths += [experiment_length] * experiment_count - experiment_mop += [self.mop(args)] * experiment_count + experiment_mop += [self.info.mop(args)] * experiment_count total_time = np.array(run_times, dtype=np.float64).sum() - total_glop = self.mop(args) * experiment_length * experiment_count / 1000 + total_glop = self.info.mop(args) * experiment_length * experiment_count / 1000 print(f'Run {experiment_names[-1]} (x{experiment_length})' f' in {total_time:0.2f}s => {total_glop / total_time:0.3f}GFOPS') + self.post_experiment() - data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type) - 
plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform) + data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop) + # Avoid circular import + from src.plot import plot_experiments # pylint: disable=import-outside-toplevel + plot_experiments(self, data) def save_experiments( - self, experiment_names: list[str], experiment_times: list[float], - experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame: + self, experiment_names: List[str], experiment_times: List[float], + experiment_lengths: List[int], experiment_mop: List[float]) -> pd.DataFrame: key = DataKey(self.bench_op) data = pd.DataFrame( { @@ -102,5 +108,5 @@ class BenchBase(): key.gflops: [(mop * l) / (t * 1000.0) for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)] }) - data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t') + data.to_csv(self.output_path / f'{self.bench_op.value}_{self.data_type.value}.csv', sep='\t') return data diff --git a/src/common.py b/src/common.py index a487e9c..56b6389 100644 --- a/src/common.py +++ b/src/common.py @@ -13,16 +13,19 @@ class DataType(Enum): class Op(Enum): - NO_OP = 'noop' ADD = 'add' DIV = 'div' MUL = 'mul' MATMUL = 'matmul' + NN_MATMUL = 'nn_matmul' + NN_DENSE = 'nn_dense' class Platform(Enum): - TF1 = 'TF1' + JAX = 'jax' + # TF1 = 'TF1' TF2 = 'TF2' + TF2_V1 = 'TF2_V1' TORCH = 'Torch' diff --git a/src/jax/add.py b/src/jax/add.py new file mode 100644 index 0000000..9fe0596 --- /dev/null +++ b/src/jax/add.py @@ -0,0 +1,25 @@ +from pathlib import Path +from typing import Tuple + +from jax import device_put +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxAddBench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.ADD, data_type) + self.tensor_1: jnp.DeviceArray = None + self.tensor_2: 
jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + shape_1 = experiment_args + self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready() + + def experiment(self): + self.tensor_result = jnp.add(self.tensor_1, self.tensor_2).block_until_ready() diff --git a/src/jax/base.py b/src/jax/base.py new file mode 100644 index 0000000..c65eec9 --- /dev/null +++ b/src/jax/base.py @@ -0,0 +1,34 @@ +from pathlib import Path + +import jax.numpy as jnp +import jax + +from src.base import BenchBase +from src.common import DataType, Device, Op, Platform + + +class JaxBase(BenchBase): + def __init__(self, output_path: Path, bench_op: Op, data_type: DataType): + gpu_devices = jax.devices('gpu') + if gpu_devices: + if len(gpu_devices) > 1: + print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)') + device_type = Device.GPU + device = gpu_devices[0] + else: + device_type = Device.CPU + device = jax.devices('cpu')[0] + + if data_type == DataType.FLOAT16: + dtype = jnp.float16 + elif data_type == DataType.FLOAT32: + dtype = jnp.float32 + elif data_type == DataType.FLOAT64: + dtype = jnp.float64 + else: + raise NotImplementedError(f'data_type {data_type.value} not implemented') + + super().__init__(output_path, Platform.JAX, bench_op, device_type, device, data_type, dtype) + + def experiment(self): + raise NotImplementedError() diff --git a/src/jax/div.py b/src/jax/div.py new file mode 100644 index 0000000..23a6ad8 --- /dev/null +++ b/src/jax/div.py @@ -0,0 +1,25 @@ +from pathlib import Path +from typing import Tuple + +from jax import device_put +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxDivBench(JaxBase): + def __init__(self, output_path: Path, data_type: 
DataType): + super().__init__(output_path, Op.DIV, data_type) + self.tensor_1: jnp.DeviceArray = None + self.tensor_2: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + shape_1 = experiment_args + self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready() + + def experiment(self): + self.tensor_result = jnp.divide(self.tensor_1, self.tensor_2).block_until_ready() diff --git a/src/jax/matmul.py b/src/jax/matmul.py new file mode 100644 index 0000000..82befef --- /dev/null +++ b/src/jax/matmul.py @@ -0,0 +1,28 @@ +from pathlib import Path +from typing import List, Tuple + +from jax import device_put +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxMatmulBench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.MATMUL, data_type) + self.tensor_1: jnp.DeviceArray = None + self.tensor_2: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + shape_1, shape_2 = experiment_args + self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype)) + self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready() + + def experiment(self): + self.tensor_result = jnp.matmul(self.tensor_1, self.tensor_2).block_until_ready() + + def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/jax/mul.py b/src/jax/mul.py new file mode 100644 index 0000000..92e5f33 --- /dev/null +++ b/src/jax/mul.py @@ -0,0 +1,25 @@ +from pathlib import Path +from typing import Tuple + 
+from jax import device_put +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxMulBench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.MUL, data_type) + self.tensor_1: jnp.DeviceArray = None + self.tensor_2: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + shape_1 = experiment_args + self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_2 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_result = jnp.multiply(self.tensor_1, self.tensor_2).block_until_ready() + + def experiment(self): + self.tensor_result = jnp.multiply(self.tensor_1, self.tensor_2).block_until_ready() diff --git a/src/jax/nn_dense.py b/src/jax/nn_dense.py new file mode 100644 index 0000000..86e1ecb --- /dev/null +++ b/src/jax/nn_dense.py @@ -0,0 +1,32 @@ +from pathlib import Path +from typing import Callable, List, Tuple + +from jax import device_put, jit, random +from jax.experimental import stax +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxNNDenseBench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE, data_type) + self.tensor: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + self.network: Callable = None + self.params = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + batch_size, dimension = experiment_args + self.tensor = device_put(jnp.ones((batch_size, dimension), dtype=self.dtype)) + network_init, self.network = stax.Dense(dimension) + _, self.params = network_init(random.PRNGKey(1), (batch_size, dimension)) + self.network = jit(self.network) + self.tensor_result = self.network(self.params, self.tensor) + + def experiment(self): + self.tensor_result = self.network(self.params, 
self.tensor) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/jax/nn_matmul.py b/src/jax/nn_matmul.py new file mode 100644 index 0000000..85f50b9 --- /dev/null +++ b/src/jax/nn_matmul.py @@ -0,0 +1,33 @@ +from pathlib import Path +from typing import List, Tuple + +from jax import device_put, jit +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +def matmul(tensor_1: jnp.DeviceArray, tensor_2: jnp.DeviceArray) -> jnp.DeviceArray: + return tensor_1 @ tensor_2 + + +class JaxNNMatmulBench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_MATMUL, data_type) + self.tensor_1: jnp.DeviceArray = None + self.tensor_2: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + self.network = jit(matmul) + + def pre_experiment(self, experiment_args: Tuple[int, int]): + shape_1, shape_2 = experiment_args + self.tensor_1 = device_put(jnp.ones(shape_1, dtype=self.dtype)) + self.tensor_2 = device_put(jnp.ones(shape_2, dtype=self.dtype)) + self.tensor_result = self.network(self.tensor_1, self.tensor_2) + + def experiment(self): + self.tensor_result = self.network(self.tensor_1, self.tensor_2) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/jax/ops.py b/src/jax/ops.py new file mode 100644 index 0000000..ad926f9 --- /dev/null +++ b/src/jax/ops.py @@ -0,0 +1,20 @@ +from typing import Type + +from src.common import Op +from src.jax.add import JaxAddBench +from src.jax.base import JaxBase +from src.jax.div import JaxDivBench +from src.jax.mul import JaxMulBench +from src.jax.matmul import JaxMatmulBench +from src.jax.nn_dense import JaxNNDenseBench +from src.jax.nn_matmul import JaxNNMatmulBench + + +jax_ops: dict[Op, Type[JaxBase]] = { + Op.ADD: JaxAddBench, + Op.MUL: 
JaxMulBench, + Op.DIV: JaxDivBench, + Op.MATMUL: JaxMatmulBench, + Op.NN_MATMUL: JaxNNMatmulBench, + Op.NN_DENSE: JaxNNDenseBench +} diff --git a/src/op_info.py b/src/op_info.py new file mode 100644 index 0000000..c30f7f9 --- /dev/null +++ b/src/op_info.py @@ -0,0 +1,85 @@ +from typing import Dict, List, Type, Tuple + +from src.common import Op + + +class _BaseInfo(): + @staticmethod + def name(experiment_args) -> str: + raise NotImplementedError() + + @staticmethod + def mop(experiment_args) -> float: + raise NotImplementedError() + + +class AddInfo(_BaseInfo): + @staticmethod + def name(experiment_args: Tuple[int, int]) -> str: + shape_1 = experiment_args + return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}' + + @staticmethod + def mop(experiment_args: Tuple[int, int]) -> float: + shape_1 = experiment_args + return shape_1[0] * shape_1[1] / 1_000_000 + + +class DivInfo(_BaseInfo): + @staticmethod + def name(experiment_args: Tuple[int, int]) -> str: + shape_1 = experiment_args + return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}' + + @staticmethod + def mop(experiment_args: Tuple[int, int]) -> float: + shape_1 = experiment_args + return shape_1[0] * shape_1[1] / 1_000_000 + + +class MulInfo(_BaseInfo): + @staticmethod + def name(experiment_args: Tuple[int, int]) -> str: + shape_1 = experiment_args + return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}' + + @staticmethod + def mop(experiment_args: Tuple[int, int]) -> float: + shape_1 = experiment_args + return shape_1[0] * shape_1[1] / 1_000_000 + + +class MatmulInfo(_BaseInfo): + @staticmethod + def name(experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]]) -> str: + shape_1, shape_2 = experiment_args + return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}' + + @staticmethod + def mop(experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]]) -> float: + shape_1, shape_2 = experiment_args + return (shape_1[0] * shape_2[1] / 1_000_000) * 2 * (shape_1[1] - 1) 
+ + +class DenseInfo(_BaseInfo): + @staticmethod + def name(experiment_args: Tuple[int, int]) -> str: + batch_size, dimension = experiment_args + return f'Dense(({batch_size}x{dimension}))' + + @staticmethod + def mop(experiment_args: Tuple[int, int]) -> float: + batch_size, dimension = experiment_args + return batch_size * ( + ((dimension * dimension / 1_000_000) * 2 * (dimension - 1)) + ( + dimension / 1_000_000)) + + +op_infos: Dict[Op, Type[_BaseInfo]] = { + Op.ADD: AddInfo, + Op.DIV: DivInfo, + Op.MUL: MulInfo, + Op.MATMUL: MatmulInfo, + Op.NN_MATMUL: MatmulInfo, + Op.NN_DENSE: DenseInfo +} diff --git a/src/plot.py b/src/plot.py index f4cf907..7d71346 100644 --- a/src/plot.py +++ b/src/plot.py @@ -1,4 +1,7 @@ from pathlib import Path +import math +import multiprocessing as mp +import os import numpy as np import matplotlib.pyplot as plt @@ -6,11 +9,20 @@ import pandas as pd import seaborn as sns +from src.base import BenchBase from src.common import DataKey, DataType, Op, Platform -def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform): - key = DataKey(bench_op) +class CompKey: + def __init__(self): + self.data_type = 'data_type' + self.device = 'device' + self.bench_op = 'op' + self.platform = 'platform' + + +def plot_experiments(bench: BenchBase, data: pd.DataFrame): + key = DataKey(bench.bench_op) sum_data = data[[key.experiment, key.time, key.count]].groupby( key.experiment, as_index=False, sort=False).sum() mean_data = data[[key.experiment, key.speed]].groupby( @@ -20,12 +32,12 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, sns.set_theme(style="ticks") figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12)) - figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16) for axe in axes[:-1]: axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False) chart = sns.barplot(x=key.experiment, y=key.mop, 
data=max_data, ax=axes[0], order=data[key.experiment].unique()) - axes[0].set_yscale("log") + if max_data[key.mop].max() > max_data[key.mop].min() * 100: + axes[0].set_yscale("log") for patch, value in zip(chart.patches, max_data[key.mop]): chart.annotate(f'{value:0.3f}', (patch.get_x() + patch.get_width() / 2.0, patch.get_height()), @@ -33,6 +45,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, textcoords='offset points') chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1]) + if data[key.speed].max() > data[key.speed].min() * 100: + axes[1].set_yscale("log") for patch, value in zip(chart.patches, mean_data[key.speed]): chart.annotate(f'{value:.3f}', (patch.get_x() + patch.get_width() / 2.0, patch.get_height()), @@ -40,6 +54,8 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, textcoords='offset points') chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2]) + if data[key.gflops].max() > data[key.gflops].min() * 100: + axes[2].set_yscale("log") for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]): chart.annotate(f'{(mop * count / 1000) / value:.3f}', (patch.get_x() + patch.get_width() / 2.0, patch.get_height()), @@ -47,5 +63,62 @@ def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, textcoords='offset points') plt.xticks(rotation=20) - plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05) - plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png') + plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.91, right=0.99, bottom=0.1, left=0.05) + figure.suptitle(f'{bench.platform.value} {bench.bench_op.value} ({bench.data_type.value})', fontsize=16) + axes[0].set_title(f'{bench.device_name}', fontsize=12) + plt.savefig(bench.output_path / f'{bench.bench_op.value}_{bench.data_type.value}.png') 
+ + +def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path): + op_data = all_data[(all_data[comp_key.bench_op] == bench_op) & (all_data[comp_key.device] == device)] + platform_list = op_data[comp_key.platform].unique() + if len(platform_list) <= 1: + return + + key = DataKey(Op(bench_op)) + + sns.set_theme(style="ticks") + for data_type in op_data[comp_key.data_type].unique(): + data = op_data[op_data[comp_key.data_type] == data_type] + graph = sns.catplot(x=key.experiment, y=key.gflops, hue=comp_key.platform, data=data, + kind='bar', estimator=np.median, height=8, aspect=1.4) + if data[key.gflops].max() > data[key.gflops].min() * 100: + graph.set(yscale="log") + plt.xticks(rotation=70, fontsize=8) + plt.subplots_adjust(top=0.92, bottom=0.25) + plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16) + plt.title(f'{device}', fontsize=12) + plt.savefig(output_path / device / f'{bench_op}_{data_type}.png') + + +def compare(output_path: Path): + all_data: pd.DataFrame = None + comp_key = CompKey() + + for data_path in output_path.rglob('*.csv'): + if len(data_path.parents) <= 4: + print(f'Warning: cannot parse data at path {data_path} (subfolders missing)') + data_type = DataType(data_path.stem.split('_')[-1]) + bench_op = Op(data_path.parents[0].name) + platform = Platform(data_path.parents[1].name) + device_name = data_path.parents[2].name + + current_data = pd.read_csv(data_path, sep='\t') + current_data[comp_key.data_type] = data_type.value + current_data[comp_key.bench_op] = bench_op.value + current_data[comp_key.platform] = platform.value + current_data[comp_key.device] = device_name + + if all_data is None: + all_data = current_data + else: + all_data = all_data.append(current_data, ignore_index=True, verify_integrity=True) + + # Compare between platforms + comp_args = [] + for device in all_data[comp_key.device].unique(): + for bench_op in all_data[comp_key.bench_op].unique(): + 
comp_args.append((all_data, comp_key, device, bench_op, output_path)) + + with mp.Pool(processes=math.ceil(os.cpu_count() * 0.8)) as pool: + pool.starmap(_draw_comparison, comp_args) diff --git a/src/pytorch/add.py b/src/pytorch/add.py index 08f6b84..b69e2c5 100644 --- a/src/pytorch/add.py +++ b/src/pytorch/add.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import List, Tuple import torch @@ -7,27 +8,20 @@ from src.pytorch.base import TorchBase class TorchAddBench(TorchBase): - def __init__(self, output_path: Path): - super().__init__(output_path, Op.ADD) + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.ADD, data_type) + self.tensor_1: torch.Tensor = None + self.tensor_2: torch.Tensor = None + self.tensor_result: torch.Tensor = None - def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device): + def pre_experiment(self, experiment_args: Tuple[int, int]): shape_1 = experiment_args - tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) - tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False) + self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False) + self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False) + self.tensor_result = self.tensor_1 + self.tensor_2 - for _ in range(length): - _ = tensor_1 + tensor_2 + def experiment(self): + self.tensor_result = self.tensor_1 + self.tensor_2 - def name(self, experiment_args: tuple[int, int]) -> str: - shape_1 = experiment_args - return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}' - - def mop(self, experiment_args: tuple[int, int]) -> float: - shape_1 = experiment_args - return shape_1[0] * shape_1[1] / 1000_000 - - def run(self, - experiment_args: list[tuple[int, int]], - experiment_count: int, - data_type: DataType): - super().run(experiment_args, experiment_count, data_type) + 
def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/pytorch/base.py b/src/pytorch/base.py index 335042d..289de6a 100644 --- a/src/pytorch/base.py +++ b/src/pytorch/base.py @@ -7,7 +7,7 @@ from src.common import DataType, Device, Op, Platform class TorchBase(BenchBase): - def __init__(self, output_path: Path, bench_op: Op): + def __init__(self, output_path: Path, bench_op: Op, data_type: DataType): if torch.cuda.is_available(): if torch.cuda.device_count() > 1: print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)') @@ -18,22 +18,16 @@ class TorchBase(BenchBase): device_type = Device.CPU device = torch.device('cpu') - super().__init__(output_path, Platform.TORCH, bench_op, device_type, device) - - def get_dtype(self, data_type: DataType) -> torch.dtype: if data_type == DataType.FLOAT16: - return torch.float16 - if data_type == DataType.FLOAT32: - return torch.float32 - if data_type == DataType.FLOAT64: - return torch.float64 - raise NotImplementedError(f'data_type {data_type.value} not implemented') + dtype = torch.float16 + elif data_type == DataType.FLOAT32: + dtype = torch.float32 + elif data_type == DataType.FLOAT64: + dtype = torch.float64 + else: + raise NotImplementedError(f'data_type {data_type.value} not implemented') - def experiment(self, _experiment_args, _length, _dtype, _device): - raise NotImplementedError() + super().__init__(output_path, Platform.TORCH, bench_op, device_type, device, data_type, dtype) - def name(self, _experiment_args) -> str: - raise NotImplementedError() - - def mop(self, _experiment_args) -> float: + def experiment(self): raise NotImplementedError() diff --git a/src/pytorch/div.py b/src/pytorch/div.py index 9a0b309..d954bd8 100644 --- a/src/pytorch/div.py +++ b/src/pytorch/div.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import List, Tuple import torch @@ -7,27 +8,20 @@ from src.pytorch.base 
# src/pytorch/div.py (post-patch state, reconstructed from mangled diff)
class TorchDivBench(TorchBase):
    """Benchmark element-wise division of two equal-shape PyTorch tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        # Operands and result are (re)allocated per shape in pre_experiment.
        self.tensor_1: torch.Tensor = None
        self.tensor_2: torch.Tensor = None
        self.tensor_result: torch.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operand tensors and run one warm-up division."""
        shape_1 = experiment_args
        self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        # Warm-up so lazy device initialisation is not timed.
        self.tensor_result = self.tensor_1 / self.tensor_2

    def experiment(self):
        """One timed iteration: a single element-wise division."""
        self.tensor_result = self.tensor_1 / self.tensor_2

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/pytorch/matmul.py (post-patch state, reconstructed from mangled diff)
class TorchMatmulBench(TorchBase):
    """Benchmark matrix multiplication of two PyTorch tensors.

    experiment_args is a pair of shapes ((m, k), (k, n)) — unlike the
    element-wise benches, the two operands may differ in shape.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MATMUL, data_type)
        # Operands and result are (re)allocated per shape pair in pre_experiment.
        self.tensor_1: torch.Tensor = None
        self.tensor_2: torch.Tensor = None
        self.tensor_result: torch.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Allocate both operand tensors and run one warm-up matmul."""
        shape_1, shape_2 = experiment_args
        self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        self.tensor_2 = torch.ones(shape_2, dtype=self.dtype, device=self.device, requires_grad=False)
        # Warm-up so lazy device initialisation is not timed.
        self.tensor_result = self.tensor_1 @ self.tensor_2

    def experiment(self):
        """One timed iteration: a single matrix multiplication."""
        self.tensor_result = self.tensor_1 @ self.tensor_2

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/pytorch/mul.py (post-patch state, reconstructed from mangled diff)
class TorchMulBench(TorchBase):
    """Benchmark element-wise multiplication of two equal-shape PyTorch tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        # Operands and result are (re)allocated per shape in pre_experiment.
        self.tensor_1: torch.Tensor = None
        self.tensor_2: torch.Tensor = None
        self.tensor_result: torch.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operand tensors and run one warm-up multiplication."""
        shape_1 = experiment_args
        self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        self.tensor_2 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        # Warm-up so lazy device initialisation is not timed.
        self.tensor_result = self.tensor_1 * self.tensor_2

    def experiment(self):
        """One timed iteration: a single element-wise multiplication."""
        self.tensor_result = self.tensor_1 * self.tensor_2

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/pytorch/nn_dense.py (new file in the patch, reconstructed from mangled diff)
class DenseNetwork(torch.nn.Module):
    """Minimal one-layer network: a square Linear layer (input_dim -> input_dim)."""

    def __init__(self, input_dim: int, dtype: torch.dtype):
        super().__init__()
        self.dense = torch.nn.Linear(input_dim, input_dim, dtype=dtype)

    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
        return self.dense(input_data)


class TorchNNDenseBench(TorchBase):
    """Benchmark a forward pass through a single Linear layer.

    NOTE(review): the input has requires_grad=False but the Linear weights are
    trainable, so the forward pass still records autograd history — confirm
    this is intended for the benchmark.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        # Input, output, and model are (re)built per shape in pre_experiment.
        self.tensor: torch.Tensor = None
        self.tensor_result: torch.Tensor = None
        self.network: torch.nn.Module = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build a (batch_size, dimension) input, the network, and warm up once."""
        batch_size, dimension = experiment_args
        self.tensor = torch.ones((batch_size, dimension), dtype=self.dtype, device=self.device, requires_grad=False)
        self.network = DenseNetwork(dimension, self.dtype).to(self.device)
        self.tensor_result = self.network(self.tensor)

    def experiment(self):
        """One timed iteration: a single forward pass."""
        self.tensor_result = self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/pytorch/nn_matmul.py (new file in the patch, reconstructed from mangled diff)
class MatMulNetwork(torch.nn.Module):
    """Parameter-free module whose forward is a plain matrix multiplication."""

    def forward(self, input_1: torch.Tensor, input_2: torch.Tensor) -> torch.Tensor:
        return input_1 @ input_2


class TorchNNMatmulBench(TorchBase):
    """Benchmark a matmul routed through an nn.Module forward call.

    Mirrors TorchMatmulBench but adds the nn.Module dispatch overhead,
    letting the suite compare raw-op vs module-wrapped cost.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_MATMUL, data_type)
        # Operands, result, and module are (re)built per shape pair in pre_experiment.
        self.tensor_1: torch.Tensor = None
        self.tensor_2: torch.Tensor = None
        self.tensor_result: torch.Tensor = None
        self.network: torch.nn.Module = None

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Allocate operands, build the module, and warm up once."""
        shape_1, shape_2 = experiment_args
        self.tensor_1 = torch.ones(shape_1, dtype=self.dtype, device=self.device, requires_grad=False)
        self.tensor_2 = torch.ones(shape_2, dtype=self.dtype, device=self.device, requires_grad=False)
        self.network = MatMulNetwork()
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def experiment(self):
        """One timed iteration: a single module-wrapped matmul."""
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/add.py (post-patch state, reconstructed from mangled diff)
class TFAddBench(TFBase):
    """Benchmark element-wise addition of two equal-shape TF2 tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.ADD, data_type)
        # Operands and result are (re)allocated per shape in pre_experiment.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operands on self.device and run one warm-up add."""
        shape_1 = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
            # Warm-up so lazy device initialisation is not timed.
            self.tensor_result = self.tensor_1 + self.tensor_2

    def experiment(self):
        """One timed iteration: a single element-wise addition."""
        self.tensor_result = self.tensor_1 + self.tensor_2

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/div.py (post-patch state, reconstructed from mangled diff)
class TFDivBench(TFBase):
    """Benchmark element-wise division of two equal-shape TF2 tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        # Operands and result are (re)allocated per shape in pre_experiment.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operands on self.device and run one warm-up division."""
        shape_1 = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
            # Warm-up so lazy device initialisation is not timed.
            self.tensor_result = self.tensor_1 / self.tensor_2

    def experiment(self):
        """One timed iteration: a single element-wise division."""
        self.tensor_result = self.tensor_1 / self.tensor_2

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/matmul.py (post-patch state, reconstructed from mangled diff)
class TFMatmulBench(TFBase):
    """Benchmark matrix multiplication of two TF2 tensors.

    experiment_args is a pair of shapes ((m, k), (k, n)).
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MATMUL, data_type)
        # Operands and result are (re)allocated per shape pair in pre_experiment.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Allocate both operands on self.device and run one warm-up matmul."""
        shape_1, shape_2 = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_2, dtype=self.dtype)
            # Warm-up so lazy device initialisation is not timed.
            self.tensor_result = self.tensor_1 @ self.tensor_2

    def experiment(self):
        """One timed iteration: a single matrix multiplication."""
        self.tensor_result = self.tensor_1 @ self.tensor_2

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/mul.py (post-patch state, reconstructed from mangled diff)
class TFMulBench(TFBase):
    """Benchmark element-wise multiplication of two equal-shape TF2 tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        # Operands and result are (re)allocated per shape in pre_experiment.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate both operands on self.device and run one warm-up multiply."""
        shape_1 = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_1, dtype=self.dtype)
            # Warm-up so lazy device initialisation is not timed.
            self.tensor_result = self.tensor_1 * self.tensor_2

    def experiment(self):
        """One timed iteration: a single element-wise multiplication."""
        self.tensor_result = self.tensor_1 * self.tensor_2

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/nn_dense.py (new file in the patch, reconstructed from mangled diff)
class DenseModel(tf.keras.Model):
    """Minimal one-layer Keras model: a square Dense layer (input_dim -> input_dim)."""

    # FIX: the patch wrote `dtype=tf.DType`, making the *class object* tf.DType the
    # default value instead of a type annotation. Callers always pass dtype
    # positionally, so behavior is unchanged, but the signature was wrong.
    def __init__(self, input_dim: int, dtype: tf.DType):
        super().__init__()
        self.dense = tf.keras.layers.Dense(input_dim, dtype=dtype)

    def call(self, input_tensor: tf.Tensor) -> tf.Tensor:
        return self.dense(input_tensor)


class TFNNDenseBench(TFBase):
    """Benchmark a forward pass through a single Keras Dense layer."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        # Input and model are (re)built per shape in pre_experiment.
        self.tensor: tf.Tensor = None
        self.network: tf.keras.Model = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build a (batch_size, dimension) input and the model on self.device.

        NOTE(review): unlike the sibling benches there is no warm-up call here,
        so the first timed experiment includes layer build cost — confirm intended.
        """
        batch_size, dimension = experiment_args
        with self.device:
            self.tensor = tf.ones((batch_size, dimension), dtype=self.dtype)
            self.network = DenseModel(dimension, self.dtype)

    def experiment(self):
        """One timed iteration: a single forward pass (result discarded)."""
        self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2/nn_matmul.py (new file in the patch, reconstructed from mangled diff)
class MatmulModel(tf.keras.Model):
    """Parameter-free Keras model whose call is a plain matrix multiplication."""

    def call(self, tensor_1: tf.Tensor, tensor_2: tf.Tensor) -> tf.Tensor:
        return tf.matmul(tensor_1, tensor_2)


class TFNNMatmulBench(TFBase):
    """Benchmark a matmul routed through a Keras model call.

    Mirrors TFMatmulBench but adds the keras.Model dispatch overhead,
    letting the suite compare raw-op vs model-wrapped cost.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_MATMUL, data_type)
        # Operands, result, and model are (re)built per shape pair in pre_experiment.
        self.tensor_1: tf.Tensor = None
        self.tensor_2: tf.Tensor = None
        self.tensor_result: tf.Tensor = None
        self.network: tf.keras.Model = None

    def pre_experiment(self, experiment_args: Tuple[Tuple[int, int], Tuple[int, int]]):
        """Allocate operands on self.device and build the model (no warm-up call)."""
        shape_1, shape_2 = experiment_args
        with self.device:
            self.tensor_1 = tf.ones(shape_1, dtype=self.dtype)
            self.tensor_2 = tf.ones(shape_2, dtype=self.dtype)
            self.network = MatmulModel()

    def experiment(self):
        """One timed iteration: a single model-wrapped matmul."""
        self.tensor_result = self.network(self.tensor_1, self.tensor_2)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2_v1/add.py (new file in the patch, reconstructed from mangled diff)
class TFAddBench(TFBase):
    """TF1-compat (graph + session) benchmark for element-wise addition."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.ADD, data_type)
        # Graph op built in pre_experiment, executed via session.run in experiment.
        self.add_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the add graph on a fresh session and initialise the variables."""
        # Base class creates the tf.Session / resets graph state first.
        super().pre_experiment(experiment_args)
        shape_1 = experiment_args
        tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        self.add_op = tensor_1 + tensor_2

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed iteration: run the pre-built add op."""
        self.session.run(self.add_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2_v1/div.py (new file in the patch, reconstructed from mangled diff)
class TFDivBench(TFBase):
    """TF1-compat (graph + session) benchmark for element-wise division."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        # Graph op built in pre_experiment, executed via session.run in experiment.
        self.div_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the div graph on a fresh session and initialise the variables."""
        # Base class creates the tf.Session / resets graph state first.
        super().pre_experiment(experiment_args)
        shape_1 = experiment_args
        tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        self.div_op = tensor_1 / tensor_2

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed iteration: run the pre-built div op."""
        self.session.run(self.div_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2_v1/mul.py (new file in the patch, reconstructed from mangled diff)
class TFMulBench(TFBase):
    """TF1-compat (graph + session) benchmark for element-wise multiplication."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        # Graph op built in pre_experiment, executed via session.run in experiment.
        self.mul_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the mul graph on a fresh session and initialise the variables."""
        # Base class creates the tf.Session / resets graph state first.
        super().pre_experiment(experiment_args)
        shape_1 = experiment_args
        tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        self.mul_op = tensor_1 * tensor_2

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One timed iteration: run the pre-built mul op."""
        self.session.run(self.mul_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
# src/tf_2_v1/ops.py — registry mapping each benchmarked Op to its TF1-compat
# bench class; consumed by benchmark.py's run_benchmark dispatch.
# NOTE(review): unlike tf2_ops/torch_ops, no NN_MATMUL/NN_DENSE entries exist
# here yet — unimplemented ops are reported by the caller.
tf2v1_ops: dict[Op, Type[TFBase]] = {
    Op.ADD: TFAddBench,
    Op.MUL: TFMulBench,
    Op.DIV: TFDivBench,
    Op.MATMUL: TFMatmulBench,
}