Add TF1 platform, docker and README
This commit is contained in:
parent
16b7239cd7
commit
dbe5490c5b
28 changed files with 655 additions and 34 deletions
84
README.md
Normal file
84
README.md
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# Deep Learning Benchmarks
|
||||
|
||||
## Dependencies
|
||||
|
||||
* **python** : version 3.6 or newer (tested on 3.6 and 3.9)
|
||||
|
||||
The following Python packages can be installed with `pip` (see the `--platform` option in the usage section):
|
||||
|
||||
* **matplotlib**
|
||||
* **seaborn**
|
||||
* **tensorflow** : tested with version 2.6 (for `--platform TF2 TF2_V1`) and 1.15 (for `--platform TF1`)
|
||||
* **torch** : tested with version 1.9 (for `--platform Torch`)
|
||||
* **jaxlib** : tested with version 0.1.71+cuda111 (for `--platform jax`)
|
||||
* **jax** : tested with version 0.2.21 (for `--platform jax`)
|
||||
|
||||
To run with docker:
|
||||
|
||||
* **docker**
|
||||
* **docker-compose**
|
||||
* **libnvidia-container** : for arch (AUR) (required for `nvidia-container-toolkit`)
|
||||
* **nvidia-container-toolkit** : for arch (AUR) (required for gpu accelerated docker using `--gpus`)
|
||||
* **nvidia-container-runtime** : for arch (AUR) (easier docker-compose integration using `runtime: nvidia`)
|
||||
|
||||
See other distribution documentation to get GPU accelerated docker containers.
|
||||
|
||||
## Usage
|
||||
|
||||
Running the `benchmark.py` script will run all the possible benchmarks automatically.
|
||||
|
||||
Options can be seen using the `--help` or `-h` argument :
|
||||
|
||||
```
|
||||
usage: benchmark.py [-h] [--output OUTPUT] [--no-benchmark] [--no-compare] [--count COUNT] [--platform [PLATFORM ...]]
|
||||
[--data [DATA ...]] [--op [OP ...]] [--list-op] [--list-platform] [--list-data]
|
||||
[--experiment-time EXPERIMENT_TIME]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--output OUTPUT Path to output files (default: output)
|
||||
--no-benchmark Avoid running benchmarks
|
||||
--no-compare Avoid running platform comparaison
|
||||
--count COUNT Number of experiments per benchmark (for stastistical analysis)
|
||||
--platform [PLATFORM ...]
|
||||
List of platform to benchmark [TF1, TF2, Torch] (else all are used)
|
||||
--data [DATA ...] List of data type to benchmark [float16, float32, float64] (else all are used)
|
||||
--op [OP ...] List of operation to benchmark (add, mul, div, matmul, etc) (else all are used)
|
||||
--list-op List all possible operation to benchmark (no further action will be done)
|
||||
--list-platform List all possible platform to benchmark (no further action will be done)
|
||||
--list-data List all possible data to benchmark (no further action will be done)
|
||||
--experiment-time EXPERIMENT_TIME
|
||||
Change time (in s) per experiment (default=1.000s)
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
In the `config/benchmark.py` file there are parameters that can be changed:
|
||||
|
||||
* **ExperimentCategory** : Enum class that defines ranges of Mop (Millions of operations) used for comparisons
|
||||
* **EXPERIMENT_TIME** : float number to set the amount of time to run each benchmark experiment (in seconds)
|
||||
* **[]_ARGS** : each kind of benchmark will be run with specific arguments that can be changed here (usually batch sizes and input shapes)
|
||||
|
||||
### Using Docker
|
||||
|
||||
There are 3 services to use with `docker-compose` (`tensorflow_v1`, `tensorflow` and `torch`), simply run a service:
|
||||
|
||||
```
|
||||
docker-compose run --rm torch
|
||||
```
|
||||
|
||||
Arguments can be passed using the `BENCH_ARGS` environment variable like:
|
||||
|
||||
```
|
||||
BENCH_ARGS="--op nn_dense_x5 --data float32 --no-compare" docker-compose run --rm tensorflow_v1
|
||||
```
|
||||
|
||||
|
||||
## Notes
|
||||
|
||||
Plot y axes are automatically switched to logarithmic scale if the value range is too wide; categorized comparisons are forced to stay in linear scale for clarity (categories are implemented for this reason).
|
||||
|
||||
|
||||
## Future
|
||||
|
||||
* More operations : Conv1d, Conv2D, DepthWiseConv2D, RNN
|
||||
44
benchmark.py
44
benchmark.py
|
|
@ -14,12 +14,26 @@ from src.plot import compare
|
|||
def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
|
||||
bench_args, bench_count: int):
|
||||
if platform == Platform.JAX:
|
||||
if data_type == DataType.FLOAT64:
|
||||
os.environ['JAX_ENABLE_X64'] = 'true'
|
||||
from src.jax.ops import jax_ops
|
||||
if bench_op not in jax_ops:
|
||||
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
|
||||
else:
|
||||
jax_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
|
||||
print()
|
||||
elif platform == Platform.TF1:
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
import tensorflow as tf
|
||||
if tf.__version__.split('.')[0] != '1':
|
||||
print(f'Cannot run benchmark for platform TF1 with tensorflow version: {tf.__version__}')
|
||||
return
|
||||
from src.tf_1.ops import tf1_ops
|
||||
if bench_op not in tf1_ops:
|
||||
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
|
||||
else:
|
||||
tf1_ops[bench_op](output_path, data_type).run(bench_args, bench_count)
|
||||
print()
|
||||
elif platform == Platform.TF2:
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
from src.tf_2.ops import tf2_ops
|
||||
|
|
@ -30,6 +44,10 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be
|
|||
print()
|
||||
elif platform == Platform.TF2_V1:
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
import tensorflow as tf
|
||||
if tf.__version__.split('.')[0] != '2':
|
||||
print(f'Cannot run benchmark for platform TF2_V1 with tensorflow version: {tf.__version__}')
|
||||
return
|
||||
from src.tf_2_v1.ops import tf2v1_ops
|
||||
if bench_op not in tf2v1_ops:
|
||||
print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
|
||||
|
|
@ -49,7 +67,8 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be
|
|||
|
||||
def main():
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files')
|
||||
parser.add_argument('--output', type=Path, default=Path('output'),
|
||||
help='Path to output files (default: output)')
|
||||
parser.add_argument('--no-benchmark', action='store_true', default=False, help='Avoid running benchmarks')
|
||||
parser.add_argument('--no-compare', action='store_true', default=False, help='Avoid running platform comparaison')
|
||||
parser.add_argument('--count', type=int, default=30,
|
||||
|
|
@ -62,6 +81,10 @@ def main():
|
|||
help='List of operation to benchmark (add, mul, div, matmul, etc) (else all are used)')
|
||||
parser.add_argument('--list-op', action='store_true',
|
||||
help='List all possible operation to benchmark (no further action will be done)')
|
||||
parser.add_argument('--list-platform', action='store_true',
|
||||
help='List all possible platform to benchmark (no further action will be done)')
|
||||
parser.add_argument('--list-data', action='store_true',
|
||||
help='List all possible data to benchmark (no further action will be done)')
|
||||
parser.add_argument(
|
||||
'--experiment-time', type=float,
|
||||
help=f'Change time (in s) per experiment (default={Config.EXPERIMENT_TIME:0.3f}s)')
|
||||
|
|
@ -70,6 +93,12 @@ def main():
|
|||
if arguments.list_op:
|
||||
print(', '.join([op.value for op in Op]))
|
||||
sys.exit(0)
|
||||
if arguments.list_platform:
|
||||
print(', '.join([p.value for p in Platform]))
|
||||
sys.exit(0)
|
||||
if arguments.list_data:
|
||||
print(', '.join([d.value for d in DataType]))
|
||||
sys.exit(0)
|
||||
|
||||
output_path: Path = arguments.output
|
||||
no_benchmark: bool = arguments.no_benchmark
|
||||
|
|
@ -91,13 +120,16 @@ def main():
|
|||
for data_type in data:
|
||||
for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
|
||||
if bench_op in bench_ops:
|
||||
benchmarks.append((output_path, platform, data_type, bench_op,
|
||||
Config.ELEMENT_WISE_ARGS, bench_count))
|
||||
benchmarks.append(
|
||||
(output_path, platform, data_type, bench_op, Config.ELEMENT_WISE_ARGS, bench_count))
|
||||
for bench_op in [Op.MATMUL, Op.NN_MATMUL]:
|
||||
if bench_op in bench_ops:
|
||||
benchmarks.append((output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count))
|
||||
if Op.NN_DENSE in bench_ops:
|
||||
benchmarks.append((output_path, platform, data_type, Op.NN_DENSE, Config.NN_1D_ARGS, bench_count))
|
||||
benchmarks.append(
|
||||
(output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count))
|
||||
for bench_op in [Op.NN_DENSE, Op.NN_DENSE_X5]:
|
||||
if bench_op in bench_ops:
|
||||
benchmarks.append(
|
||||
(output_path, platform, data_type, bench_op, Config.NN_1D_ARGS, bench_count))
|
||||
|
||||
if benchmarks:
|
||||
for benchmark in benchmarks:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,15 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class Config:
|
||||
class ExperimentCategory(Enum):
|
||||
SMALL = [0, 20] # in Mop/experiment
|
||||
MEDIUM = [20, 1000]
|
||||
LARGE = [1000, 1_000_000]
|
||||
VERY_LARGE = [1_000_000, 1_000_000_000]
|
||||
|
||||
EXPERIMENT_TIME = 1.0
|
||||
|
||||
ELEMENT_WISE_ARGS = [
|
||||
(100, 100),
|
||||
(100, 200),
|
||||
|
|
|
|||
32
docker-compose.yml
Normal file
32
docker-compose.yml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# Compose services for running the benchmarks inside GPU-enabled containers.
# Extra benchmark.py arguments are passed through the BENCH_ARGS environment
# variable, e.g.: BENCH_ARGS="--op add" docker-compose run --rm torch
version: "3.8"
services:
  # TensorFlow 1.x benchmarks (--platform TF1)
  tensorflow_v1:
    build:
      context: docker
      dockerfile: tensorflow_v1.dockerfile
    image: ayo/tf_v1_benchmark
    container_name: tf_v1_benchmark
    runtime: nvidia  # nvidia-container-runtime, for GPU access (see README)
    volumes:
      - .:/work  # repository bind-mounted so outputs land on the host
    # umask 0000 keeps files created in the bind mount writable by the host user
    command: bash -c "umask 0000 && python benchmark.py --platform TF1 ${BENCH_ARGS}"
  # TensorFlow 2.x benchmarks (--platform TF2 and TF2 v1-compat mode)
  tensorflow:
    build:
      context: docker
      dockerfile: tensorflow.dockerfile
    image: ayo/tf_benchmark
    container_name: tf_benchmark
    runtime: nvidia
    volumes:
      - .:/work
    command: bash -c "umask 0000 && python benchmark.py --platform TF2 TF2_V1 ${BENCH_ARGS}"
  # PyTorch benchmarks (--platform Torch)
  torch:
    build:
      context: docker
      dockerfile: pytorch.dockerfile
    image: ayo/torch_benchmark
    container_name: torch_benchmark
    runtime: nvidia
    volumes:
      - .:/work
    command: bash -c "umask 0000 && python benchmark.py --platform Torch ${BENCH_ARGS}"
|
||||
5
docker/pytorch.dockerfile
Normal file
5
docker/pytorch.dockerfile
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# PyTorch 1.9 + CUDA 11.1 runtime image for the Torch platform benchmarks.
FROM pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime

# Plotting dependencies used by the benchmark scripts; /work is the bind-mount point.
RUN pip install matplotlib seaborn && mkdir /work

WORKDIR /work
|
||||
5
docker/tensorflow.dockerfile
Normal file
5
docker/tensorflow.dockerfile
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# TensorFlow 2.x GPU image for the TF2 / TF2_V1 platform benchmarks.
# NOTE(review): `latest-gpu` is unpinned, so builds are not reproducible over time.
FROM tensorflow/tensorflow:latest-gpu

# Plotting dependencies used by the benchmark scripts; /work is the bind-mount point.
RUN pip install matplotlib seaborn && mkdir /work

WORKDIR /work
|
||||
5
docker/tensorflow_v1.dockerfile
Normal file
5
docker/tensorflow_v1.dockerfile
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# TensorFlow 1.15 GPU image for the TF1 platform benchmarks.
FROM tensorflow/tensorflow:1.15.5-gpu

# Plotting dependencies used by the benchmark scripts; /work is the bind-mount point.
RUN pip install matplotlib seaborn && mkdir /work

WORKDIR /work
|
||||
|
|
@ -19,11 +19,12 @@ class Op(Enum):
|
|||
MATMUL = 'matmul'
|
||||
NN_MATMUL = 'nn_matmul'
|
||||
NN_DENSE = 'nn_dense'
|
||||
NN_DENSE_X5 = 'nn_dense_x5'
|
||||
|
||||
|
||||
class Platform(Enum):
|
||||
JAX = 'jax'
|
||||
# TF1 = 'TF1'
|
||||
TF1 = 'TF1'
|
||||
TF2 = 'TF2'
|
||||
TF2_V1 = 'TF2_V1'
|
||||
TORCH = 'Torch'
|
||||
|
|
|
|||
33
src/jax/nn_dense_x5.py
Normal file
33
src/jax/nn_dense_x5.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from pathlib import Path
|
||||
from typing import Callable, List, Tuple
|
||||
|
||||
from jax import device_put, jit, random
|
||||
from jax.experimental import stax
|
||||
import jax.numpy as jnp
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.jax.base import JaxBase
|
||||
|
||||
|
||||
class JaxNNDenseX5Bench(JaxBase):
    """Benchmark of five chained fully-connected (Dense) layers on the JAX platform.

    The network is built with ``jax.experimental.stax``, jit-compiled, and run
    once during setup so that compilation happens outside the timed experiments.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE_X5, data_type)
        # All of these are filled in by pre_experiment for each (batch, dim) setting.
        self.tensor: jnp.DeviceArray = None
        self.tensor_result: jnp.DeviceArray = None
        self.network: Callable = None
        self.params = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the input tensor and the jit-compiled 5-layer dense network."""
        batch_size, dimension = experiment_args
        self.tensor = device_put(jnp.ones((batch_size, dimension), dtype=self.dtype))
        layers = [stax.Dense(dimension) for _ in range(5)]
        init_params, forward = stax.serial(*layers)
        _, self.params = init_params(random.PRNGKey(1), (batch_size, dimension))
        self.network = jit(forward)
        # First call executed here, outside the timed experiment.
        self.tensor_result = self.network(self.params, self.tensor)

    def experiment(self):
        # One timed forward pass through the five dense layers.
        self.tensor_result = self.network(self.params, self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Type
|
||||
from typing import Dict, Type
|
||||
|
||||
from src.common import Op
|
||||
from src.jax.add import JaxAddBench
|
||||
|
|
@ -7,14 +7,16 @@ from src.jax.div import JaxDivBench
|
|||
from src.jax.mul import JaxMulBench
|
||||
from src.jax.matmul import JaxMatmulBench
|
||||
from src.jax.nn_dense import JaxNNDenseBench
|
||||
from src.jax.nn_dense_x5 import JaxNNDenseX5Bench
|
||||
from src.jax.nn_matmul import JaxNNMatmulBench
|
||||
|
||||
|
||||
jax_ops: dict[Op, Type[JaxBase]] = {
|
||||
jax_ops: Dict[Op, Type[JaxBase]] = {
|
||||
Op.ADD: JaxAddBench,
|
||||
Op.MUL: JaxMulBench,
|
||||
Op.DIV: JaxDivBench,
|
||||
Op.MATMUL: JaxMatmulBench,
|
||||
Op.NN_MATMUL: JaxNNMatmulBench,
|
||||
Op.NN_DENSE: JaxNNDenseBench
|
||||
Op.NN_DENSE: JaxNNDenseBench,
|
||||
Op.NN_DENSE_X5: JaxNNDenseX5Bench
|
||||
}
|
||||
|
|
|
|||
|
|
@ -71,8 +71,22 @@ class DenseInfo(_BaseInfo):
|
|||
def mop(experiment_args: Tuple[int, int]) -> float:
|
||||
batch_size, dimension = experiment_args
|
||||
return batch_size * (
|
||||
((dimension * dimension / 1_000_000) * 2 * (dimension - 1)) + (
|
||||
dimension / 1_000_000))
|
||||
((2 * dimension * dimension * (dimension - 1) / 1_000_000) + (dimension / 1_000_000))
|
||||
)
|
||||
|
||||
|
||||
class DenseX5Info(_BaseInfo):
|
||||
@staticmethod
|
||||
def name(experiment_args: Tuple[int, int]) -> str:
|
||||
batch_size, dimension = experiment_args
|
||||
return f'5xDense(({batch_size}x{dimension}))'
|
||||
|
||||
@staticmethod
|
||||
def mop(experiment_args: Tuple[int, int]) -> float:
|
||||
batch_size, dimension = experiment_args
|
||||
return 5 * batch_size * (
|
||||
((2 * dimension * dimension * (dimension - 1) / 1_000_000) + (dimension / 1_000_000))
|
||||
)
|
||||
|
||||
|
||||
op_infos: Dict[Op, Type[_BaseInfo]] = {
|
||||
|
|
@ -81,5 +95,6 @@ op_infos: Dict[Op, Type[_BaseInfo]] = {
|
|||
Op.MUL: MulInfo,
|
||||
Op.MATMUL: MatmulInfo,
|
||||
Op.NN_MATMUL: MatmulInfo,
|
||||
Op.NN_DENSE: DenseInfo
|
||||
Op.NN_DENSE: DenseInfo,
|
||||
Op.NN_DENSE_X5: DenseX5Info
|
||||
}
|
||||
|
|
|
|||
28
src/plot.py
28
src/plot.py
|
|
@ -2,13 +2,14 @@ from pathlib import Path
|
|||
import math
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
|
||||
|
||||
from config.benchmark import Config
|
||||
from src.base import BenchBase
|
||||
from src.common import DataKey, DataType, Op, Platform
|
||||
|
||||
|
|
@ -69,7 +70,8 @@ def plot_experiments(bench: BenchBase, data: pd.DataFrame):
|
|||
plt.savefig(bench.output_path / f'{bench.bench_op.value}_{bench.data_type.value}.png')
|
||||
|
||||
|
||||
def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path):
|
||||
def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path,
|
||||
experiment_category: Config.ExperimentCategory = None):
|
||||
op_data = all_data[(all_data[comp_key.bench_op] == bench_op) & (all_data[comp_key.device] == device)]
|
||||
platform_list = op_data[comp_key.platform].unique()
|
||||
if len(platform_list) <= 1:
|
||||
|
|
@ -80,15 +82,24 @@ def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, ben
|
|||
sns.set_theme(style="ticks")
|
||||
for data_type in op_data[comp_key.data_type].unique():
|
||||
data = op_data[op_data[comp_key.data_type] == data_type]
|
||||
if experiment_category is not None:
|
||||
data = data[(data[key.mop] > experiment_category.value[0]) & (data[key.mop] < experiment_category.value[1])]
|
||||
if data.size < 1:
|
||||
return
|
||||
graph = sns.catplot(x=key.experiment, y=key.gflops, hue=comp_key.platform, data=data,
|
||||
kind='bar', estimator=np.median, height=8, aspect=1.4)
|
||||
if data[key.gflops].max() > data[key.gflops].min() * 100:
|
||||
if experiment_category is None and data[key.gflops].max() > data[key.gflops].min() * 100:
|
||||
graph.set(yscale="log")
|
||||
plt.xticks(rotation=70, fontsize=8)
|
||||
plt.subplots_adjust(top=0.92, bottom=0.25)
|
||||
plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16)
|
||||
plt.title(f'{device}', fontsize=12)
|
||||
plt.savefig(output_path / device / f'{bench_op}_{data_type}.png')
|
||||
if experiment_category is None:
|
||||
plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16)
|
||||
plt.savefig(output_path / f'{bench_op}_{data_type}.png')
|
||||
else:
|
||||
plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type}, {experiment_category.name})',
|
||||
fontsize=16)
|
||||
plt.savefig(output_path / f'{bench_op}_{data_type}_{experiment_category.name}.png')
|
||||
|
||||
|
||||
def compare(output_path: Path):
|
||||
|
|
@ -117,8 +128,13 @@ def compare(output_path: Path):
|
|||
# Compare between platforms
|
||||
comp_args = []
|
||||
for device in all_data[comp_key.device].unique():
|
||||
compare_path = output_path / device / 'comparison'
|
||||
if not compare_path.exists():
|
||||
compare_path.mkdir(parents=True)
|
||||
for bench_op in all_data[comp_key.bench_op].unique():
|
||||
comp_args.append((all_data, comp_key, device, bench_op, output_path))
|
||||
comp_args.append((all_data, comp_key, device, bench_op, compare_path, None))
|
||||
for cat in Config.ExperimentCategory:
|
||||
comp_args.append((all_data, comp_key, device, bench_op, compare_path, cat))
|
||||
|
||||
with mp.Pool(processes=math.ceil(os.cpu_count() * 0.8)) as pool:
|
||||
pool.starmap(_draw_comparison, comp_args)
|
||||
|
|
|
|||
37
src/pytorch/nn_dense_x5.py
Normal file
37
src/pytorch/nn_dense_x5.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.pytorch.base import TorchBase
|
||||
|
||||
|
||||
class DenseNetwork(torch.nn.Module):
    """Five stacked square Linear layers (input_dim -> input_dim) applied in sequence."""

    def __init__(self, input_dim: int, dtype: torch.dtype):
        super().__init__()
        layers = []
        for _ in range(5):
            layers.append(torch.nn.Linear(input_dim, input_dim, dtype=dtype))
        self.dense = torch.nn.Sequential(*layers)

    def forward(self, input_data: torch.Tensor) -> torch.Tensor:
        """Run the input through all five dense layers."""
        return self.dense(input_data)
|
||||
|
||||
|
||||
class TorchNNDenseX5Bench(TorchBase):
    """Torch benchmark for five chained fully-connected layers (see DenseNetwork)."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE_X5, data_type)
        # Filled in by pre_experiment for each (batch_size, dimension) setting.
        self.tensor: torch.Tensor = None
        self.tensor_result: torch.Tensor = None
        self.network: torch.nn.Module = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate the input tensor and the network on the benchmark device."""
        batch, features = experiment_args
        self.tensor = torch.ones((batch, features), dtype=self.dtype,
                                 device=self.device, requires_grad=False)
        self.network = DenseNetwork(features, self.dtype).to(self.device)
        # First forward pass executed here, outside the timed experiments.
        self.tensor_result = self.network(self.tensor)

    def experiment(self):
        # One timed forward pass.
        self.tensor_result = self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Type
|
||||
from typing import Dict, Type
|
||||
|
||||
from src.common import Op
|
||||
from src.pytorch.add import TorchAddBench
|
||||
|
|
@ -8,13 +8,15 @@ from src.pytorch.mul import TorchMulBench
|
|||
from src.pytorch.matmul import TorchMatmulBench
|
||||
from src.pytorch.nn_dense import TorchNNDenseBench
|
||||
from src.pytorch.nn_matmul import TorchNNMatmulBench
|
||||
from src.pytorch.nn_dense_x5 import TorchNNDenseX5Bench
|
||||
|
||||
|
||||
torch_ops: dict[Op, Type[TorchBase]] = {
|
||||
torch_ops: Dict[Op, Type[TorchBase]] = {
|
||||
Op.ADD: TorchAddBench,
|
||||
Op.MUL: TorchMulBench,
|
||||
Op.DIV: TorchDivBench,
|
||||
Op.MATMUL: TorchMatmulBench,
|
||||
Op.NN_MATMUL: TorchNNMatmulBench,
|
||||
Op.NN_DENSE: TorchNNDenseBench
|
||||
Op.NN_DENSE: TorchNNDenseBench,
|
||||
Op.NN_DENSE_X5: TorchNNDenseX5Bench
|
||||
}
|
||||
|
|
|
|||
30
src/tf_1/add.py
Normal file
30
src/tf_1/add.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFAddBench(TFBase):
    """TF1 benchmark for element-wise addition of two equally-shaped tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.ADD, data_type)
        self.add_op = None  # graph node, built in pre_experiment

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and build the addition graph."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        lhs = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        rhs = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                              initializer=tf.initializers.ones, trainable=False)
        self.add_op = lhs + rhs

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the addition node.
        self.session.run(self.add_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
32
src/tf_1/base.py
Normal file
32
src/tf_1/base.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
from pathlib import Path
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.base import BenchBase
|
||||
from src.common import DataType, Device, Op, Platform
|
||||
|
||||
|
||||
class TFBase(BenchBase):
    """Shared base for TF1 graph-mode benchmarks.

    Maps the project DataType to the tensorflow dtype and manages the
    tf.Session / default-graph lifecycle around each experiment.
    """

    def __init__(self, output_path: Path, bench_op: Op, data_type: DataType):
        dtype_by_data_type = {
            DataType.FLOAT16: tf.float16,
            DataType.FLOAT32: tf.float32,
            DataType.FLOAT64: tf.float64,
        }
        if data_type not in dtype_by_data_type:
            raise RuntimeError(f'data_type {data_type.value} not implemented')
        dtype = dtype_by_data_type[data_type]

        super().__init__(output_path, Platform.TF1, bench_op, Device.GPU, None, data_type, dtype)
        self.session: tf.Session = None  # opened per experiment in pre_experiment

    def pre_experiment(self, _experiment_args):
        """Open a fresh session for the upcoming experiment."""
        self.session = tf.Session()
        self.session.as_default()

    def post_experiment(self):
        """Close the session and wipe the default graph so variable names can be reused."""
        self.session.close()
        tf.reset_default_graph()

    def experiment(self):
        raise NotImplementedError()
|
||||
30
src/tf_1/div.py
Normal file
30
src/tf_1/div.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFDivBench(TFBase):
    """TF1 benchmark for element-wise division of two equally-shaped tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.DIV, data_type)
        self.div_op = None  # graph node, built in pre_experiment

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and build the division graph."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        numerator = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                                    initializer=tf.initializers.ones, trainable=False)
        denominator = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                                      initializer=tf.initializers.ones, trainable=False)
        self.div_op = numerator / denominator

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the division node.
        self.session.run(self.div_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
30
src/tf_1/matmul.py
Normal file
30
src/tf_1/matmul.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFMatmulBench(TFBase):
    """TF1 benchmark for matrix multiplication (tf.matmul) of two variables."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MATMUL, data_type)
        self.matmul_op = None  # graph node, built in pre_experiment

    # NOTE(review): experiment_args is actually a pair of shapes here (see run);
    # the Tuple[int, int] annotation is kept as in the sibling benches — confirm.
    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and build the matmul graph."""
        super().pre_experiment(experiment_args)
        left_shape, right_shape = experiment_args
        left = tf.get_variable('tensor_1', shape=left_shape, dtype=self.dtype,
                               initializer=tf.initializers.ones, trainable=False)
        right = tf.get_variable('tensor_2', shape=right_shape, dtype=self.dtype,
                                initializer=tf.initializers.ones, trainable=False)
        self.matmul_op = tf.matmul(left, right)

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the matmul node.
        self.session.run(self.matmul_op)

    def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
30
src/tf_1/mul.py
Normal file
30
src/tf_1/mul.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFMulBench(TFBase):
    """TF1 benchmark for element-wise multiplication of two equally-shaped tensors."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.MUL, data_type)
        self.mul_op = None  # graph node, built in pre_experiment

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and build the multiplication graph."""
        super().pre_experiment(experiment_args)
        shape = experiment_args
        factor_a = tf.get_variable('tensor_1', shape=shape, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        factor_b = tf.get_variable('tensor_2', shape=shape, dtype=self.dtype,
                                   initializer=tf.initializers.ones, trainable=False)
        self.mul_op = factor_a * factor_b

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the multiplication node.
        self.session.run(self.mul_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
32
src/tf_1/nn_dense.py
Normal file
32
src/tf_1/nn_dense.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFNNDenseBench(TFBase):
    """TF1 benchmark for one dense layer: matmul(input, weights) + biases."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        self.dense_op = None  # graph node, built in pre_experiment

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and build the dense-layer graph."""
        super().pre_experiment(experiment_args)
        batch_size, dimension = experiment_args
        inputs = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype,
                                 initializer=tf.initializers.ones, trainable=False)
        weights = tf.get_variable('Weights', shape=(dimension, dimension), dtype=self.dtype,
                                  initializer=tf.initializers.ones, trainable=False)
        biases = tf.get_variable('Biases', shape=dimension, dtype=self.dtype,
                                 initializer=tf.initializers.ones, trainable=False)
        self.dense_op = tf.matmul(inputs, weights) + biases

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the dense-layer node.
        self.session.run(self.dense_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
35
src/tf_1/nn_dense_x5.py
Normal file
35
src/tf_1/nn_dense_x5.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_1.base import TFBase
|
||||
|
||||
|
||||
class TFNNDenseX5Bench(TFBase):
    """TF1 benchmark for five chained dense layers built directly from matmul + bias."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE_X5, data_type)
        self.dense_op = None  # final graph node, built in pre_experiment

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Open the session (base class) and chain five dense layers in the graph."""
        super().pre_experiment(experiment_args)
        batch_size, dimension = experiment_args
        current = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype,
                                  initializer=tf.initializers.ones, trainable=False)
        for layer in range(5):
            weights = tf.get_variable(f'Weights_{layer}', shape=(dimension, dimension), dtype=self.dtype,
                                      initializer=tf.initializers.ones, trainable=False)
            biases = tf.get_variable(f'Biases_{layer}', shape=dimension, dtype=self.dtype,
                                     initializer=tf.initializers.ones, trainable=False)
            current = tf.matmul(current, weights) + biases
        self.dense_op = current

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        # One timed evaluation of the whole 5-layer chain.
        self.session.run(self.dense_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        super().run(experiment_args, experiment_count)
|
||||
20
src/tf_1/ops.py
Normal file
20
src/tf_1/ops.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from typing import Dict, Type
|
||||
|
||||
from src.common import Op
|
||||
from src.tf_1.add import TFAddBench
|
||||
from src.tf_1.base import TFBase
|
||||
from src.tf_1.div import TFDivBench
|
||||
from src.tf_1.mul import TFMulBench
|
||||
from src.tf_1.matmul import TFMatmulBench
|
||||
from src.tf_1.nn_dense import TFNNDenseBench
|
||||
from src.tf_1.nn_dense_x5 import TFNNDenseX5Bench
|
||||
|
||||
|
||||
# Registry mapping each benchmark operation to its TF1 implementation.
# Op.NN_MATMUL has no TF1 implementation in this module, so it is absent here;
# benchmark.py prints a "not implemented" message for ops missing from this dict.
tf1_ops: Dict[Op, Type[TFBase]] = {
    Op.ADD: TFAddBench,
    Op.MUL: TFMulBench,
    Op.DIV: TFDivBench,
    Op.MATMUL: TFMatmulBench,
    Op.NN_DENSE: TFNNDenseBench,
    Op.NN_DENSE_X5: TFNNDenseX5Bench
}
|
||||
36
src/tf_2/nn_dense_x5.py
Normal file
36
src/tf_2/nn_dense_x5.py
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_2.base import TFBase
|
||||
|
||||
|
||||
class DenseModel(tf.keras.Model):
    """Keras model of five stacked square Dense layers (input_dim -> input_dim)."""

    # Fix: `dtype` was declared as `dtype=tf.DType`, i.e. the tf.DType *class*
    # used as a default value instead of a type annotation (compare the Torch
    # twin, `dtype: torch.dtype`). Calling without an explicit dtype would have
    # passed the class object to the layers. The only caller always passes dtype.
    def __init__(self, input_dim: int, dtype: tf.DType):
        super().__init__()
        self.dense = tf.keras.Sequential(
            [tf.keras.layers.Dense(input_dim, dtype=dtype) for _ in range(5)])

    def call(self, input_tensor: tf.Tensor) -> tf.Tensor:
        """Run the input through all five dense layers."""
        return self.dense(input_tensor)
|
||||
|
||||
|
||||
class TFNNDenseX5Bench(TFBase):
    """TF2 (eager) benchmark for a five-layer dense network (DenseModel)."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE_X5, data_type)
        # Both are populated in pre_experiment(); None until then.
        self.tensor: tf.Tensor = None
        self.network: tf.keras.Model = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Allocate the input tensor and the model on the benchmark device.

        NOTE(review): unlike the TF1-style benches, super().pre_experiment()
        is not called here — confirm that is intentional.
        """
        batch_size, dimension = experiment_args
        # Place the constant input and the model weights on self.device so
        # the timed forward pass runs entirely on the target device.
        with self.device:
            self.tensor = tf.ones((batch_size, dimension), dtype=self.dtype)
            self.network = DenseModel(dimension, self.dtype)

    def experiment(self):
        """One forward pass through the five dense layers (the timed unit)."""
        self.network(self.tensor)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        # Pure delegation to the shared benchmark loop.
        super().run(experiment_args, experiment_count)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Type
|
||||
from typing import Dict, Type
|
||||
|
||||
from src.common import Op
|
||||
from src.tf_2.add import TFAddBench
|
||||
|
|
@ -7,14 +7,16 @@ from src.tf_2.div import TFDivBench
|
|||
from src.tf_2.mul import TFMulBench
|
||||
from src.tf_2.matmul import TFMatmulBench
|
||||
from src.tf_2.nn_dense import TFNNDenseBench
|
||||
from src.tf_2.nn_dense_x5 import TFNNDenseX5Bench
|
||||
from src.tf_2.nn_matmul import TFNNMatmulBench
|
||||
|
||||
|
||||
tf2_ops: dict[Op, Type[TFBase]] = {
|
||||
tf2_ops: Dict[Op, Type[TFBase]] = {
|
||||
Op.ADD: TFAddBench,
|
||||
Op.MUL: TFMulBench,
|
||||
Op.DIV: TFDivBench,
|
||||
Op.MATMUL: TFMatmulBench,
|
||||
Op.NN_MATMUL: TFNNMatmulBench,
|
||||
Op.NN_DENSE: TFNNDenseBench
|
||||
Op.NN_DENSE: TFNNDenseBench,
|
||||
Op.NN_DENSE_X5: TFNNDenseX5Bench
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,9 +35,3 @@ class TFBase(BenchBase):
|
|||
|
||||
def experiment(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def name(self, _experiment_args) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
def mop(self, _experiment_args) -> float:
|
||||
raise NotImplementedError()
|
||||
|
|
|
|||
32
src/tf_2_v1/nn_dense.py
Normal file
32
src/tf_2_v1/nn_dense.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_2_v1.base import TFBase
|
||||
|
||||
|
||||
class TFNNDenseBench(TFBase):
    """TF2 compat-v1 (graph-mode) benchmark for one dense layer: x @ W + b."""

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE, data_type)
        # Graph node evaluated by experiment(); built in pre_experiment().
        self.dense_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the dense-layer graph for (batch_size, dimension) and init vars.

        All variables are ones-initialized and non-trainable: the benchmark
        only times the forward evaluation, never training.

        NOTE(review): tf.get_variable with fixed names raises on a second call
        unless the base class resets the graph between runs — confirm TFBase.
        """
        super().pre_experiment(experiment_args)
        batch_size, dimension = experiment_args
        # Fix: renamed the misspelled local `inpput_tensor` -> `input_tensor`.
        input_tensor = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype,
                                       initializer=tf.initializers.ones, trainable=False)
        weights = tf.get_variable('Weights', shape=(dimension, dimension), dtype=self.dtype,
                                  initializer=tf.initializers.ones, trainable=False)
        biases = tf.get_variable('Biases', shape=dimension, dtype=self.dtype,
                                 initializer=tf.initializers.ones, trainable=False)
        self.dense_op = tf.matmul(input_tensor, weights) + biases

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One session evaluation of the dense op (the timed unit)."""
        self.session.run(self.dense_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        # Pure delegation to the shared benchmark loop.
        super().run(experiment_args, experiment_count)
|
||||
35
src/tf_2_v1/nn_dense_x5.py
Normal file
35
src/tf_2_v1/nn_dense_x5.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
import tensorflow.compat.v1 as tf
|
||||
|
||||
from src.common import DataType, Op
|
||||
from src.tf_2_v1.base import TFBase
|
||||
|
||||
|
||||
class TFNNDenseX5Bench(TFBase):
    """TF2 compat-v1 (graph-mode) benchmark of five chained dense layers.

    Each layer is x @ W_i + b_i with ones-initialized, non-trainable
    parameters; experiment() evaluates the whole chain in one session run.
    """

    def __init__(self, output_path: Path, data_type: DataType):
        super().__init__(output_path, Op.NN_DENSE_X5, data_type)
        # Final node of the five-layer chain; assembled in pre_experiment().
        self.dense_op = None

    def pre_experiment(self, experiment_args: Tuple[int, int]):
        """Build the five-layer dense graph for (batch_size, dimension)."""
        super().pre_experiment(experiment_args)
        batch_size, dimension = experiment_args
        # Start from a constant ones input and thread it through five layers.
        current = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype,
                                  initializer=tf.initializers.ones, trainable=False)
        for layer_index in range(5):
            layer_weights = tf.get_variable(f'Weights_{layer_index}',
                                            shape=(dimension, dimension), dtype=self.dtype,
                                            initializer=tf.initializers.ones, trainable=False)
            layer_biases = tf.get_variable(f'Biases_{layer_index}', shape=dimension,
                                           dtype=self.dtype,
                                           initializer=tf.initializers.ones, trainable=False)
            current = tf.matmul(current, layer_weights) + layer_biases
        self.dense_op = current

        self.session.run(tf.initializers.global_variables())

    def experiment(self):
        """One session evaluation of the full five-layer chain."""
        self.session.run(self.dense_op)

    def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int):
        # Pure delegation to the shared benchmark loop.
        super().run(experiment_args, experiment_count)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Type
|
||||
from typing import Dict, Type
|
||||
|
||||
from src.common import Op
|
||||
from src.tf_2_v1.add import TFAddBench
|
||||
|
|
@ -6,11 +6,15 @@ from src.tf_2_v1.base import TFBase
|
|||
from src.tf_2_v1.div import TFDivBench
|
||||
from src.tf_2_v1.mul import TFMulBench
|
||||
from src.tf_2_v1.matmul import TFMatmulBench
|
||||
from src.tf_2_v1.nn_dense import TFNNDenseBench
|
||||
from src.tf_2_v1.nn_dense_x5 import TFNNDenseX5Bench
|
||||
|
||||
|
||||
tf2v1_ops: dict[Op, Type[TFBase]] = {
|
||||
tf2v1_ops: Dict[Op, Type[TFBase]] = {
|
||||
Op.ADD: TFAddBench,
|
||||
Op.MUL: TFMulBench,
|
||||
Op.DIV: TFDivBench,
|
||||
Op.MATMUL: TFMatmulBench
|
||||
Op.MATMUL: TFMatmulBench,
|
||||
Op.NN_DENSE: TFNNDenseBench,
|
||||
Op.NN_DENSE_X5: TFNNDenseX5Bench
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue