From dbe5490c5b0512d65b8e282bf45e345927657e70 Mon Sep 17 00:00:00 2001 From: Corentin Date: Tue, 5 Oct 2021 10:51:18 +0900 Subject: [PATCH] Add TF1 platform, docker and README --- README.md | 84 +++++++++++++++++++++++++++++++++ benchmark.py | 44 ++++++++++++++--- config/benchmark.py | 10 ++++ docker-compose.yml | 32 +++++++++++++ docker/pytorch.dockerfile | 5 ++ docker/tensorflow.dockerfile | 5 ++ docker/tensorflow_v1.dockerfile | 5 ++ src/common.py | 3 +- src/jax/nn_dense_x5.py | 33 +++++++++++++ src/jax/ops.py | 8 ++-- src/op_info.py | 21 +++++++-- src/plot.py | 28 ++++++++--- src/pytorch/nn_dense_x5.py | 37 +++++++++++++++ src/pytorch/ops.py | 8 ++-- src/tf_1/add.py | 30 ++++++++++++ src/tf_1/base.py | 32 +++++++++++++ src/tf_1/div.py | 30 ++++++++++++ src/tf_1/matmul.py | 30 ++++++++++++ src/tf_1/mul.py | 30 ++++++++++++ src/tf_1/nn_dense.py | 32 +++++++++++++ src/tf_1/nn_dense_x5.py | 35 ++++++++++++++ src/tf_1/ops.py | 20 ++++++++ src/tf_2/nn_dense_x5.py | 36 ++++++++++++++ src/tf_2/ops.py | 8 ++-- src/tf_2_v1/base.py | 6 --- src/tf_2_v1/nn_dense.py | 32 +++++++++++++ src/tf_2_v1/nn_dense_x5.py | 35 ++++++++++++++ src/tf_2_v1/ops.py | 10 ++-- 28 files changed, 655 insertions(+), 34 deletions(-) create mode 100644 README.md create mode 100644 docker-compose.yml create mode 100644 docker/pytorch.dockerfile create mode 100644 docker/tensorflow.dockerfile create mode 100644 docker/tensorflow_v1.dockerfile create mode 100644 src/jax/nn_dense_x5.py create mode 100644 src/pytorch/nn_dense_x5.py create mode 100644 src/tf_1/add.py create mode 100644 src/tf_1/base.py create mode 100644 src/tf_1/div.py create mode 100644 src/tf_1/matmul.py create mode 100644 src/tf_1/mul.py create mode 100644 src/tf_1/nn_dense.py create mode 100644 src/tf_1/nn_dense_x5.py create mode 100644 src/tf_1/ops.py create mode 100644 src/tf_2/nn_dense_x5.py create mode 100644 src/tf_2_v1/nn_dense.py create mode 100644 src/tf_2_v1/nn_dense_x5.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..8e5f462 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# Deep Learning Benchmarks + +## Dependencies + +* **python** : version 3.6 or newer (tested on 3.6 and 3.9) + +Python packages, can be installed with `pip` (see the `--platform` option in the usage section): + +* **matplotlib** +* **seaborn** +* **tensorflow** : tested with version 2.6 (for `--platform TF2 TF2_V1`) and 1.15 (for `--platform TF1`) +* **torch** : tested with version 1.9 (for `--platform Torch`) +* **jaxlib** : tested with version 0.1.71+cuda111 (for `--platform Torch`) +* **jax** : tested with version 0.2.21 (for `--platform Torch`) + +To run with docker: + +* **docker** +* **docker-compose** +* **libnvidia-container** : for arch (AUR) (required for `nvidia-container-toolkit`) +* **nvidia-container-toolkit** : for arch (AUR) (required for gpu accelerated docker using `--gpus`) +* **nvidia-container-runtime** : for arch (AUR) (easier docker-compose integration using `runtime: nvidia`) + +See other distribution documentation to get GPU accelerated docker containers. + +## Usage + +Runing the `benchmark.py` script will run all the possible benchmarks automatically. 
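+For example, a run restricted to a single operation and data type on two platforms (the flags are described below) might look like:
+
+```
+python benchmark.py --platform TF2 Torch --op matmul --data float32
+```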
+ +Options can be seen using the `--help` or `-h` argument : + +``` +usage: benchmark.py [-h] [--output OUTPUT] [--no-benchmark] [--no-compare] [--count COUNT] [--platform [PLATFORM ...]] + [--data [DATA ...]] [--op [OP ...]] [--list-op] [--list-platform] [--list-data] + [--experiment-time EXPERIMENT_TIME] + +optional arguments: + -h, --help show this help message and exit + --output OUTPUT Path to output files (default: output) + --no-benchmark Avoid running benchmarks + --no-compare Avoid running platform comparaison + --count COUNT Number of experiments per benchmark (for stastistical analysis) + --platform [PLATFORM ...] + List of platform to benchmark [TF1, TF2, Torch] (else all are used) + --data [DATA ...] List of data type to benchmark [float16, float32, float64] (else all are used) + --op [OP ...] List of operation to benchmark (add, mul, div, matmul, etc) (else all are used) + --list-op List all possible operation to benchmark (no further action will be done) + --list-platform List all possible platform to benchmark (no further action will be done) + --list-data List all possible data to benchmark (no further action will be done) + --experiment-time EXPERIMENT_TIME + Change time (in s) per experiment (default=1.000s) +``` + +### Configuration + +In the `config/benchmark.py` file there are parameters that can be changed: + +* **ExperimentCategory** : Enum class that defines ranges of Mop (Millions of operations) used for comparisons +* **EXPERIMENT_TIME** : float number to set the amount of time to run each benchmark experiment (in seconds) +* **[]_ARGS** : each kind of benchmark will be run with specific arguments that can be changed here (usually batch sizes and input shapes) + +### Using Docker + +There are 3 services to use with `docker-compose` (`tensorflow_v1`, `tensorflow` and `torch`), simply run a service: + +``` +docker-compose run --rm torch +``` + +Arguments can be passed using the `BENCH_ARGS` environment variable like: + +``` +BENCH_ARGS="--op nn_dense_x5 --data float32 --no-compare" docker-compose run --rm tensorflow_v1 +``` + + +## Notes + +Plots y axis are automatically changed to logarithmic scale if the range is too wide, categorized comparisons are forced to not be in logarithmic scale for clarity (categories are implemented for this reason). 
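+The thresholds behind these rules live in `src/plot.py` and `config/benchmark.py`; roughly, the logic is equivalent to the sketch below (the `mop` and `gflops` column names are illustrative stand-ins for the actual data keys):
+
+```
+# Illustrative sketch only, not the actual plotting code.
+import pandas as pd
+
+def scale_and_filter(data: pd.DataFrame, category=None):
+    if category is not None:
+        low, high = category.value          # Mop range, e.g. SMALL = [0, 20]
+        data = data[(data["mop"] > low) & (data["mop"] < high)]
+        return data, "linear"               # categorized comparisons stay on a linear scale
+    if data["gflops"].max() > data["gflops"].min() * 100:
+        return data, "log"                  # wide range: more than two orders of magnitude
+    return data, "linear"
+```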
+ + +## Future + +* More operations : Conv1d, Conv2D, DepthWiseConv2D, RNN diff --git a/benchmark.py b/benchmark.py index 39f7783..cc44d3c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -14,12 +14,26 @@ from src.plot import compare def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op, bench_args, bench_count: int): if platform == Platform.JAX: + if data_type == DataType.FLOAT64: + os.environ['JAX_ENABLE_X64'] = 'true' from src.jax.ops import jax_ops if bench_op not in jax_ops: print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') else: jax_ops[bench_op](output_path, data_type).run(bench_args, bench_count) print() + elif platform == Platform.TF1: + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + import tensorflow as tf + if tf.__version__.split('.')[0] != '1': + print(f'Cannot run benchmark for platform TF1 with tensorflow version: {tf.__version__}') + return + from src.tf_1.ops import tf1_ops + if bench_op not in tf1_ops: + print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') + else: + tf1_ops[bench_op](output_path, data_type).run(bench_args, bench_count) + print() elif platform == Platform.TF2: os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' from src.tf_2.ops import tf2_ops @@ -30,6 +44,10 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be print() elif platform == Platform.TF2_V1: os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + import tensorflow as tf + if tf.__version__.split('.')[0] != '2': + print(f'Cannot run benchmark for platform TF2_V1 with tensorflow version: {tf.__version__}') + return from src.tf_2_v1.ops import tf2v1_ops if bench_op not in tf2v1_ops: print(f'Operation {bench_op.value} is not implemented for {platform.value} yet') @@ -49,7 +67,8 @@ def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, be def main(): parser = ArgumentParser() - parser.add_argument('--output', type=Path, default=Path('output'), help='Path to output files') + parser.add_argument('--output', type=Path, default=Path('output'), + help='Path to output files (default: output)') parser.add_argument('--no-benchmark', action='store_true', default=False, help='Avoid running benchmarks') parser.add_argument('--no-compare', action='store_true', default=False, help='Avoid running platform comparaison') parser.add_argument('--count', type=int, default=30, @@ -62,6 +81,10 @@ def main(): help='List of operation to benchmark (add, mul, div, matmul, etc) (else all are used)') parser.add_argument('--list-op', action='store_true', help='List all possible operation to benchmark (no further action will be done)') + parser.add_argument('--list-platform', action='store_true', + help='List all possible platform to benchmark (no further action will be done)') + parser.add_argument('--list-data', action='store_true', + help='List all possible data to benchmark (no further action will be done)') parser.add_argument( '--experiment-time', type=float, help=f'Change time (in s) per experiment (default={Config.EXPERIMENT_TIME:0.3f}s)') @@ -70,6 +93,12 @@ def main(): if arguments.list_op: print(', '.join([op.value for op in Op])) sys.exit(0) + if arguments.list_platform: + print(', '.join([p.value for p in Platform])) + sys.exit(0) + if arguments.list_data: + print(', '.join([d.value for d in DataType])) + sys.exit(0) output_path: Path = arguments.output no_benchmark: bool = arguments.no_benchmark @@ -91,13 +120,16 @@ def main(): for data_type in data: for bench_op in [Op.ADD, Op.MUL, Op.DIV]: if 
bench_op in bench_ops: - benchmarks.append((output_path, platform, data_type, bench_op, - Config.ELEMENT_WISE_ARGS, bench_count)) + benchmarks.append( + (output_path, platform, data_type, bench_op, Config.ELEMENT_WISE_ARGS, bench_count)) for bench_op in [Op.MATMUL, Op.NN_MATMUL]: if bench_op in bench_ops: - benchmarks.append((output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count)) - if Op.NN_DENSE in bench_ops: - benchmarks.append((output_path, platform, data_type, Op.NN_DENSE, Config.NN_1D_ARGS, bench_count)) + benchmarks.append( + (output_path, platform, data_type, bench_op, Config.MATMUL_ARGS, bench_count)) + for bench_op in [Op.NN_DENSE, Op.NN_DENSE_X5]: + if bench_op in bench_ops: + benchmarks.append( + (output_path, platform, data_type, bench_op, Config.NN_1D_ARGS, bench_count)) if benchmarks: for benchmark in benchmarks: diff --git a/config/benchmark.py b/config/benchmark.py index a072a34..6c6eb17 100644 --- a/config/benchmark.py +++ b/config/benchmark.py @@ -1,5 +1,15 @@ +from enum import Enum + + class Config: + class ExperimentCategory(Enum): + SMALL = [0, 20] # in Mop/experiment + MEDIUM = [20, 1000] + LARGE = [1000, 1_000_000] + VERY_LARGE = [1_000_000, 1_000_000_000] + EXPERIMENT_TIME = 1.0 + ELEMENT_WISE_ARGS = [ (100, 100), (100, 200), diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5de05a7 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,32 @@ +version: "3.8" +services: + tensorflow_v1: + build: + context: docker + dockerfile: tensorflow_v1.dockerfile + image: ayo/tf_v1_benchmark + container_name: tf_v1_benchmark + runtime: nvidia + volumes: + - .:/work + command: bash -c "umask 0000 && python benchmark.py --platform TF1 ${BENCH_ARGS}" + tensorflow: + build: + context: docker + dockerfile: tensorflow.dockerfile + image: ayo/tf_benchmark + container_name: tf_benchmark + runtime: nvidia + volumes: + - .:/work + command: bash -c "umask 0000 && python benchmark.py --platform TF2 TF2_V1 ${BENCH_ARGS}" + torch: + build: + context: docker + dockerfile: pytorch.dockerfile + image: ayo/torch_benchmark + container_name: torch_benchmark + runtime: nvidia + volumes: + - .:/work + command: bash -c "umask 0000 && python benchmark.py --platform Torch ${BENCH_ARGS}" diff --git a/docker/pytorch.dockerfile b/docker/pytorch.dockerfile new file mode 100644 index 0000000..8347dbd --- /dev/null +++ b/docker/pytorch.dockerfile @@ -0,0 +1,5 @@ +FROM pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime + +RUN pip install matplotlib seaborn && mkdir /work + +WORKDIR /work diff --git a/docker/tensorflow.dockerfile b/docker/tensorflow.dockerfile new file mode 100644 index 0000000..92924e9 --- /dev/null +++ b/docker/tensorflow.dockerfile @@ -0,0 +1,5 @@ +FROM tensorflow/tensorflow:latest-gpu + +RUN pip install matplotlib seaborn && mkdir /work + +WORKDIR /work diff --git a/docker/tensorflow_v1.dockerfile b/docker/tensorflow_v1.dockerfile new file mode 100644 index 0000000..a59626c --- /dev/null +++ b/docker/tensorflow_v1.dockerfile @@ -0,0 +1,5 @@ +FROM tensorflow/tensorflow:1.15.5-gpu + +RUN pip install matplotlib seaborn && mkdir /work + +WORKDIR /work diff --git a/src/common.py b/src/common.py index 56b6389..d4e9cb6 100644 --- a/src/common.py +++ b/src/common.py @@ -19,11 +19,12 @@ class Op(Enum): MATMUL = 'matmul' NN_MATMUL = 'nn_matmul' NN_DENSE = 'nn_dense' + NN_DENSE_X5 = 'nn_dense_x5' class Platform(Enum): JAX = 'jax' - # TF1 = 'TF1' + TF1 = 'TF1' TF2 = 'TF2' TF2_V1 = 'TF2_V1' TORCH = 'Torch' diff --git a/src/jax/nn_dense_x5.py 
b/src/jax/nn_dense_x5.py new file mode 100644 index 0000000..290672c --- /dev/null +++ b/src/jax/nn_dense_x5.py @@ -0,0 +1,33 @@ +from pathlib import Path +from typing import Callable, List, Tuple + +from jax import device_put, jit, random +from jax.experimental import stax +import jax.numpy as jnp + +from src.common import DataType, Op +from src.jax.base import JaxBase + + +class JaxNNDenseX5Bench(JaxBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE_X5, data_type) + self.tensor: jnp.DeviceArray = None + self.tensor_result: jnp.DeviceArray = None + self.network: Callable = None + self.params = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + batch_size, dimension = experiment_args + self.tensor = device_put(jnp.ones((batch_size, dimension), dtype=self.dtype)) + network_init, self.network = stax.serial( + *[stax.Dense(dimension) for _ in range(5)]) + _, self.params = network_init(random.PRNGKey(1), (batch_size, dimension)) + self.network = jit(self.network) + self.tensor_result = self.network(self.params, self.tensor) + + def experiment(self): + self.tensor_result = self.network(self.params, self.tensor) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/jax/ops.py b/src/jax/ops.py index ad926f9..46237b4 100644 --- a/src/jax/ops.py +++ b/src/jax/ops.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Dict, Type from src.common import Op from src.jax.add import JaxAddBench @@ -7,14 +7,16 @@ from src.jax.div import JaxDivBench from src.jax.mul import JaxMulBench from src.jax.matmul import JaxMatmulBench from src.jax.nn_dense import JaxNNDenseBench +from src.jax.nn_dense_x5 import JaxNNDenseX5Bench from src.jax.nn_matmul import JaxNNMatmulBench -jax_ops: dict[Op, Type[JaxBase]] = { +jax_ops: Dict[Op, Type[JaxBase]] = { Op.ADD: JaxAddBench, Op.MUL: JaxMulBench, Op.DIV: JaxDivBench, Op.MATMUL: JaxMatmulBench, Op.NN_MATMUL: JaxNNMatmulBench, - Op.NN_DENSE: JaxNNDenseBench + Op.NN_DENSE: JaxNNDenseBench, + Op.NN_DENSE_X5: JaxNNDenseX5Bench } diff --git a/src/op_info.py b/src/op_info.py index c30f7f9..1e44361 100644 --- a/src/op_info.py +++ b/src/op_info.py @@ -71,8 +71,22 @@ class DenseInfo(_BaseInfo): def mop(experiment_args: Tuple[int, int]) -> float: batch_size, dimension = experiment_args return batch_size * ( - ((dimension * dimension / 1_000_000) * 2 * (dimension - 1)) + ( - dimension / 1_000_000)) + ((2 * dimension * dimension * (dimension - 1) / 1_000_000) + (dimension / 1_000_000)) + ) + + +class DenseX5Info(_BaseInfo): + @staticmethod + def name(experiment_args: Tuple[int, int]) -> str: + batch_size, dimension = experiment_args + return f'5xDense(({batch_size}x{dimension}))' + + @staticmethod + def mop(experiment_args: Tuple[int, int]) -> float: + batch_size, dimension = experiment_args + return 5 * batch_size * ( + ((2 * dimension * dimension * (dimension - 1) / 1_000_000) + (dimension / 1_000_000)) + ) op_infos: Dict[Op, Type[_BaseInfo]] = { @@ -81,5 +95,6 @@ op_infos: Dict[Op, Type[_BaseInfo]] = { Op.MUL: MulInfo, Op.MATMUL: MatmulInfo, Op.NN_MATMUL: MatmulInfo, - Op.NN_DENSE: DenseInfo + Op.NN_DENSE: DenseInfo, + Op.NN_DENSE_X5: DenseX5Info } diff --git a/src/plot.py b/src/plot.py index 7d71346..cd94848 100644 --- a/src/plot.py +++ b/src/plot.py @@ -2,13 +2,14 @@ from pathlib import Path import math import multiprocessing as mp import os +from typing import List import numpy as np import 
matplotlib.pyplot as plt import pandas as pd import seaborn as sns - +from config.benchmark import Config from src.base import BenchBase from src.common import DataKey, DataType, Op, Platform @@ -69,7 +70,8 @@ def plot_experiments(bench: BenchBase, data: pd.DataFrame): plt.savefig(bench.output_path / f'{bench.bench_op.value}_{bench.data_type.value}.png') -def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path): +def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, bench_op: str, output_path: Path, + experiment_category: Config.ExperimentCategory = None): op_data = all_data[(all_data[comp_key.bench_op] == bench_op) & (all_data[comp_key.device] == device)] platform_list = op_data[comp_key.platform].unique() if len(platform_list) <= 1: @@ -80,15 +82,24 @@ def _draw_comparison(all_data: pd.DataFrame, comp_key: CompKey, device: str, ben sns.set_theme(style="ticks") for data_type in op_data[comp_key.data_type].unique(): data = op_data[op_data[comp_key.data_type] == data_type] + if experiment_category is not None: + data = data[(data[key.mop] > experiment_category.value[0]) & (data[key.mop] < experiment_category.value[1])] + if data.size < 1: + return graph = sns.catplot(x=key.experiment, y=key.gflops, hue=comp_key.platform, data=data, kind='bar', estimator=np.median, height=8, aspect=1.4) - if data[key.gflops].max() > data[key.gflops].min() * 100: + if experiment_category is None and data[key.gflops].max() > data[key.gflops].min() * 100: graph.set(yscale="log") plt.xticks(rotation=70, fontsize=8) plt.subplots_adjust(top=0.92, bottom=0.25) - plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16) plt.title(f'{device}', fontsize=12) - plt.savefig(output_path / device / f'{bench_op}_{data_type}.png') + if experiment_category is None: + plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type})', fontsize=16) + plt.savefig(output_path / f'{bench_op}_{data_type}.png') + else: + plt.suptitle('/'.join(platform_list) + f' {bench_op} ({data_type}, {experiment_category.name})', + fontsize=16) + plt.savefig(output_path / f'{bench_op}_{data_type}_{experiment_category.name}.png') def compare(output_path: Path): @@ -117,8 +128,13 @@ def compare(output_path: Path): # Compare between platforms comp_args = [] for device in all_data[comp_key.device].unique(): + compare_path = output_path / device / 'comparison' + if not compare_path.exists(): + compare_path.mkdir(parents=True) for bench_op in all_data[comp_key.bench_op].unique(): - comp_args.append((all_data, comp_key, device, bench_op, output_path)) + comp_args.append((all_data, comp_key, device, bench_op, compare_path, None)) + for cat in Config.ExperimentCategory: + comp_args.append((all_data, comp_key, device, bench_op, compare_path, cat)) with mp.Pool(processes=math.ceil(os.cpu_count() * 0.8)) as pool: pool.starmap(_draw_comparison, comp_args) diff --git a/src/pytorch/nn_dense_x5.py b/src/pytorch/nn_dense_x5.py new file mode 100644 index 0000000..1507d1d --- /dev/null +++ b/src/pytorch/nn_dense_x5.py @@ -0,0 +1,37 @@ +from pathlib import Path +from typing import List, Tuple + +import torch + +from src.common import DataType, Op +from src.pytorch.base import TorchBase + + +class DenseNetwork(torch.nn.Module): + def __init__(self, input_dim: int, dtype: torch.dtype): + super().__init__() + self.dense = torch.nn.Sequential( + *[torch.nn.Linear(input_dim, input_dim, dtype=dtype) for _ in range(5)]) + + def forward(self, input_data: torch.Tensor) -> 
torch.Tensor: + return self.dense(input_data) + + +class TorchNNDenseX5Bench(TorchBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE_X5, data_type) + self.tensor: torch.Tensor = None + self.tensor_result: torch.Tensor = None + self.network: torch.nn.Module = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + batch_size, dimension = experiment_args + self.tensor = torch.ones((batch_size, dimension), dtype=self.dtype, device=self.device, requires_grad=False) + self.network = DenseNetwork(dimension, self.dtype).to(self.device) + self.tensor_result = self.network(self.tensor) + + def experiment(self): + self.tensor_result = self.network(self.tensor) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/pytorch/ops.py b/src/pytorch/ops.py index 20e9157..9a881b0 100644 --- a/src/pytorch/ops.py +++ b/src/pytorch/ops.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Dict, Type from src.common import Op from src.pytorch.add import TorchAddBench @@ -8,13 +8,15 @@ from src.pytorch.mul import TorchMulBench from src.pytorch.matmul import TorchMatmulBench from src.pytorch.nn_dense import TorchNNDenseBench from src.pytorch.nn_matmul import TorchNNMatmulBench +from src.pytorch.nn_dense_x5 import TorchNNDenseX5Bench -torch_ops: dict[Op, Type[TorchBase]] = { +torch_ops: Dict[Op, Type[TorchBase]] = { Op.ADD: TorchAddBench, Op.MUL: TorchMulBench, Op.DIV: TorchDivBench, Op.MATMUL: TorchMatmulBench, Op.NN_MATMUL: TorchNNMatmulBench, - Op.NN_DENSE: TorchNNDenseBench + Op.NN_DENSE: TorchNNDenseBench, + Op.NN_DENSE_X5: TorchNNDenseX5Bench } diff --git a/src/tf_1/add.py b/src/tf_1/add.py new file mode 100644 index 0000000..61db591 --- /dev/null +++ b/src/tf_1/add.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFAddBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.ADD, data_type) + self.add_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + shape_1 = experiment_args + tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + self.add_op = tensor_1 + tensor_2 + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.add_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/base.py b/src/tf_1/base.py new file mode 100644 index 0000000..8a3834f --- /dev/null +++ b/src/tf_1/base.py @@ -0,0 +1,32 @@ +from pathlib import Path + +import tensorflow.compat.v1 as tf + +from src.base import BenchBase +from src.common import DataType, Device, Op, Platform + + +class TFBase(BenchBase): + def __init__(self, output_path: Path, bench_op: Op, data_type: DataType): + if data_type == DataType.FLOAT16: + dtype = tf.float16 + elif data_type == DataType.FLOAT32: + dtype = tf.float32 + elif data_type == DataType.FLOAT64: + dtype = tf.float64 + else: + raise RuntimeError(f'data_type {data_type.value} not implemented') + + 
super().__init__(output_path, Platform.TF1, bench_op, Device.GPU, None, data_type, dtype) + self.session: tf.Session = None + + def pre_experiment(self, _experiment_args): + self.session = tf.Session() + self.session.as_default() + + def post_experiment(self): + self.session.close() + tf.reset_default_graph() + + def experiment(self): + raise NotImplementedError() diff --git a/src/tf_1/div.py b/src/tf_1/div.py new file mode 100644 index 0000000..f16e383 --- /dev/null +++ b/src/tf_1/div.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFDivBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.DIV, data_type) + self.div_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + shape_1 = experiment_args + tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + self.div_op = tensor_1 / tensor_2 + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.div_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/matmul.py b/src/tf_1/matmul.py new file mode 100644 index 0000000..ebf8476 --- /dev/null +++ b/src/tf_1/matmul.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFMatmulBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.MATMUL, data_type) + self.matmul_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + shape_1, shape_2 = experiment_args + tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + tensor_2 = tf.get_variable('tensor_2', shape=shape_2, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + self.matmul_op = tf.matmul(tensor_1, tensor_2) + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.matmul_op) + + def run(self, experiment_args: List[Tuple[Tuple[int, int], Tuple[int, int]]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/mul.py b/src/tf_1/mul.py new file mode 100644 index 0000000..cc8e50c --- /dev/null +++ b/src/tf_1/mul.py @@ -0,0 +1,30 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFMulBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.MUL, data_type) + self.mul_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + shape_1 = experiment_args + tensor_1 = tf.get_variable('tensor_1', shape=shape_1, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + tensor_2 = tf.get_variable('tensor_2', shape=shape_1, dtype=self.dtype, + 
initializer=tf.initializers.ones, trainable=False) + self.mul_op = tensor_1 * tensor_2 + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.mul_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/nn_dense.py b/src/tf_1/nn_dense.py new file mode 100644 index 0000000..cc6d389 --- /dev/null +++ b/src/tf_1/nn_dense.py @@ -0,0 +1,32 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFNNDenseBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE, data_type) + self.dense_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + batch_size, dimension = experiment_args + input_tensor = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + weights = tf.get_variable('Weights', shape=(dimension, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + biases = tf.get_variable('Biases', shape=dimension, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + self.dense_op = tf.matmul(input_tensor, weights) + biases + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.dense_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/nn_dense_x5.py b/src/tf_1/nn_dense_x5.py new file mode 100644 index 0000000..cffc55e --- /dev/null +++ b/src/tf_1/nn_dense_x5.py @@ -0,0 +1,35 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_1.base import TFBase + + +class TFNNDenseX5Bench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE_X5, data_type) + self.dense_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + batch_size, dimension = experiment_args + input_tensor = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + output_tensor = input_tensor + for layer in range(5): + weights = tf.get_variable(f'Weights_{layer}', shape=(dimension, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + biases = tf.get_variable(f'Biases_{layer}', shape=dimension, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + output_tensor = tf.matmul(output_tensor, weights) + biases + self.dense_op = output_tensor + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.dense_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_1/ops.py b/src/tf_1/ops.py new file mode 100644 index 0000000..fa78295 --- /dev/null +++ b/src/tf_1/ops.py @@ -0,0 +1,20 @@ +from typing import Dict, Type + +from src.common import Op +from src.tf_1.add import TFAddBench +from src.tf_1.base import TFBase +from src.tf_1.div import TFDivBench +from src.tf_1.mul import TFMulBench 
+from src.tf_1.matmul import TFMatmulBench +from src.tf_1.nn_dense import TFNNDenseBench +from src.tf_1.nn_dense_x5 import TFNNDenseX5Bench + + +tf1_ops: Dict[Op, Type[TFBase]] = { + Op.ADD: TFAddBench, + Op.MUL: TFMulBench, + Op.DIV: TFDivBench, + Op.MATMUL: TFMatmulBench, + Op.NN_DENSE: TFNNDenseBench, + Op.NN_DENSE_X5: TFNNDenseX5Bench +} diff --git a/src/tf_2/nn_dense_x5.py b/src/tf_2/nn_dense_x5.py new file mode 100644 index 0000000..5f52c32 --- /dev/null +++ b/src/tf_2/nn_dense_x5.py @@ -0,0 +1,36 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow as tf + +from src.common import DataType, Op +from src.tf_2.base import TFBase + + +class DenseModel(tf.keras.Model): + def __init__(self, input_dim: int, dtype=tf.DType): + super().__init__() + self.dense = tf.keras.Sequential( + [tf.keras.layers.Dense(input_dim, dtype=dtype) for _ in range(5)]) + + def call(self, input_tensor: tf.Tensor) -> tf.Tensor: + return self.dense(input_tensor) + + +class TFNNDenseX5Bench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE_X5, data_type) + self.tensor: tf.Tensor = None + self.network: tf.keras.Model = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + batch_size, dimension = experiment_args + with self.device: + self.tensor = tf.ones((batch_size, dimension), dtype=self.dtype) + self.network = DenseModel(dimension, self.dtype) + + def experiment(self): + self.network(self.tensor) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_2/ops.py b/src/tf_2/ops.py index 88f89c7..638a06f 100644 --- a/src/tf_2/ops.py +++ b/src/tf_2/ops.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Dict, Type from src.common import Op from src.tf_2.add import TFAddBench @@ -7,14 +7,16 @@ from src.tf_2.div import TFDivBench from src.tf_2.mul import TFMulBench from src.tf_2.matmul import TFMatmulBench from src.tf_2.nn_dense import TFNNDenseBench +from src.tf_2.nn_dense_x5 import TFNNDenseX5Bench from src.tf_2.nn_matmul import TFNNMatmulBench -tf2_ops: dict[Op, Type[TFBase]] = { +tf2_ops: Dict[Op, Type[TFBase]] = { Op.ADD: TFAddBench, Op.MUL: TFMulBench, Op.DIV: TFDivBench, Op.MATMUL: TFMatmulBench, Op.NN_MATMUL: TFNNMatmulBench, - Op.NN_DENSE: TFNNDenseBench + Op.NN_DENSE: TFNNDenseBench, + Op.NN_DENSE_X5: TFNNDenseX5Bench } diff --git a/src/tf_2_v1/base.py b/src/tf_2_v1/base.py index c2ce4b5..39c60ef 100644 --- a/src/tf_2_v1/base.py +++ b/src/tf_2_v1/base.py @@ -35,9 +35,3 @@ class TFBase(BenchBase): def experiment(self): raise NotImplementedError() - - def name(self, _experiment_args) -> str: - raise NotImplementedError() - - def mop(self, _experiment_args) -> float: - raise NotImplementedError() diff --git a/src/tf_2_v1/nn_dense.py b/src/tf_2_v1/nn_dense.py new file mode 100644 index 0000000..f86ed0e --- /dev/null +++ b/src/tf_2_v1/nn_dense.py @@ -0,0 +1,32 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_2_v1.base import TFBase + + +class TFNNDenseBench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE, data_type) + self.dense_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + batch_size, dimension = experiment_args + inpput_tensor = tf.get_variable('input_tensor', 
shape=(batch_size, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + weights = tf.get_variable('Weights', shape=(dimension, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + biases = tf.get_variable('Biases', shape=dimension, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + self.dense_op = tf.matmul(inpput_tensor, weights) + biases + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.dense_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_2_v1/nn_dense_x5.py b/src/tf_2_v1/nn_dense_x5.py new file mode 100644 index 0000000..abbc9f7 --- /dev/null +++ b/src/tf_2_v1/nn_dense_x5.py @@ -0,0 +1,35 @@ +from pathlib import Path +from typing import List, Tuple + +import tensorflow.compat.v1 as tf + +from src.common import DataType, Op +from src.tf_2_v1.base import TFBase + + +class TFNNDenseX5Bench(TFBase): + def __init__(self, output_path: Path, data_type: DataType): + super().__init__(output_path, Op.NN_DENSE_X5, data_type) + self.dense_op = None + + def pre_experiment(self, experiment_args: Tuple[int, int]): + super().pre_experiment(experiment_args) + batch_size, dimension = experiment_args + input_tensor = tf.get_variable('input_tensor', shape=(batch_size, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + output_tensor = input_tensor + for layer in range(5): + weights = tf.get_variable(f'Weights_{layer}', shape=(dimension, dimension), dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + biases = tf.get_variable(f'Biases_{layer}', shape=dimension, dtype=self.dtype, + initializer=tf.initializers.ones, trainable=False) + output_tensor = tf.matmul(output_tensor, weights) + biases + self.dense_op = output_tensor + + self.session.run(tf.initializers.global_variables()) + + def experiment(self): + self.session.run(self.dense_op) + + def run(self, experiment_args: List[Tuple[int, int]], experiment_count: int): + super().run(experiment_args, experiment_count) diff --git a/src/tf_2_v1/ops.py b/src/tf_2_v1/ops.py index d409e3f..d5b2c93 100644 --- a/src/tf_2_v1/ops.py +++ b/src/tf_2_v1/ops.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Dict, Type from src.common import Op from src.tf_2_v1.add import TFAddBench @@ -6,11 +6,15 @@ from src.tf_2_v1.base import TFBase from src.tf_2_v1.div import TFDivBench from src.tf_2_v1.mul import TFMulBench from src.tf_2_v1.matmul import TFMatmulBench +from src.tf_2_v1.nn_dense import TFNNDenseBench +from src.tf_2_v1.nn_dense_x5 import TFNNDenseX5Bench -tf2v1_ops: dict[Op, Type[TFBase]] = { +tf2v1_ops: Dict[Op, Type[TFBase]] = { Op.ADD: TFAddBench, Op.MUL: TFMulBench, Op.DIV: TFDivBench, - Op.MATMUL: TFMatmulBench + Op.MATMUL: TFMatmulBench, + Op.NN_DENSE: TFNNDenseBench, + Op.NN_DENSE_X5: TFNNDenseX5Bench }