diff --git a/benchmark.py b/benchmark.py
index c7f42ac..8aa5f1a 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -1,39 +1,99 @@
 from argparse import ArgumentParser
+import multiprocessing as mp
+import os
 from pathlib import Path
+from typing import Type

-from src.base import DataType
-from src.torch.matmul import TorchMatmulBench
+from src.base import BenchBase
+from src.common import DataType, Op, Platform
+
+
+def run_benchmark(output_path: Path, platform: Platform, data_type: DataType, bench_op: Op,
+                  bench_args, bench_count: int):
+    if platform == Platform.TF2:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+        from src.tf_2.ops import tf2_ops
+        if bench_op not in tf2_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            tf2_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    elif platform == Platform.TORCH:
+        from src.pytorch.ops import torch_ops
+        if bench_op not in torch_ops:
+            print(f'Operation {bench_op.value} is not implemented for {platform.value} yet')
+        else:
+            torch_ops[bench_op](output_path).run(bench_args, bench_count, data_type)
+            print()
+    else:
+        print(f'Platform {platform.value} is not implemented yet')


 def main():
     parser = ArgumentParser()
     parser.add_argument('--output', type=Path, default=Path('output'),
                         help='Path to output files')
+    parser.add_argument('--count', type=int, default=30,
+                        help='Number of experiments per benchmark (for statistical analysis)')
+    parser.add_argument('--platform', nargs='*', type=Platform,
+                        help='List of platforms to benchmark [TF1, TF2, Torch] (defaults to all)')
+    parser.add_argument('--data', nargs='*', type=DataType,
+                        help='List of data types to benchmark [float16, float32, float64] (defaults to all)')
+    parser.add_argument('--op', nargs='*', type=Op,
+                        help='List of operations to benchmark [add, mul, div, matmul] (defaults to all)')
     arguments = parser.parse_args()

     output_path: Path = arguments.output
+    bench_count: int = arguments.count
+    platforms: list[Platform] = arguments.platform if arguments.platform is not None else list(Platform)
+    data: list[DataType] = arguments.data if arguments.data is not None else list(DataType)
+    bench_ops: list[Op] = arguments.op if arguments.op is not None else list(Op)

     if not output_path.exists():
         output_path.mkdir(parents=True)

-    for data_type in DataType:
-        TorchMatmulBench(output_path).run(
-            [
-                ((100, 100), (100, 100)),
-                ((100, 200), (200, 100)),
-                ((128, 128), (128, 128)),
-                ((200, 100), (100, 200)),
-                ((200, 200), (200, 200)),
-                ((256, 256), (256, 256)),
-                ((256, 512), (512, 256)),
-                ((400, 400), (400, 400)),
-                ((512, 256), (256, 512)),
-                ((512, 512), (512, 512)),
-                ((800, 800), (800, 800)),
-                ((1000, 1000), (1000, 1000)),
-                ((1200, 1200), (1200, 1200)),
-            ],
-            12,
-            data_type)
+    # One job tuple per (platform, data type, op) combination; each is passed to run_benchmark.
+    benchmarks: list[tuple] = []
+    element_wise_args = [
+        (100, 100),
+        (100, 200),
+        (128, 128),
+        (200, 100),
+        (200, 200),
+        (256, 256),
+        (256, 512),
+        (512, 256),
+        (400, 400),
+        (512, 512),
+        (800, 800),
+        (1024, 1024),
+        (1800, 1800)]
+    matmul_args = [
+        ((100, 100), (100, 100)),
+        ((100, 200), (200, 100)),
+        ((128, 128), (128, 128)),
+        ((200, 100), (100, 200)),
+        ((200, 200), (200, 200)),
+        ((256, 256), (256, 256)),
+        ((256, 512), (512, 256)),
+        ((400, 400), (400, 400)),
+        ((512, 256), (256, 512)),
+        ((512, 512), (512, 512)),
+        ((800, 800), (800, 800)),
+        ((1000, 1000), (1000, 1000)),
+        ((1200, 1200), (1200, 1200))]
+
+    for platform in platforms:
+        for data_type in data:
+            for bench_op in [Op.ADD, Op.MUL, Op.DIV]:
+                if bench_op in bench_ops:
+                    benchmarks.append((output_path, platform, data_type, bench_op, element_wise_args, bench_count))
+            if Op.MATMUL in bench_ops:
+                benchmarks.append((output_path, platform, data_type, Op.MATMUL, matmul_args, bench_count))
+
+    # Run each configuration sequentially, each in a fresh process.
+    for benchmark in benchmarks:
+        process = mp.Process(target=run_benchmark, args=benchmark)
+        process.start()
+        process.join()
+
     print('Benchmark done')
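
For reference, a single configuration can also be exercised without going through argument parsing. A minimal sketch, assuming the modules added in this diff are importable as src.* from the repository root; the CLI equivalent would be python benchmark.py --platform Torch --op matmul --data float32 --count 10:

    # Run one Torch matmul benchmark directly, bypassing the CLI.
    from pathlib import Path

    from benchmark import run_benchmark
    from src.common import DataType, Op, Platform

    run_benchmark(
        output_path=Path('output'),
        platform=Platform.TORCH,
        data_type=DataType.FLOAT32,
        bench_op=Op.MATMUL,
        bench_args=[((256, 256), (256, 256)), ((512, 512), (512, 512))],
        bench_count=10)
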
diff --git a/src/base.py b/src/base.py
index 94b4924..643a18f 100644
--- a/src/base.py
+++ b/src/base.py
@@ -1,22 +1,106 @@
 from pathlib import Path
-from enum import Enum
+import time
+
+import numpy as np
+import pandas as pd
+
+from src.common import DataKey, DataType, Device, Op, Platform
+from src.plot import plot_experiments
+from src.utils import get_cpu_name, get_nvidia_name


-class Device(Enum):
-    CPU = 'cpu'
-    GPU = 'gpu'
-
-
-class DataType(Enum):
-    FLOAT16 = 'float16'
-    FLOAT32 = 'float32'
-    FLOAT64 = 'float64'
-
-
-class Base():
-    def __init__(self, output_path: Path):
+class BenchBase():
+    def __init__(self, output_path: Path, platform: Platform, bench_op: Op, device_type: Device, device):
         self._base_output_path = output_path
         self.output_path = output_path
+        self.platform = platform
+        self.bench_op = bench_op
+        self.device_type = device_type
+        self.device = device
+        self.dtype = None

     def set_output_path(self, device: Device, device_name: str):
-        self.output_path = self._base_output_path / f'{device.value}_{device_name}'
+        self.output_path = (
+            self._base_output_path / f'{device.value}_{device_name}' / self.platform.value / self.bench_op.value)
+
+    def get_dtype(self, data_type: DataType):
+        raise NotImplementedError()
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
+
+    def run(self, experiment_args, experiment_count: int, data_type: DataType):
+        self.set_output_path(self.device_type, get_cpu_name() if self.device_type == Device.CPU else get_nvidia_name())
+
+        if not self.output_path.exists():
+            self.output_path.mkdir(parents=True)
+
+        dtype = self.get_dtype(data_type)
+
+        print(f'Starting {self.platform.value}\'s {self.bench_op.value} benchmark with data type: {data_type.value}')
+
+        experiment_names = []
+        experiment_lengths = []
+        experiment_times = []
+        experiment_mop = []
+        for args in experiment_args:
+            # warmup
+            for _ in range(4):
+                self.experiment(args, 5, dtype, self.device)
+
+            # speed evaluation
+            counter = 0
+            start_time = time.time()
+            while time.time() - start_time < 0.2:
+                self.experiment(args, 10, dtype, self.device)
+                counter += 10
+            end_time = time.time()
+
+            target_time = 1.0  # in s
+            experiment_speed = counter / (end_time - start_time)  # in op/s
+            experiment_length = max(int(target_time / experiment_count * experiment_speed), 2)
+            # print(f'Evaluated {counter} {self.bench_op.value} in {end_time - start_time:0.3f}s'
+            #       f' => {experiment_speed:.03f}{self.bench_op.value}/s'
+            #       f', estimate {target_time:.03f}s with {experiment_length}x{experiment_count} exps')
+
+            run_times = []
+            for _ in range(experiment_count):
+                start_time = time.time()
+                self.experiment(args, experiment_length, dtype, self.device)
+                run_times.append(time.time() - start_time)
+            experiment_times += run_times
+            experiment_names += [self.name(args)] * experiment_count
+            experiment_lengths += [experiment_length] * experiment_count
+            experiment_mop += [self.mop(args)] * experiment_count
+
+            total_time = np.array(run_times, dtype=np.float64).sum()
+            total_gflop = self.mop(args) * experiment_length * experiment_count / 1000
+            print(f'Run {experiment_names[-1]} (x{experiment_length})'
+                  f' in {total_time:0.2f}s => {total_gflop / total_time:0.3f} GFLOPS')
+
+        data = self.save_experiments(experiment_names, experiment_times, experiment_lengths, experiment_mop, data_type)
+        plot_experiments(self.output_path, data, data_type, self.bench_op, self.platform)
+
+    def save_experiments(
+            self, experiment_names: list[str], experiment_times: list[float],
+            experiment_lengths: list[int], experiment_mop: list[float], data_type: DataType) -> pd.DataFrame:
+        key = DataKey(self.bench_op)
+        data = pd.DataFrame(
+            {
+                key.experiment: experiment_names,
+                key.time: experiment_times,
+                key.count: experiment_lengths,
+                key.speed: [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
+                key.mop: experiment_mop,
+                key.gflops: [(mop * l) / (t * 1000.0)
+                             for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
+            })
+        data.to_csv(self.output_path / f'{self.bench_op.value}_{data_type.value}.csv', sep='\t')
+        return data
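
BenchBase now defines the whole backend contract: get_dtype, experiment, name and mop, plus the constructor arguments. To show the pattern, here is a minimal, purely hypothetical sketch of a NumPy backend; Platform.NUMPY does not exist in src/common.py and is only assumed here for illustration:

    # Hypothetical example only: Platform.NUMPY would have to be added to src/common.py first.
    from pathlib import Path

    import numpy as np

    from src.base import BenchBase
    from src.common import DataType, Device, Op, Platform


    class NumpyAddBench(BenchBase):
        def __init__(self, output_path: Path):
            # CPU only; NumPy needs no device handle, so None is passed through.
            super().__init__(output_path, Platform.NUMPY, Op.ADD, Device.CPU, None)

        def get_dtype(self, data_type: DataType) -> np.dtype:
            return np.dtype(data_type.value)  # 'float16' / 'float32' / 'float64'

        def experiment(self, experiment_args, length, dtype, _device):
            tensor_1 = np.ones(experiment_args, dtype=dtype)
            tensor_2 = np.ones(experiment_args, dtype=dtype)
            for _ in range(length):
                _ = tensor_1 + tensor_2

        def name(self, experiment_args) -> str:
            return f'{experiment_args[0]}x{experiment_args[1]} + {experiment_args[0]}x{experiment_args[1]}'

        def mop(self, experiment_args) -> float:
            return experiment_args[0] * experiment_args[1] / 1000_000
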
diff --git a/src/common.py b/src/common.py
new file mode 100644
index 0000000..a487e9c
--- /dev/null
+++ b/src/common.py
@@ -0,0 +1,36 @@
+from enum import Enum
+
+
+class Device(Enum):
+    CPU = 'cpu'
+    GPU = 'gpu'
+
+
+class DataType(Enum):
+    FLOAT16 = 'float16'
+    FLOAT32 = 'float32'
+    FLOAT64 = 'float64'
+
+
+class Op(Enum):
+    NO_OP = 'noop'
+    ADD = 'add'
+    DIV = 'div'
+    MUL = 'mul'
+    MATMUL = 'matmul'
+
+
+class Platform(Enum):
+    TF1 = 'TF1'
+    TF2 = 'TF2'
+    TORCH = 'Torch'
+
+
+class DataKey():
+    def __init__(self, bench_op: Op):
+        self.experiment = 'experiment'
+        self.time = 'run times (s)'
+        self.count = 'count'
+        self.mop = f'Mop/{bench_op.value}'
+        self.speed = f'ms/{bench_op.value}'
+        self.gflops = 'GFLOPS'
diff --git a/src/plot.py b/src/plot.py
new file mode 100644
index 0000000..f4cf907
--- /dev/null
+++ b/src/plot.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+
+from src.common import DataKey, DataType, Op, Platform
+
+
+def plot_experiments(output_path: Path, data: pd.DataFrame, data_type: DataType, bench_op: Op, platform: Platform):
+    key = DataKey(bench_op)
+    sum_data = data[[key.experiment, key.time, key.count]].groupby(
+        key.experiment, as_index=False, sort=False).sum()
+    mean_data = data[[key.experiment, key.speed]].groupby(
+        key.experiment, as_index=False, sort=False).mean()
+    max_data = data[[key.experiment, key.mop]].groupby(
+        key.experiment, as_index=False, sort=False).max()
+
+    sns.set_theme(style="ticks")
+    figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
+    figure.suptitle(f'{platform.value} {bench_op.value} ({data_type.value})', fontsize=16)
+    for axe in axes[:-1]:
+        axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+
+    chart = sns.barplot(x=key.experiment, y=key.mop, data=max_data, ax=axes[0], order=data[key.experiment].unique())
+    axes[0].set_yscale("log")
+    for patch, value in zip(chart.patches, max_data[key.mop]):
+        chart.annotate(f'{value:0.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.speed, data=data, estimator=np.median, ax=axes[1])
+    for patch, value in zip(chart.patches, mean_data[key.speed]):
+        chart.annotate(f'{value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    chart = sns.barplot(x=key.experiment, y=key.gflops, data=data, estimator=np.median, ax=axes[2])
+    for patch, mop, count, value in zip(chart.patches, max_data[key.mop], sum_data[key.count], sum_data[key.time]):
+        chart.annotate(f'{(mop * count / 1000) / value:.3f}',
+                       (patch.get_x() + patch.get_width() / 2.0, patch.get_height()),
+                       ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
+                       textcoords='offset points')
+
+    plt.xticks(rotation=20)
+    plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
+    plt.savefig(output_path / f'{bench_op.value}_{data_type.value}.png')
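
Each run now leaves a tab-separated CSV next to the plot, with column names coming from DataKey. A quick post-processing sketch; the path below is an illustrative placeholder, the real one is built from device, platform and op by set_output_path:

    # Read one result file back and print the median GFLOPS per experiment.
    import pandas as pd

    from src.common import DataKey, Op

    key = DataKey(Op.MATMUL)
    data = pd.read_csv('output/cpu_my_cpu/Torch/matmul/matmul_float32.csv', sep='\t', index_col=0)
    print(data.groupby(key.experiment, sort=False)[key.gflops].median())
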
diff --git a/src/pytorch/add.py b/src/pytorch/add.py
new file mode 100644
index 0000000..08f6b84
--- /dev/null
+++ b/src/pytorch/add.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchAddBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/base.py b/src/pytorch/base.py
new file mode 100644
index 0000000..335042d
--- /dev/null
+++ b/src/pytorch/base.py
@@ -0,0 +1,39 @@
+from pathlib import Path
+
+import torch
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TorchBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        if torch.cuda.is_available():
+            if torch.cuda.device_count() > 1:
+                print('WARNING: multi-GPU benchmarking is not implemented yet (only the first device is used)')
+            torch.backends.cudnn.benchmark = True
+            device_type = Device.GPU
+            device = torch.device('cuda:0')
+        else:
+            device_type = Device.CPU
+            device = torch.device('cpu')
+
+        super().__init__(output_path, Platform.TORCH, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> torch.dtype:
+        if data_type == DataType.FLOAT16:
+            return torch.float16
+        if data_type == DataType.FLOAT32:
+            return torch.float32
+        if data_type == DataType.FLOAT64:
+            return torch.float64
+        raise NotImplementedError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
diff --git a/src/pytorch/div.py b/src/pytorch/div.py
new file mode 100644
index 0000000..9a0b309
--- /dev/null
+++ b/src/pytorch/div.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchDivBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/matmul.py b/src/pytorch/matmul.py
new file mode 100644
index 0000000..c40c261
--- /dev/null
+++ b/src/pytorch/matmul.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMatmulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1, shape_2 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_2, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/mul.py b/src/pytorch/mul.py
new file mode 100644
index 0000000..7208a6d
--- /dev/null
+++ b/src/pytorch/mul.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import torch
+
+from src.common import DataType, Op
+from src.pytorch.base import TorchBase
+
+
+class TorchMulBench(TorchBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: torch.dtype, device: torch.device):
+        shape_1 = experiment_args
+        tensor_1 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+        tensor_2 = torch.ones(shape_1, dtype=dtype, device=device, requires_grad=False)
+
+        for _ in range(length):
+            _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/pytorch/ops.py b/src/pytorch/ops.py
new file mode 100644
index 0000000..22e4e96
--- /dev/null
+++ b/src/pytorch/ops.py
@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.pytorch.add import TorchAddBench
+from src.pytorch.base import TorchBase
+from src.pytorch.div import TorchDivBench
+from src.pytorch.mul import TorchMulBench
+from src.pytorch.matmul import TorchMatmulBench
+
+
+torch_ops: dict[Op, Type[TorchBase]] = {
+    Op.ADD: TorchAddBench,
+    Op.MUL: TorchMulBench,
+    Op.DIV: TorchDivBench,
+    Op.MATMUL: TorchMatmulBench
+}
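
For readers checking the throughput numbers: mop() returns millions of floating-point operations per single execution of the op. The element-wise benches count one operation per output element (rows x cols / 1e6), and the matmul benches count 2*(K - 1) operations per output element of an (M, K) @ (K, N) product, the same formula carried over from the old TorchMatmulBench. A small worked check mirroring those formulas:

    # Worked check of the Mop figures produced by mop().
    def element_wise_mop(shape):
        rows, cols = shape
        return rows * cols / 1000_000  # one op per output element

    def matmul_mop(shape_1, shape_2):
        m, k = shape_1
        _, n = shape_2
        return (m * n / 1000_000) * 2 * (k - 1)  # (k - 1) adds and (k - 1) muls per output element

    print(element_wise_mop((512, 512)))        # 0.262144 Mop
    print(matmul_mop((512, 512), (512, 512)))  # 267.911168 Mop
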
diff --git a/src/tf_2/add.py b/src/tf_2/add.py
new file mode 100644
index 0000000..7850157
--- /dev/null
+++ b/src/tf_2/add.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFAddBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.ADD)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 + tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} + {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/base.py b/src/tf_2/base.py
new file mode 100644
index 0000000..808ee45
--- /dev/null
+++ b/src/tf_2/base.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.base import BenchBase
+from src.common import DataType, Device, Op, Platform
+
+
+class TFBase(BenchBase):
+    def __init__(self, output_path: Path, bench_op: Op):
+        gpus = tf.config.list_physical_devices('GPU')
+        if gpus:
+            if len(gpus) > 1:
+                print('WARNING: multi-GPU benchmarking is not implemented yet (only the first device is used)')
+
+            tf.config.experimental.set_memory_growth(gpus[0], True)
+            tf.config.set_visible_devices(gpus[0], 'GPU')
+            # logical_gpus = tf.config.list_logical_devices('GPU')
+            device_type = Device.GPU
+            device = tf.device('/GPU:0')
+        else:
+            device_type = Device.CPU
+            device = tf.device('/CPU:0')
+
+        super().__init__(output_path, Platform.TF2, bench_op, device_type, device)
+
+    def get_dtype(self, data_type: DataType) -> tf.DType:
+        if data_type == DataType.FLOAT16:
+            return tf.float16
+        if data_type == DataType.FLOAT32:
+            return tf.float32
+        if data_type == DataType.FLOAT64:
+            return tf.float64
+        raise RuntimeError(f'data_type {data_type.value} not implemented')
+
+    def experiment(self, _experiment_args, _length, _dtype, _device):
+        raise NotImplementedError()
+
+    def name(self, _experiment_args) -> str:
+        raise NotImplementedError()
+
+    def mop(self, _experiment_args) -> float:
+        raise NotImplementedError()
diff --git a/src/tf_2/div.py b/src/tf_2/div.py
new file mode 100644
index 0000000..21dd9b4
--- /dev/null
+++ b/src/tf_2/div.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFDivBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.DIV)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 / tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} / {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/matmul.py b/src/tf_2/matmul.py
new file mode 100644
index 0000000..70308b3
--- /dev/null
+++ b/src/tf_2/matmul.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMatmulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MATMUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1, shape_2 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_2, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 @ tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1, shape_2 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1, shape_2 = experiment_args
+        return (shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)
+
+    def run(self,
+            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/mul.py b/src/tf_2/mul.py
new file mode 100644
index 0000000..12ca880
--- /dev/null
+++ b/src/tf_2/mul.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+import tensorflow as tf
+
+from src.common import DataType, Op
+from src.tf_2.base import TFBase
+
+
+class TFMulBench(TFBase):
+    def __init__(self, output_path: Path):
+        super().__init__(output_path, Op.MUL)
+
+    def experiment(self, experiment_args: tuple[int, int], length: int, dtype: tf.DType, device: tf.device):
+        shape_1 = experiment_args
+        with device:
+            tensor_1 = tf.ones(shape_1, dtype=dtype)
+            tensor_2 = tf.ones(shape_1, dtype=dtype)
+
+            for _ in range(length):
+                _ = tensor_1 * tensor_2
+
+    def name(self, experiment_args: tuple[int, int]) -> str:
+        shape_1 = experiment_args
+        return f'{shape_1[0]}x{shape_1[1]} * {shape_1[0]}x{shape_1[1]}'
+
+    def mop(self, experiment_args: tuple[int, int]) -> float:
+        shape_1 = experiment_args
+        return shape_1[0] * shape_1[1] / 1000_000
+
+    def run(self,
+            experiment_args: list[tuple[int, int]],
+            experiment_count: int,
+            data_type: DataType):
+        super().run(experiment_args, experiment_count, data_type)
diff --git a/src/tf_2/ops.py b/src/tf_2/ops.py
new file mode 100644
index 0000000..7c3d12a
--- /dev/null
+++ b/src/tf_2/ops.py
@@ -0,0 +1,16 @@
+from typing import Type
+
+from src.common import Op
+from src.tf_2.add import TFAddBench
+from src.tf_2.base import TFBase
+from src.tf_2.div import TFDivBench
+from src.tf_2.mul import TFMulBench
+from src.tf_2.matmul import TFMatmulBench
+
+
+tf2_ops: dict[Op, Type[TFBase]] = {
+    Op.ADD: TFAddBench,
+    Op.MUL: TFMulBench,
+    Op.DIV: TFDivBench,
+    Op.MATMUL: TFMatmulBench
+}
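
The two ops.py registries are what run_benchmark dispatches through, so a bench can also be constructed from them directly. A minimal sketch, assuming TensorFlow 2 is installed, doing by hand what run_benchmark does for Platform.TF2:

    # Direct use of the TF2 registry: look up the bench class for an op and run it.
    import os
    from pathlib import Path

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ logging, as run_benchmark does

    from src.common import DataType, Op
    from src.tf_2.ops import tf2_ops

    bench = tf2_ops[Op.ADD](Path('output'))
    bench.run([(256, 256), (512, 512)], 10, DataType.FLOAT32)
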
diff --git a/src/torch/base.py b/src/torch/base.py
deleted file mode 100644
index 6007243..0000000
--- a/src/torch/base.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from pathlib import Path
-
-import torch
-
-from src.base import Base, Device
-from src.utils import get_cpu_name, get_nvidia_name
-
-
-class TorchBase(Base):
-    def __init__(self, output_path: Path):
-        super().__init__(output_path)
-
-        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-        if torch.cuda.is_available():
-            if torch.cuda.device_count() > 1:
-                print('WARINING : no multiple CUDA device benchmark implemented yet (only using first)')
-            self.set_output_path(Device.GPU, get_nvidia_name())
-            torch.backends.cudnn.benchmark = True
-        else:
-            self.set_output_path(Device.CPU, get_cpu_name())
-
-        if not self.output_path.exists():
-            self.output_path.mkdir(parents=True)
diff --git a/src/torch/matmul.py b/src/torch/matmul.py
deleted file mode 100644
index 2dd91fe..0000000
--- a/src/torch/matmul.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import time
-
-from src.base import DataType
-from src.torch.base import TorchBase
-
-import matplotlib.pyplot as plt
-import pandas as pd
-import seaborn as sns
-import torch
-
-
-class TorchMatmulBench(TorchBase):
-
-    def run(self,
-            experiment_args: list[tuple[tuple[int, int], tuple[int, int]]],
-            experiment_count: int,
-            data_type: DataType):
-        sns.set_theme(style="ticks")
-
-        dtype = None
-        if data_type == DataType.FLOAT16:
-            dtype = torch.float16
-        elif data_type == DataType.FLOAT32:
-            dtype = torch.float32
-        elif data_type == DataType.FLOAT64:
-            dtype = torch.float64
-        else:
-            raise RuntimeError(f'data_type {data_type.value} not implemented')
-        print(f'Startin Torch Matmul Benchmark with data type: {data_type.value}')
-
-        experiment_names = []
-        experiment_lengths = []
-        experiment_times = []
-        experiment_mop = []
-        for shape_1, shape_2 in experiment_args:
-            tensor_1 = torch.ones(shape_1, dtype=dtype, device=self.device)
-            tensor_2 = torch.ones(shape_2, dtype=dtype, device=self.device) / (shape_2[1] - 1.0)
-
-            # warmup
-            for _ in range(20):
-                _ = tensor_1 @ tensor_2
-
-            # speed evalutaion
-            counter = 0
-            start_time = time.time()
-            while(time.time() - start_time < 0.2):
-                _ = tensor_1 @ tensor_2
-                counter += 1
-            end_time = time.time()
-
-            target_time = 0.5 / experiment_count  # in s
-            experiment_speed = counter / (end_time - start_time)  # in op/s
-            experiment_length = max(int(target_time * experiment_speed), 2)
-
-            run_times = []
-            for _ in range(experiment_count):
-                start_time = time.time()
-                for _ in range(experiment_length):
-                    _ = tensor_1 @ tensor_2
-                run_times.append(time.time() - start_time)
-            experiment_times += run_times
-            experiment_names += [f'{shape_1[0]}x{shape_1[1]} @ {shape_2[0]}x{shape_2[1]}'] * experiment_count
-            experiment_lengths += [experiment_length] * experiment_count
-            experiment_mop += [(shape_1[0] * shape_2[1] / 1000_000) * 2 * (shape_1[1] - 1)] * experiment_count
-            print(f'Run {experiment_names[-1]} (x{experiment_length})'
-                  f' in {experiment_times[-1] * 1000:0.1f}ms')
-
-        data = pd.DataFrame(
-            {
-                'run times (s)': experiment_times,
-                'count': experiment_lengths,
-                'ms/matmul': [(1000.0 * t) / l for t, l in zip(experiment_times, experiment_lengths)],
-                'Mop/matmul': experiment_mop,
-                'GFLOPS': [(mop * l) / (t * 1000.0)
-                           for mop, l, t in zip(experiment_mop, experiment_lengths, experiment_times)]
-            },
-            index=pd.Index(experiment_names, name='experiment'))
-        data.to_csv(self.output_path / f'matmul_{data_type.value}.csv', sep='\t')
-
-        mean_data = data[['ms/matmul', 'GFLOPS']].groupby(data.index, sort=False).mean()
-        max_data = data[['Mop/matmul']].groupby(data.index, sort=False).max()
-
-        figure, axes = plt.subplots(nrows=3, sharex=True, figsize=(18, 12))
-        figure.suptitle(f'Torch Matmul ({data_type.value})', fontsize=16)
-        for axe in axes[:-1]:
-            axe.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
-
-        chart = sns.barplot(x=max_data.index, y='Mop/matmul', data=max_data, ax=axes[0], order=data.index.unique())
-        axes[0].set_yscale("log")
-        for p, value in zip(chart.patches, max_data['Mop/matmul']):
-            chart.annotate(f'{value:0.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='ms/matmul', data=data, ax=axes[1])
-        for p, value in zip(chart.patches, mean_data['ms/matmul']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        chart = sns.barplot(x=data.index, y='GFLOPS', data=data, ax=axes[2])
-        for p, value in zip(chart.patches, mean_data['GFLOPS']):
-            chart.annotate(f'{value:.3f}',
-                           (p.get_x() + p.get_width() / 2.0, p.get_height()),
-                           ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
-                           textcoords='offset points')
-
-        plt.xticks(rotation=20)
-        plt.subplots_adjust(hspace=0.0, wspace=0.02, top=0.93, right=0.99, bottom=0.1, left=0.05)
-        plt.savefig(self.output_path / f'matmul_{data_type.value}.png')