From 318e7e2dfcc4be064e38d330681ec5a524530ffa Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Sun, 10 Jul 2022 12:53:01 +0200 Subject: [PATCH 01/10] Refactor num_flops and fix for loopy kernels which are not a program yet. --- pyop2/local_kernel.py | 61 +++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 4807463b8..31310b0aa 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -152,28 +152,9 @@ def arguments(self): for acc, dtype in zip(self.accesses, self.dtypes)) @cached_property + @abc.abstractmethod def num_flops(self): """Compute the numbers of FLOPs if not already known.""" - if self.flop_count is not None: - return self.flop_count - - if not configuration["compute_kernel_flops"]: - return 0 - - if isinstance(self.code, coffee.base.Node): - v = coffee.visitors.EstimateFlops() - return v.visit(self.code) - elif isinstance(self.code, lp.TranslationUnit): - op_map = lp.get_op_map( - self.code.copy(options=lp.Options(ignore_boostable_into=True), - silenced_warnings=['insn_count_subgroups_upper_bound', - 'get_x_map_guessing_subgroup_size', - 'summing_if_branches_ops']), - subgroup_size='guess') - return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], - dtype=[ScalarType]).eval_and_sum({}) - else: - return 0 def __eq__(self, other): if not isinstance(other, LocalKernel): @@ -214,6 +195,13 @@ def dtypes(self): def dtypes(self, dtypes): self._dtypes = dtypes + @cached_property + def num_flops(self): + if self.flop_count is not None: + return self.flop_count + else: + return 0 + class CoffeeLocalKernel(LocalKernel): """:class:`LocalKernel` class where `code` has type :class:`coffee.base.Node`.""" @@ -231,6 +219,16 @@ def dtypes(self): def dtypes(self, dtypes): self._dtypes = dtypes + @cached_property + def num_flops(self): + if self.flop_count is not None: + return self.flop_count + elif not configuration["compute_kernel_flops"]: + return 0 + else: + v = coffee.visitors.EstimateFlops() + return v.visit(self.code) + class LoopyLocalKernel(LocalKernel): """:class:`LocalKernel` class where `code` has type :class:`loopy.LoopKernel` @@ -250,3 +248,26 @@ def _loopy_arguments(self): """Return the loopy arguments associated with the kernel.""" return tuple(a for a in self.code.callables_table[self.name].subkernel.args if isinstance(a, lp.ArrayArg)) + + @cached_property + def num_flops(self): + if self.flop_count is not None: + return self.flop_count + elif not configuration["compute_kernel_flops"]: + return 0 + else: + if isinstance(self.code, lp.TranslationUnit): + prog = self.code.with_entrypoints(self.name) + knl = prog.default_entrypoint + warnings = list(knl.silenced_warnings) + warnings.extend(['insn_count_subgroups_upper_bound', + 'get_x_map_guessing_subgroup_size', + 'summing_if_branches_ops']) + knl = knl.copy(silenced_warnings=warnings, + options=lp.Options(ignore_boostable_into=True)) + prog = prog.with_kernel(knl) + else: + prog = self.code + op_map = lp.get_op_map(prog, subgroup_size=1) + return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], + dtype=[ScalarType]).eval_and_sum({}) From 04ccf8e73c50c81d86729592320b92c1abce1f80 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Sun, 10 Jul 2022 20:59:42 +0200 Subject: [PATCH 02/10] Fix coffee FLOP counting too --- pyop2/local_kernel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 31310b0aa..63411acf7 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -4,6 +4,7 @@ from typing import Union import coffee +from coffee.visitors import EstimateFlops import loopy as lp from loopy.tools import LoopyKeyBuilder import numpy as np @@ -226,7 +227,7 @@ def num_flops(self): elif not configuration["compute_kernel_flops"]: return 0 else: - v = coffee.visitors.EstimateFlops() + v = EstimateFlops() return v.visit(self.code) From 20522fbbedc40ad037b6f22688c87ababf622ff4 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Sun, 10 Jul 2022 23:16:44 +0200 Subject: [PATCH 03/10] We need to set the layer variable to something. Usually the wrapper kernel does that, but we count the FLOPS of the local kernel here. This changes the result of the extrusion kernel but not its FLOP count I think. --- pyop2/local_kernel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 63411acf7..c599349f6 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -266,6 +266,7 @@ def num_flops(self): 'summing_if_branches_ops']) knl = knl.copy(silenced_warnings=warnings, options=lp.Options(ignore_boostable_into=True)) + knl = lp.fix_parameters(knl, layer=1) prog = prog.with_kernel(knl) else: prog = self.code From 40b06aef8948065debb34b05065b4292358f5175 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 08:54:50 +0200 Subject: [PATCH 04/10] Move check if we want to count kernel flops --- pyop2/local_kernel.py | 5 ----- pyop2/parloop.py | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index c599349f6..9cc0ed36c 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -10,7 +10,6 @@ import numpy as np from pyop2 import version -from pyop2.configuration import configuration from pyop2.datatypes import ScalarType from pyop2.exceptions import NameTypeError from pyop2.types import Access @@ -224,8 +223,6 @@ def dtypes(self, dtypes): def num_flops(self): if self.flop_count is not None: return self.flop_count - elif not configuration["compute_kernel_flops"]: - return 0 else: v = EstimateFlops() return v.visit(self.code) @@ -254,8 +251,6 @@ def _loopy_arguments(self): def num_flops(self): if self.flop_count is not None: return self.flop_count - elif not configuration["compute_kernel_flops"]: - return 0 else: if isinstance(self.code, lp.TranslationUnit): prog = self.code.with_entrypoints(self.name) diff --git a/pyop2/parloop.py b/pyop2/parloop.py index 8384268cf..3990ad242 100644 --- a/pyop2/parloop.py +++ b/pyop2/parloop.py @@ -188,7 +188,8 @@ def _compute(self, part): :arg part: The :class:`SetPartition` to compute over. """ with self._compute_event(): - PETSc.Log.logFlops(part.size*self.num_flops) + if configuration["compute_kernel_flops"]: + PETSc.Log.logFlops(part.size*self.num_flops) self.global_kernel(self.comm, part.offset, part.offset+part.size, *self.arglist) @cached_property From c744e611e8af71db3e5b4a9def707b78d8019f4d Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 13:11:51 +0200 Subject: [PATCH 05/10] Add comments --- pyop2/local_kernel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 9cc0ed36c..a92031005 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -253,6 +253,8 @@ def num_flops(self): return self.flop_count else: if isinstance(self.code, lp.TranslationUnit): + # in order to silence the warnings we need to access + # the callable kernels in the translation prog = self.code.with_entrypoints(self.name) knl = prog.default_entrypoint warnings = list(knl.silenced_warnings) @@ -261,6 +263,9 @@ def num_flops(self): 'summing_if_branches_ops']) knl = knl.copy(silenced_warnings=warnings, options=lp.Options(ignore_boostable_into=True)) + # for extrusion utils the layer arg must be fixed + # because usually it would be a value which is passed in from the global kernel + # theoretically this changes the result but not the FLOP count knl = lp.fix_parameters(knl, layer=1) prog = prog.with_kernel(knl) else: From 25e0609ef00288801617639cd6c1d92b32b82f8c Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 13:12:16 +0200 Subject: [PATCH 06/10] ignore_boostable_into is default --- pyop2/local_kernel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index a92031005..8173815e8 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -261,8 +261,7 @@ def num_flops(self): warnings.extend(['insn_count_subgroups_upper_bound', 'get_x_map_guessing_subgroup_size', 'summing_if_branches_ops']) - knl = knl.copy(silenced_warnings=warnings, - options=lp.Options(ignore_boostable_into=True)) + knl = knl.copy(silenced_warnings=warnings) # for extrusion utils the layer arg must be fixed # because usually it would be a value which is passed in from the global kernel # theoretically this changes the result but not the FLOP count From 93f3037fbea735dbb48fde2137191aa67a491362 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 13:20:53 +0200 Subject: [PATCH 07/10] LocalLoopyKernels code should be a translation unit? --- pyop2/local_kernel.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 8173815e8..f0d51d2a4 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -252,23 +252,21 @@ def num_flops(self): if self.flop_count is not None: return self.flop_count else: - if isinstance(self.code, lp.TranslationUnit): - # in order to silence the warnings we need to access - # the callable kernels in the translation - prog = self.code.with_entrypoints(self.name) - knl = prog.default_entrypoint - warnings = list(knl.silenced_warnings) - warnings.extend(['insn_count_subgroups_upper_bound', - 'get_x_map_guessing_subgroup_size', - 'summing_if_branches_ops']) - knl = knl.copy(silenced_warnings=warnings) - # for extrusion utils the layer arg must be fixed - # because usually it would be a value which is passed in from the global kernel - # theoretically this changes the result but not the FLOP count - knl = lp.fix_parameters(knl, layer=1) - prog = prog.with_kernel(knl) - else: - prog = self.code + assert isinstance(self.code, lp.TranslationUnit), "LocalLoopyKernels code should be a translation unit." + # in order to silence the warnings we need to access + # the callable kernels in the translation unit + prog = self.code.with_entrypoints(self.name) + knl = prog.default_entrypoint + warnings = list(knl.silenced_warnings) + warnings.extend(['insn_count_subgroups_upper_bound', + 'get_x_map_guessing_subgroup_size', + 'summing_if_branches_ops']) + knl = knl.copy(silenced_warnings=warnings) + # for extrusion utils the layer arg must be fixed + # because usually it would be a value which is passed in from the global kernel + # theoretically this changes the result but not the FLOP count + knl = lp.fix_parameters(knl, layer=1) + prog = prog.with_kernel(knl) op_map = lp.get_op_map(prog, subgroup_size=1) return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], dtype=[ScalarType]).eval_and_sum({}) From f2c7c9cb319a5bafb9c12316d2b9abaa597bad14 Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 15:56:53 +0200 Subject: [PATCH 08/10] Code is always a TranslationUnit. --- pyop2/local_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index f0d51d2a4..6e01e32b3 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -233,7 +233,7 @@ class LoopyLocalKernel(LocalKernel): or :class:`loopy.TranslationUnit`. """ - @validate_type(("code", (lp.LoopKernel, lp.TranslationUnit), TypeError)) + @validate_type(("code", (lp.TranslationUnit), TypeError)) def __init__(self, code, *args, **kwargs): super().__init__(code, *args, **kwargs) From c0ea16fd30fba5b4d59dee18f4dda528ffd4d15d Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 16:03:12 +0200 Subject: [PATCH 09/10] Improve comment on method and lint --- pyop2/local_kernel.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 6e01e32b3..012f1b65f 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -197,6 +197,8 @@ def dtypes(self, dtypes): @cached_property def num_flops(self): + """Set the numbers of FLOPs to 0 if not already known, + because there is no way to measure or estimate the FLOPS for string kernels. """ if self.flop_count is not None: return self.flop_count else: @@ -221,6 +223,8 @@ def dtypes(self, dtypes): @cached_property def num_flops(self): + """Compute the numbers of FLOPs if not already known + using COFFEE's FLOP estimation algorithm.""" if self.flop_count is not None: return self.flop_count else: @@ -249,6 +253,8 @@ def _loopy_arguments(self): @cached_property def num_flops(self): + """Compute the numbers of FLOPs if not already known + using Loo.py's FLOP counting algorithm.""" if self.flop_count is not None: return self.flop_count else: @@ -259,8 +265,8 @@ def num_flops(self): knl = prog.default_entrypoint warnings = list(knl.silenced_warnings) warnings.extend(['insn_count_subgroups_upper_bound', - 'get_x_map_guessing_subgroup_size', - 'summing_if_branches_ops']) + 'get_x_map_guessing_subgroup_size', + 'summing_if_branches_ops']) knl = knl.copy(silenced_warnings=warnings) # for extrusion utils the layer arg must be fixed # because usually it would be a value which is passed in from the global kernel From 06dea260b4f87283c3fb687c741a8e68f33c322a Mon Sep 17 00:00:00 2001 From: Sophia Vorderwuelbecke Date: Mon, 11 Jul 2022 16:08:47 +0200 Subject: [PATCH 10/10] Oneliners --- pyop2/local_kernel.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pyop2/local_kernel.py b/pyop2/local_kernel.py index 012f1b65f..a6c04b9c4 100644 --- a/pyop2/local_kernel.py +++ b/pyop2/local_kernel.py @@ -199,10 +199,7 @@ def dtypes(self, dtypes): def num_flops(self): """Set the numbers of FLOPs to 0 if not already known, because there is no way to measure or estimate the FLOPS for string kernels. """ - if self.flop_count is not None: - return self.flop_count - else: - return 0 + return self.flop_count if self.flop_count is not None else 0 class CoffeeLocalKernel(LocalKernel): @@ -225,11 +222,7 @@ def dtypes(self, dtypes): def num_flops(self): """Compute the numbers of FLOPs if not already known using COFFEE's FLOP estimation algorithm.""" - if self.flop_count is not None: - return self.flop_count - else: - v = EstimateFlops() - return v.visit(self.code) + return self.flop_count if self.flop_count is not None else EstimateFlops().visit(self.code) class LoopyLocalKernel(LocalKernel):