From c1e7b0b7aeb3e3739414215321e76fc6c8225aac Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 12 Jan 2018 12:39:23 +0000 Subject: [PATCH 01/25] dse: introduce SkewingRewriter --- devito/dse/backends/__init__.py | 3 ++- devito/dse/backends/advanced.py | 29 ++++++++++++++++++++++++++++- devito/dse/transformer.py | 3 ++- examples/seismic/tti/tti_example.py | 2 +- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/devito/dse/backends/__init__.py b/devito/dse/backends/__init__.py index 233a2b1c0a..a1fae4f100 100644 --- a/devito/dse/backends/__init__.py +++ b/devito/dse/backends/__init__.py @@ -1,6 +1,7 @@ from devito.dse.backends.common import * # noqa from devito.dse.backends.basic import BasicRewriter # noqa -from devito.dse.backends.advanced import AdvancedRewriter # noqa +from devito.dse.backends.advanced import (AdvancedRewriter, # noqa + SkewingRewriter) from devito.dse.backends.speculative import (SpeculativeRewriter, # noqa AggressiveRewriter, CustomRewriter) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index 0ad2a2a7f6..236a06eedf 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -5,7 +5,7 @@ from devito.ir import clusterize from devito.dse.aliases import collect from devito.dse.backends import BasicRewriter, dse_pass -from devito.symbolics import Eq, estimate_cost, xreplace_constrained, iq_timeinvariant +from devito.symbolics import Eq, estimate_cost, xreplace_constrained, iq_timeinvariant, xreplace_indices from devito.dse.manipulation import (common_subexprs_elimination, collect_nested, compact_temporaries) from devito.types import Indexed, Scalar, Array @@ -162,3 +162,30 @@ def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs): processed = [e.xreplace(rules) for e in processed] return alias_clusters + [cluster.rebuild(processed)] + + +class SkewingRewriter(AdvancedRewriter): + + def _pipeline(self, state): + self._extract_time_invariants(state) + self._eliminate_inter_stencil_redundancies(state) + self._eliminate_intra_stencil_redundancies(state) + self._factorize(state) + self._loop_skew(state) + + @dse_pass + def _loop_skew(self, cluster, template, **kwargs): + # FIXME: this is probably the wrong way to find the time dimension + t, mapper = None, {} + for dim in cluster.stencil.dimensions: + if t is not None: + mapper[dim] = dim - 2 * t + elif dim.is_Time: + t = dim.parent + # FIXME: need to modify loop headers + + if t is None: + return cluster + + processed = xreplace_indices(cluster.exprs, mapper) + return cluster.rebuild(processed) diff --git a/devito/dse/transformer.py b/devito/dse/transformer.py index de315b9332..3b6789f041 100644 --- a/devito/dse/transformer.py +++ b/devito/dse/transformer.py @@ -2,7 +2,7 @@ from devito.ir.clusters import ClusterGroup, groupby from devito.dse.backends import (BasicRewriter, AdvancedRewriter, SpeculativeRewriter, - AggressiveRewriter, CustomRewriter) + AggressiveRewriter, CustomRewriter, SkewingRewriter) from devito.exceptions import DSEException from devito.logger import dse_warning from devito.parameters import configuration @@ -13,6 +13,7 @@ modes = { 'basic': BasicRewriter, 'advanced': AdvancedRewriter, + 'skewing': SkewingRewriter, 'speculative': SpeculativeRewriter, 'aggressive': AggressiveRewriter } diff --git a/examples/seismic/tti/tti_example.py b/examples/seismic/tti/tti_example.py index 87de1fac7a..07840847e0 100644 --- a/examples/seismic/tti/tti_example.py +++ b/examples/seismic/tti/tti_example.py @@ -64,7 +64,7 @@ def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=250.0, choices=['centered', 'shifted'], help="Choice of finite-difference kernel") parser.add_argument("-dse", "-dse", default="advanced", - choices=["noop", "basic", "advanced", + choices=["noop", "basic", "advanced", "skewing", "speculative", "aggressive"], help="Devito symbolic engine (DSE) mode") parser.add_argument("-dle", default="advanced", From 3818e033bcef5b686246c08ba15c3420e7082c49 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 12 Jan 2018 14:44:12 +0000 Subject: [PATCH 02/25] dse: skew: offsets loop bounds correspondingly --- devito/dse/backends/advanced.py | 8 +++++--- devito/ir/clusters/algorithms.py | 2 +- devito/ir/clusters/cluster.py | 12 +++++++----- devito/operator.py | 13 +++++++++---- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index 236a06eedf..be765ce6a9 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -175,17 +175,19 @@ def _pipeline(self, state): @dse_pass def _loop_skew(self, cluster, template, **kwargs): - # FIXME: this is probably the wrong way to find the time dimension + skew_factor = -2 # FIXME: read parameter t, mapper = None, {} + + # FIXME: this is probably the wrong way to find the time dimension for dim in cluster.stencil.dimensions: if t is not None: - mapper[dim] = dim - 2 * t + mapper[dim] = dim + skew_factor * t elif dim.is_Time: t = dim.parent - # FIXME: need to modify loop headers if t is None: return cluster + cluster.skewed_loops = {dim: skew - dim for dim, skew in mapper.items()} processed = xreplace_indices(cluster.exprs, mapper) return cluster.rebuild(processed) diff --git a/devito/ir/clusters/algorithms.py b/devito/ir/clusters/algorithms.py index d782fb9a65..a77953afb8 100644 --- a/devito/ir/clusters/algorithms.py +++ b/devito/ir/clusters/algorithms.py @@ -259,7 +259,7 @@ def clusterize(exprs, stencils): clusters = ClusterGroup() for target, pc in mapper.items(): exprs = [i for i in pc.exprs if i.lhs.is_Symbol or i.lhs == target] - clusters.append(PartialCluster(exprs, pc.stencil)) + clusters.append(PartialCluster(exprs, pc.stencil, pc.skewed_loops)) # Attempt grouping as many PartialClusters as possible together return groupby(clusters) diff --git a/devito/ir/clusters/cluster.py b/devito/ir/clusters/cluster.py index 41ddb95512..2cf173ee98 100644 --- a/devito/ir/clusters/cluster.py +++ b/devito/ir/clusters/cluster.py @@ -18,7 +18,7 @@ class PartialCluster(object): the embedded sequence of expressions are subjected to modifications. """ - def __init__(self, exprs, stencil): + def __init__(self, exprs, stencil, skewed_loops={}): """ Initialize a PartialCluster. @@ -29,6 +29,7 @@ def __init__(self, exprs, stencil): """ self._exprs = list(exprs) self._stencil = stencil + self.skewed_loops = skewed_loops @property def exprs(self): @@ -70,9 +71,10 @@ class Cluster(PartialCluster): """A Cluster is an immutable PartialCluster.""" - def __init__(self, exprs, stencil): + def __init__(self, exprs, stencil, skewed_loops={}): self._exprs = as_tuple(exprs) self._stencil = stencil.frozen + self.skewed_loops = skewed_loops @cached_property def trace(self): @@ -90,7 +92,7 @@ def rebuild(self, exprs): """ Build a new cluster with expressions ``exprs`` having same stencil as ``self``. """ - return Cluster(exprs, self.stencil) + return Cluster(exprs, self.stencil, self.skewed_loops) @PartialCluster.exprs.setter def exprs(self, val): @@ -120,7 +122,7 @@ def unfreeze(self): Return a new ClusterGroup in which all of ``self``'s Clusters have been promoted to PartialClusters. The ``atomics`` information is lost. """ - return ClusterGroup([PartialCluster(i.exprs, i.stencil) + return ClusterGroup([PartialCluster(i.exprs, i.stencil, i.skewed_loops) if isinstance(i, Cluster) else i for i in self]) def freeze(self): @@ -131,7 +133,7 @@ def freeze(self): clusters = ClusterGroup() for i in self: if isinstance(i, PartialCluster): - cluster = Cluster(i.exprs, i.stencil) + cluster = Cluster(i.exprs, i.stencil, i.skewed_loops) clusters.append(cluster) clusters.atomics[cluster] = self.atomics[i] else: diff --git a/devito/operator.py b/devito/operator.py index f30a05ec72..2e1022b1ae 100644 --- a/devito/operator.py +++ b/devito/operator.py @@ -15,6 +15,7 @@ from devito.dse import rewrite from devito.exceptions import InvalidArgument, InvalidOperator from devito.function import Forward, Backward, CompositeFunction +from devito.ir import Cluster from devito.logger import bar, error, info from devito.ir.clusters import clusterize from devito.ir.iet import (Element, Expression, Callable, Iteration, List, @@ -300,8 +301,8 @@ def _autotune(self, arguments): best block sizes when loop blocking is in use.""" return arguments - def _schedule_expressions(self, clusters): - """Create an Iteartion/Expression tree given an iterable of + def _schedule_expressions(self, clusters: Cluster): + """Create an Iteration/Expression tree given an iterable of :class:`Cluster` objects.""" # Build the Iteration/Expression tree @@ -326,8 +327,12 @@ def _schedule_expressions(self, clusters): needed = entries[index:] # Build and insert the required Iterations - iters = [Iteration([], j.dim, j.dim.limits, offsets=j.ofs) for j in - needed] + iters = [] + for j in needed: + limits = j.dim.limits + if j.dim in i.skewed_loops: + limits = [l - i.skewed_loops[j.dim] for l in limits] + iters.append(Iteration([], j.dim, limits, offsets=j.ofs)) body, tree = compose_nodes(iters + [expressions], retrieve=True) scheduling = OrderedDict(zip(needed, tree)) if root is None: From 12579d6248d078a98af4bb9cedfa59ce2acb620c Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 19 Jan 2018 19:10:40 +0000 Subject: [PATCH 03/25] operator: skewing: bugfix on loop bounds --- devito/operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devito/operator.py b/devito/operator.py index 2e1022b1ae..bdd56f814d 100644 --- a/devito/operator.py +++ b/devito/operator.py @@ -331,7 +331,7 @@ def _schedule_expressions(self, clusters: Cluster): for j in needed: limits = j.dim.limits if j.dim in i.skewed_loops: - limits = [l - i.skewed_loops[j.dim] for l in limits] + limits = (limits[0] - i.skewed_loops[j.dim], limits[1] - i.skewed_loops[j.dim], limits[2]) iters.append(Iteration([], j.dim, limits, offsets=j.ofs)) body, tree = compose_nodes(iters + [expressions], retrieve=True) scheduling = OrderedDict(zip(needed, tree)) From 5c082a6810c496261631687b9597bd830d49b0a1 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 19 Jan 2018 19:11:13 +0000 Subject: [PATCH 04/25] test_dse: skewing: derive test (tti) for skewing --- tests/test_dse.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_dse.py b/tests/test_dse.py index 6e172bc842..fbab35844c 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -62,7 +62,7 @@ def test_acoustic_rewrite_basic(): # TTI -def tti_operator(dse=False, space_order=4): +def tti_operator(dse=False, space_order=4, dle='advanced'): nrec = 101 t0 = 0.0 tn = 250. @@ -91,7 +91,8 @@ def tti_operator(dse=False, space_order=4): rec.coordinates.data[:, 1:] = src.coordinates.data[0, 1:] return AnisotropicWaveSolver(model, source=src, receiver=rec, - time_order=2, space_order=space_order, dse=dse) + time_order=2, space_order=space_order, dse=dse, + dle=dle) @pytest.fixture(scope="session") @@ -134,6 +135,15 @@ def test_tti_rewrite_basic(tti_nodse): assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) +@skipif_yask +def test_tti_rewrite_skewing(tti_nodse): + operator = tti_operator(dse='skewing', dle='noop') + rec, u, v, _ = operator.forward() + + assert np.allclose(tti_nodse[0].data, v.data, atol=10e-3) + assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) + + @skipif_yask def test_tti_rewrite_advanced(tti_nodse): operator = tti_operator(dse='advanced') From 885f746ae54d97b1ee74daf6484476424466d0ae Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Tue, 23 Jan 2018 19:43:31 +0000 Subject: [PATCH 05/25] dle: _loop_blocking: change remainder loops to min --- devito/dle/backends/advanced.py | 38 ++++++--------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index 28c7319c74..a9e7b26847 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -8,6 +8,7 @@ import cgen import numpy as np import psutil +from sympy import Min from devito.cgen_utils import ccode from devito.dimension import Dimension @@ -152,59 +153,32 @@ def _loop_blocking(self, nodes, state): # subsequently be composed to implement loop blocking. inter_blocks = [] intra_blocks = [] - remainders = [] for i in iterations: name = "%s%d_block" % (i.dim.name, len(mapper)) # Build Iteration over blocks dim = blocked.setdefault(i, Dimension(name)) block_size = dim.symbolic_size - iter_size = i.dim.symbolic_extent - start = i.limits[0] - i.offsets[0] - finish = i.dim.symbolic_end - i.offsets[1] - innersize = iter_size - (-i.offsets[0] + i.offsets[1]) - finish = finish - (innersize % block_size) + start = i.limits[0] - i.offsets[0] # FIXME: "widen" + finish = i.dim.symbolic_end - i.offsets[1] # FIXME + inter_block = Iteration([], dim, [start, finish, block_size], properties=PARALLEL) inter_blocks.append(inter_block) # Build Iteration within a block start = inter_block.dim - finish = start + block_size + finish = Min(start + block_size, finish) # FIXME: "widen", FIXME: + eps? intra_block = i._rebuild([], limits=[start, finish, 1], offsets=None, properties=i.properties + (TAG, ELEMENTAL)) intra_blocks.append(intra_block) - # Build unitary-increment Iteration over the 'leftover' region. - # This will be used for remainder loops, executed when any - # dimension size is not a multiple of the block size. - start = inter_block.limits[1] - finish = i.dim.symbolic_end - i.offsets[1] - remainder = i._rebuild([], limits=[start, finish, 1], offsets=None) - remainders.append(remainder) - # Build blocked Iteration nest blocked_tree = compose_nodes(inter_blocks + intra_blocks + [iterations[-1].nodes]) - # Build remainder Iterations - remainder_trees = [] - for n in range(len(iterations)): - for c in combinations([i.dim for i in iterations], n + 1): - # First all inter-block Interations - nodes = [b._rebuild(properties=b.properties + (REMAINDER,)) - for b, r in zip(inter_blocks, remainders) - if r.dim not in c] - # Then intra-block or remainder, for each dim (in order) - properties = (REMAINDER, TAG, ELEMENTAL) - for b, r in zip(intra_blocks, remainders): - handle = r if b.dim in c else b - nodes.append(handle._rebuild(properties=properties)) - nodes.extend([iterations[-1].nodes]) - remainder_trees.append(compose_nodes(nodes)) - # Will replace with blocked loop tree - mapper[root] = List(body=[blocked_tree] + remainder_trees) + mapper[root] = List(body=[blocked_tree]) rebuilt = Transformer(mapper).visit(fold) From 68520faea738faeda2e8a3d9b939544f93452d9a Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Tue, 23 Jan 2018 20:36:37 +0000 Subject: [PATCH 06/25] dle: _loop_blocking: use iteration limit rather than symbolic end This fixes a propagation issue when blocking: if the end has been changed in Iteration.limits[1], dim.symbolic_end would have superseded it. --- devito/dle/backends/advanced.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index a9e7b26847..e4d628aace 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -160,7 +160,7 @@ def _loop_blocking(self, nodes, state): dim = blocked.setdefault(i, Dimension(name)) block_size = dim.symbolic_size start = i.limits[0] - i.offsets[0] # FIXME: "widen" - finish = i.dim.symbolic_end - i.offsets[1] # FIXME + finish = i.limits[1] - i.offsets[1] # FIXME inter_block = Iteration([], dim, [start, finish, block_size], properties=PARALLEL) From 277ce9c8d176a3938a6fb6fb8332bb0622f366fc Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 13:31:38 +0000 Subject: [PATCH 07/25] Iteration: add skew param --- devito/dse/backends/advanced.py | 2 +- devito/ir/iet/nodes.py | 4 +++- devito/operator.py | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index be765ce6a9..d7e354a845 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -175,7 +175,7 @@ def _pipeline(self, state): @dse_pass def _loop_skew(self, cluster, template, **kwargs): - skew_factor = -2 # FIXME: read parameter + skew_factor = -2 # FIXME: read parameter t, mapper = None, {} # FIXME: this is probably the wrong way to find the time dimension diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index 7d4f722371..daaa56fdea 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -256,7 +256,7 @@ class Iteration(Node): _traversable = ['nodes'] def __init__(self, nodes, dimension, limits, index=None, offsets=None, - properties=None, pragmas=None, uindices=None): + properties=None, pragmas=None, uindices=None, skew=None): # Ensure we deal with a list of Expression objects internally nodes = as_tuple(nodes) self.nodes = as_tuple([n if isinstance(n, Node) else Expression(n) @@ -290,6 +290,8 @@ def __init__(self, nodes, dimension, limits, index=None, offsets=None, self.uindices = as_tuple(uindices) assert all(isinstance(i, UnboundedIndex) for i in self.uindices) + self.skew = skew if skew else 0 + def __repr__(self): properties = "" if self.properties: diff --git a/devito/operator.py b/devito/operator.py index bdd56f814d..0213564b10 100644 --- a/devito/operator.py +++ b/devito/operator.py @@ -332,7 +332,10 @@ def _schedule_expressions(self, clusters: Cluster): limits = j.dim.limits if j.dim in i.skewed_loops: limits = (limits[0] - i.skewed_loops[j.dim], limits[1] - i.skewed_loops[j.dim], limits[2]) - iters.append(Iteration([], j.dim, limits, offsets=j.ofs)) + skew = i.skewed_loops[j.dim] + else: + skew = 0 + iters.append(Iteration([], j.dim, limits, offsets=j.ofs, skew=skew)) body, tree = compose_nodes(iters + [expressions], retrieve=True) scheduling = OrderedDict(zip(needed, tree)) if root is None: From 26b156ee9c76f999a9f75260700f7cfe9ca0755e Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 13:46:14 +0000 Subject: [PATCH 08/25] dse: skew: skewed_loops: break into factor, dim --- devito/dse/backends/advanced.py | 4 +++- devito/ir/iet/nodes.py | 3 ++- devito/operator.py | 9 +++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index d7e354a845..f0a2fed3d1 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -177,17 +177,19 @@ def _pipeline(self, state): def _loop_skew(self, cluster, template, **kwargs): skew_factor = -2 # FIXME: read parameter t, mapper = None, {} + skews = {} # FIXME: this is probably the wrong way to find the time dimension for dim in cluster.stencil.dimensions: if t is not None: mapper[dim] = dim + skew_factor * t + skews[dim] = (skew_factor, t) elif dim.is_Time: t = dim.parent if t is None: return cluster - cluster.skewed_loops = {dim: skew - dim for dim, skew in mapper.items()} + cluster.skewed_loops = skews processed = xreplace_indices(cluster.exprs, mapper) return cluster.rebuild(processed) diff --git a/devito/ir/iet/nodes.py b/devito/ir/iet/nodes.py index daaa56fdea..c90f874fb4 100644 --- a/devito/ir/iet/nodes.py +++ b/devito/ir/iet/nodes.py @@ -290,7 +290,8 @@ def __init__(self, nodes, dimension, limits, index=None, offsets=None, self.uindices = as_tuple(uindices) assert all(isinstance(i, UnboundedIndex) for i in self.uindices) - self.skew = skew if skew else 0 + # If there is no skewing factor, want to insert a dummy dimension + self.skew = skew if skew else (0, self.dim) def __repr__(self): properties = "" diff --git a/devito/operator.py b/devito/operator.py index 0213564b10..d3118ea142 100644 --- a/devito/operator.py +++ b/devito/operator.py @@ -331,11 +331,12 @@ def _schedule_expressions(self, clusters: Cluster): for j in needed: limits = j.dim.limits if j.dim in i.skewed_loops: - limits = (limits[0] - i.skewed_loops[j.dim], limits[1] - i.skewed_loops[j.dim], limits[2]) - skew = i.skewed_loops[j.dim] + skew_tuple = i.skewed_loops[j.dim] + skew = skew_tuple[0] * skew_tuple[1] + limits = (limits[0] - skew, limits[1] - skew, limits[2]) else: - skew = 0 - iters.append(Iteration([], j.dim, limits, offsets=j.ofs, skew=skew)) + skew_tuple = 0 + iters.append(Iteration([], j.dim, limits, offsets=j.ofs, skew=skew_tuple)) body, tree = compose_nodes(iters + [expressions], retrieve=True) scheduling = OrderedDict(zip(needed, tree)) if root is None: From ef9b72306128325c282358bacff32f03c22fe827 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 14:34:11 +0000 Subject: [PATCH 09/25] dle: blocking: fix bounds on intra loop --- devito/dle/backends/advanced.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index e4d628aace..710170ac44 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -8,7 +8,7 @@ import cgen import numpy as np import psutil -from sympy import Min +from sympy import Min, Max from devito.cgen_utils import ccode from devito.dimension import Dimension @@ -164,11 +164,11 @@ def _loop_blocking(self, nodes, state): inter_block = Iteration([], dim, [start, finish, block_size], properties=PARALLEL) - inter_blocks.append(inter_block) + inter_blocks.append(inter_block) # the area being blocked # Build Iteration within a block - start = inter_block.dim - finish = Min(start + block_size, finish) # FIXME: "widen", FIXME: + eps? + start = Max(inter_block.dim, start) + finish = Min(inter_block.dim + block_size, finish) intra_block = i._rebuild([], limits=[start, finish, 1], offsets=None, properties=i.properties + (TAG, ELEMENTAL)) intra_blocks.append(intra_block) From 4d6047b1762d1ffc3e64580d0b602fbe392bb7e5 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 21:55:05 +0000 Subject: [PATCH 10/25] dle: blocking: widen bounds on outer blocks This is to accomodate the possibility that skew_factor * time_bs is not a multiple of lower dimensional block sizes. --- devito/dle/backends/advanced.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index 710170ac44..4af44d9b5c 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -158,11 +158,16 @@ def _loop_blocking(self, nodes, state): # Build Iteration over blocks dim = blocked.setdefault(i, Dimension(name)) - block_size = dim.symbolic_size - start = i.limits[0] - i.offsets[0] # FIXME: "widen" - finish = i.limits[1] - i.offsets[1] # FIXME - - inter_block = Iteration([], dim, [start, finish, block_size], + block_size = dim.symbolic_size # The variable which will contain the block size + # FIXME: what if the time dimension doesn't start at 0? + # We subtract the skew here to straighten out the blocks + start = i.limits[0] - i.offsets[0] + finish = i.limits[1] - i.offsets[1] + + # FIXME: these bounds might be a little fishy + outer_start = start + i.skew[0] * i.skew[1] + outer_finish = finish + i.skew[0] * i.skew[1] - i.skew[0] * i.skew[1].symbolic_end + inter_block = Iteration([], dim, [outer_start, outer_finish, block_size], properties=PARALLEL) inter_blocks.append(inter_block) # the area being blocked From 76f50376daefa62aca8ea6cbeba9193b2a467a7e Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 21:57:28 +0000 Subject: [PATCH 11/25] dle: IterationFold: add skew parameter --- devito/dle/blocking_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/devito/dle/blocking_utils.py b/devito/dle/blocking_utils.py index b0524a982c..7e3051b309 100644 --- a/devito/dle/blocking_utils.py +++ b/devito/dle/blocking_utils.py @@ -202,9 +202,10 @@ class IterationFold(Iteration): is_IterationFold = True def __init__(self, nodes, dimension, limits, index=None, offsets=None, - properties=None, pragmas=None, uindices=None, folds=None): + properties=None, pragmas=None, uindices=None, folds=None, + skew=None): super(IterationFold, self).__init__(nodes, dimension, limits, index, offsets, - properties, uindices, pragmas) + properties, uindices, pragmas, skew=skew) self.folds = folds def __repr__(self): From 0dfe57b129fe1146112ff23024e06aaebf8fe508 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 12 Feb 2018 21:58:11 +0000 Subject: [PATCH 12/25] test_dse: skewed tiling test case --- tests/test_dse.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_dse.py b/tests/test_dse.py index fbab35844c..4a8b8cfb8e 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -135,6 +135,15 @@ def test_tti_rewrite_basic(tti_nodse): assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) +@skipif_yask +def test_dle_tiling(tti_nodse): + operator = tti_operator(dse='skewing', dle='advanced') + rec, u, v, _ = operator.forward() + + assert np.allclose(tti_nodse[0].data, v.data, atol=10e-3) + assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) + + @skipif_yask def test_tti_rewrite_skewing(tti_nodse): operator = tti_operator(dse='skewing', dle='noop') From 54704bc9fc74edc8c0790aa77c852412d1928e88 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Thu, 15 Feb 2018 16:25:51 +0000 Subject: [PATCH 13/25] dse: skew: skew before factorising --- devito/dse/backends/advanced.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index f0a2fed3d1..086f65240b 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -167,15 +167,15 @@ def _eliminate_inter_stencil_redundancies(self, cluster, template, **kwargs): class SkewingRewriter(AdvancedRewriter): def _pipeline(self, state): + self._loop_skew(state) self._extract_time_invariants(state) self._eliminate_inter_stencil_redundancies(state) self._eliminate_intra_stencil_redundancies(state) self._factorize(state) - self._loop_skew(state) @dse_pass def _loop_skew(self, cluster, template, **kwargs): - skew_factor = -2 # FIXME: read parameter + skew_factor = -1 # FIXME: read parameter t, mapper = None, {} skews = {} From 3c9b6085ccf87a1147378c5a996b3bd78444af78 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Thu, 15 Feb 2018 16:26:24 +0000 Subject: [PATCH 14/25] test_dle: time-tiling example --- tests/test_dle.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_dle.py b/tests/test_dle.py index b256900d29..0eea37545f 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -141,7 +141,7 @@ def _new_operator3(shape, time_order, **kwargs): # Allocate the grid and set initial condition # Note: This should be made simpler through the use of defaults - u = TimeFunction(name='u', grid=grid, time_order=1, space_order=2) + u = TimeFunction(name='u', grid=grid, time_order=time_order, space_order=2) u.data[0, :] = np.arange(reduce(mul, shape), dtype=np.int32).reshape(shape) # Derive the stencil according to devito conventions @@ -377,6 +377,30 @@ def test_cache_blocking_edge_cases_highorder(shape, blockshape): assert np.equal(wo_blocking.data, w_blocking.data).all() +@skipif_yask +@pytest.mark.parametrize("shape,blockshape", [ + ((3, 3), (3, 4)), + ((4, 4), (3, 4)), + ((5, 5), (3, 4)), + ((6, 6), (3, 4)), + ((7, 7), (3, 4)), + ((8, 8), (3, 4)), + ((9, 9), (3, 4)), + ((10, 10), (3, 4)), + ((11, 11), (3, 4)), + ((12, 12), (3, 4)), + ((13, 13), (3, 4)), + ((14, 14), (3, 4)), + ((15, 15), (3, 4)) +]) +def test_time_blocking(shape, blockshape): + wo_blocking, _ = _new_operator3(shape, time_order=2, dle='noop') + w_blocking, _ = _new_operator3(shape, time_order=2, dse='skewing', + dle=('blocking', {'blockshape': blockshape, + 'blockinner': True})) + assert np.equal(wo_blocking.data, w_blocking.data).all() + + @skipif_yask @pytest.mark.parametrize('exprs,expected', [ # trivial 1D From ccea7a1866285d64172cb7c9b4a208d8e1984ca7 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 16 Feb 2018 14:09:24 +0000 Subject: [PATCH 15/25] dle: blocking: hoist upper bound from loop to support openmp This didn't work: for ( ; x < fmin(...); ) gives 'error: invalid controlling predicate' --- devito/dle/backends/advanced.py | 22 +++++++++++++++------- devito/ir/iet/utils.py | 10 +++++++++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index 4af44d9b5c..62dee02224 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -8,7 +8,7 @@ import cgen import numpy as np import psutil -from sympy import Min, Max +from sympy import Eq, Min, Max from devito.cgen_utils import ccode from devito.dimension import Dimension @@ -21,10 +21,11 @@ PARALLEL, ELEMENTAL, REMAINDER, tagger, FindNodes, FindSymbols, IsPerfectIteration, SubstituteExpression, Transformer, compose_nodes, - retrieve_iteration_tree, filter_iterations, copy_arrays) + retrieve_iteration_tree, filter_iterations, + copy_arrays, SEQUENTIAL) from devito.logger import dle_warning from devito.tools import as_tuple, grouper, roundm -from devito.types import Array +from devito.types import Array, Scalar class DevitoRewriter(BasicRewriter): @@ -36,7 +37,7 @@ def _pipeline(self, state): self._simdize(state) if self.params['openmp'] is True: self._ompize(state) - self._create_elemental_functions(state) + # self._create_elemental_functions(state) self._minimize_remainders(state) @dle_pass @@ -173,9 +174,16 @@ def _loop_blocking(self, nodes, state): # Build Iteration within a block start = Max(inter_block.dim, start) - finish = Min(inter_block.dim + block_size, finish) - intra_block = i._rebuild([], limits=[start, finish, 1], offsets=None, - properties=i.properties + (TAG, ELEMENTAL)) + ub = Min(inter_block.dim + block_size, finish) + if i.is_Parallel: + q = Scalar(name='%s_ub' % i.dim.name) + intra_blocks.append(Expression(Eq(q, ub), np.dtype(np.int32))) + properties = [p for p in i.properties if p != SEQUENTIAL] + [PARALLEL, TAG] + else: + q = ub + properties = i.properties + (TAG, ELEMENTAL) + intra_block = i._rebuild([], limits=[start, q, 1], offsets=None, + properties=properties) intra_blocks.append(intra_block) # Build blocked Iteration nest diff --git a/devito/ir/iet/utils.py b/devito/ir/iet/utils.py index 5b8d7d47f8..3ac5a96906 100644 --- a/devito/ir/iet/utils.py +++ b/devito/ir/iet/utils.py @@ -112,7 +112,15 @@ def compose_nodes(nodes, retrieve=False): body = l.pop(-1) while l: handle = l.pop(-1) - body = handle._rebuild(body, **handle.args_frozen) + # The original code assumed nested loops only, and we wanted + # to handle [Iteration, Expression, Iteration, ...] + # See DevitoRewriter._loop_blocking() + # Easy to abuse (think of perfect loop nests) + # FIXME: can only handle one expression before an iteration + if isinstance(handle, Expression) and isinstance(body, Iteration): + body = (handle, body) + else: + body = handle._rebuild(body, **handle.args_frozen) tree.append(body) if retrieve is True: From 5f9b6eb952848980afb6c9f13add6ba1255597b8 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 16 Feb 2018 14:22:23 +0000 Subject: [PATCH 16/25] dse: skewing: read param skew_factor --- devito/dse/backends/advanced.py | 3 ++- devito/dse/transformer.py | 4 ++++ devito/parameters.py | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index 086f65240b..d060461edb 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -5,6 +5,7 @@ from devito.ir import clusterize from devito.dse.aliases import collect from devito.dse.backends import BasicRewriter, dse_pass +from devito.parameters import configuration from devito.symbolics import Eq, estimate_cost, xreplace_constrained, iq_timeinvariant, xreplace_indices from devito.dse.manipulation import (common_subexprs_elimination, collect_nested, compact_temporaries) @@ -175,7 +176,7 @@ def _pipeline(self, state): @dse_pass def _loop_skew(self, cluster, template, **kwargs): - skew_factor = -1 # FIXME: read parameter + skew_factor = -configuration['skew_factor'] t, mapper = None, {} skews = {} diff --git a/devito/dse/transformer.py b/devito/dse/transformer.py index 3b6789f041..633c832c7a 100644 --- a/devito/dse/transformer.py +++ b/devito/dse/transformer.py @@ -19,7 +19,11 @@ } """The DSE transformation modes.""" +# FIXME: unsure what this should be +MAX_SKEW_FACTOR = 8 + configuration.add('dse', 'advanced', list(modes)) +configuration.add('skew_factor', 0, range(MAX_SKEW_FACTOR)) def rewrite(clusters, mode='advanced'): diff --git a/devito/parameters.py b/devito/parameters.py index 180d7e5022..49a646e1d9 100644 --- a/devito/parameters.py +++ b/devito/parameters.py @@ -92,6 +92,7 @@ def name(self): 'DEVITO_LOGGING': 'log_level', 'DEVITO_FIRST_TOUCH': 'first_touch', 'DEVITO_DEBUG_COMPILER': 'debug_compiler', + 'DEVITO_SKEW_FACTOR': 'skew_factor', } configuration = Parameters("Devito-Configuration") From 93b7955b900464f60f7bf181438764eb598e2101 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Fri, 16 Feb 2018 14:46:39 +0000 Subject: [PATCH 17/25] dle: blocking: detect skewing to enable time-tiling --- devito/dle/backends/advanced.py | 11 ++++++++--- tests/test_dle.py | 9 +++++++-- tests/test_dse.py | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index 62dee02224..b7c902faea 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -24,6 +24,7 @@ retrieve_iteration_tree, filter_iterations, copy_arrays, SEQUENTIAL) from devito.logger import dle_warning +from devito.parameters import configuration from devito.tools import as_tuple, grouper, roundm from devito.types import Array, Scalar @@ -131,7 +132,12 @@ def _loop_blocking(self, nodes, state): blocked = OrderedDict() for tree in retrieve_iteration_tree(fold): # Is the Iteration tree blockable ? - iterations = [i for i in tree if i.is_Parallel] + # FIXME: change mark_parallel ensure skewed loops are is_Parallel + if configuration['skew_factor']: + iterations = tree + else: + iterations = [i for i in tree if i.is_Parallel] + if exclude_innermost: iterations = [i for i in iterations if not i.is_Vectorizable] if len(iterations) <= 1: @@ -168,8 +174,7 @@ def _loop_blocking(self, nodes, state): # FIXME: these bounds might be a little fishy outer_start = start + i.skew[0] * i.skew[1] outer_finish = finish + i.skew[0] * i.skew[1] - i.skew[0] * i.skew[1].symbolic_end - inter_block = Iteration([], dim, [outer_start, outer_finish, block_size], - properties=PARALLEL) + inter_block = Iteration([], dim, [outer_start, outer_finish, block_size]) inter_blocks.append(inter_block) # the area being blocked # Build Iteration within a block diff --git a/tests/test_dle.py b/tests/test_dle.py index 0eea37545f..210d21c858 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -9,6 +9,7 @@ from conftest import EVAL +from devito import configuration from devito.dle import transform from devito.dle.backends import DevitoRewriter as Rewriter from devito import Grid, Function, TimeFunction, Eq, Operator @@ -394,11 +395,15 @@ def test_cache_blocking_edge_cases_highorder(shape, blockshape): ((15, 15), (3, 4)) ]) def test_time_blocking(shape, blockshape): + prev = configuration['skew_factor'] if 'skew_factor' in configuration else 0 + configuration['skew_factor'] = 2 wo_blocking, _ = _new_operator3(shape, time_order=2, dle='noop') w_blocking, _ = _new_operator3(shape, time_order=2, dse='skewing', - dle=('blocking', {'blockshape': blockshape, - 'blockinner': True})) + dle=('blocking,openmp', + {'blockshape': blockshape, + 'blockinner': True})) assert np.equal(wo_blocking.data, w_blocking.data).all() + configuration['skew_factor'] = prev @skipif_yask diff --git a/tests/test_dse.py b/tests/test_dse.py index 4a8b8cfb8e..2f43e900fd 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -146,7 +146,7 @@ def test_dle_tiling(tti_nodse): @skipif_yask def test_tti_rewrite_skewing(tti_nodse): - operator = tti_operator(dse='skewing', dle='noop') + operator = tti_operator(dse='skewing') rec, u, v, _ = operator.forward() assert np.allclose(tti_nodse[0].data, v.data, atol=10e-3) From 38abdf90592706fd96045e03433e6ec5b1c92fff Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 19 Feb 2018 20:05:05 +0000 Subject: [PATCH 18/25] dle: blocking: move to Visitor Removes problem with compose_nodes --- devito/dle/backends/advanced.py | 58 +++++--------------------- devito/ir/iet/visitors.py | 74 +++++++++++++++++++++++++++++++-- 2 files changed, 81 insertions(+), 51 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index b7c902faea..0d6941862e 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -3,30 +3,26 @@ from __future__ import absolute_import from collections import OrderedDict -from itertools import combinations import cgen import numpy as np import psutil -from sympy import Eq, Min, Max from devito.cgen_utils import ccode -from devito.dimension import Dimension from devito.dle import fold_blockable_tree, unfold_blocked_tree from devito.dle.backends import (BasicRewriter, BlockingArg, dle_pass, omplang, simdinfo, get_simd_flag, get_simd_items) from devito.dse import promote_scalar_expressions from devito.exceptions import DLEException -from devito.ir.iet import (Block, Expression, Iteration, List, - PARALLEL, ELEMENTAL, REMAINDER, tagger, +from devito.ir.iet import (Block, Expression, Iteration, List, ELEMENTAL, FindNodes, FindSymbols, IsPerfectIteration, SubstituteExpression, Transformer, compose_nodes, retrieve_iteration_tree, filter_iterations, - copy_arrays, SEQUENTIAL) + copy_arrays, BlockIterations) from devito.logger import dle_warning from devito.parameters import configuration from devito.tools import as_tuple, grouper, roundm -from devito.types import Array, Scalar +from devito.types import Array class DevitoRewriter(BasicRewriter): @@ -153,47 +149,13 @@ def _loop_blocking(self, nodes, state): # sequential loop (e.g., a timestepping loop) continue - # Decorate intra-block iterations with an IterationProperty - TAG = tagger(len(mapper)) - - # Build all necessary Iteration objects, individually. These will - # subsequently be composed to implement loop blocking. - inter_blocks = [] - intra_blocks = [] - for i in iterations: - name = "%s%d_block" % (i.dim.name, len(mapper)) - - # Build Iteration over blocks - dim = blocked.setdefault(i, Dimension(name)) - block_size = dim.symbolic_size # The variable which will contain the block size - # FIXME: what if the time dimension doesn't start at 0? - # We subtract the skew here to straighten out the blocks - start = i.limits[0] - i.offsets[0] - finish = i.limits[1] - i.offsets[1] - - # FIXME: these bounds might be a little fishy - outer_start = start + i.skew[0] * i.skew[1] - outer_finish = finish + i.skew[0] * i.skew[1] - i.skew[0] * i.skew[1].symbolic_end - inter_block = Iteration([], dim, [outer_start, outer_finish, block_size]) - inter_blocks.append(inter_block) # the area being blocked - - # Build Iteration within a block - start = Max(inter_block.dim, start) - ub = Min(inter_block.dim + block_size, finish) - if i.is_Parallel: - q = Scalar(name='%s_ub' % i.dim.name) - intra_blocks.append(Expression(Eq(q, ub), np.dtype(np.int32))) - properties = [p for p in i.properties if p != SEQUENTIAL] + [PARALLEL, TAG] - else: - q = ub - properties = i.properties + (TAG, ELEMENTAL) - intra_block = i._rebuild([], limits=[start, q, 1], offsets=None, - properties=properties) - intra_blocks.append(intra_block) - - # Build blocked Iteration nest - blocked_tree = compose_nodes(inter_blocks + intra_blocks + - [iterations[-1].nodes]) + condition = lambda i: (i in iterations) + tag = len(mapper) + blocker = BlockIterations(tag, condition=condition) + intra_blocks = blocker.visit(root) + inter_blocks = blocker.inter_blocks + blocked = blocker.blocked + blocked_tree = compose_nodes(inter_blocks + [intra_blocks]) # Will replace with blocked loop tree mapper[root] = List(body=[blocked_tree]) diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 04c45ab382..3bb6a2ebe9 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -12,11 +12,14 @@ import cgen as c import numpy as np +from sympy import Max, Min, Eq from devito.cgen_utils import blankline, ccode -from devito.dimension import LoweredDimension +from devito.dimension import LoweredDimension, Dimension from devito.exceptions import VisitorException -from devito.ir.iet.nodes import Iteration, Node, UnboundedIndex +from devito.ir.iet import tagger, SEQUENTIAL, PARALLEL, ELEMENTAL +from devito.ir.iet.nodes import Iteration, Node, UnboundedIndex, Expression, \ + List from devito.types import Scalar from devito.tools import as_tuple, filter_ordered, filter_sorted, flatten, ctypes_to_C @@ -24,7 +27,8 @@ __all__ = ['FindNodes', 'FindSections', 'FindSymbols', 'MapExpressions', 'IsPerfectIteration', 'SubstituteExpression', 'printAST', 'CGen', 'ResolveTimeStepping', 'Transformer', 'NestedTransformer', - 'FindAdjacentIterations', 'MergeOuterIterations', 'MapIteration'] + 'FindAdjacentIterations', 'MergeOuterIterations', 'MapIteration', + 'BlockIterations'] class Visitor(object): @@ -806,5 +810,69 @@ def visit_list(self, o): visit_tuple = visit_list +class BlockIterations(Visitor): + """ + Tile an iteration tree, given a condition. + """ + + def __init__(self, tag, condition=lambda _: True): + super(BlockIterations, self).__init__() + self.TAG = tagger(tag) + self.tag = tag + self.condition = condition + self.inter_blocks = [] + self.blocked = {} + + def visit_Block(self, o): + #rebuilt = [self.visit(i) for i in o.children] + rebuilt = self.visit(o.children) + return o._rebuild(*rebuilt, **o.args_frozen) + + def visit_Iteration(self, o): + if not self.condition(o): + return o._rebuild(*self.visit(o.children), **o.args_frozen) + + # Do the actual blocking + name = "%s%d_block" % (o.dim.name, self.tag) + dim = self.blocked.setdefault(o, Dimension(name)) + block_size = dim.symbolic_size + + # FIXME: what if the time dimension doesn't start at 0? + # We subtract the skew here to straighten out the blocks + dim_start = o.limits[0] - o.offsets[0] + dim_finish = o.limits[1] - o.offsets[1] + + skew = o.skew[0] * o.skew[1] + skew_max = o.skew[0] * o.skew[1].symbolic_end + + outer_start = dim_start + skew + outer_finish = dim_finish + skew - skew_max + inter_block = Iteration([], dim, [outer_start, outer_finish, block_size]) + self.inter_blocks.append(inter_block) + + inner_start = Max(inter_block.dim, dim_start) + upper_bound = Min(inter_block.dim + block_size, dim_finish) + inner_finish = Scalar(name="%s_ub" % o.dim.name) + ub_expr = Expression(Eq(inner_finish, upper_bound), np.dtype(np.int32)) + if o.is_Parallel: + properties = [p for p in o.properties if p != SEQUENTIAL] + [PARALLEL, self.TAG] + else: + properties = o.properties + (self.TAG, ELEMENTAL) + + rebuilt = self.visit(o.children) + i = o._rebuild(*rebuilt, limits=[inner_start, inner_finish, 1], + offsets=None, properties=properties) + return List(body=(ub_expr, i)) + + def visit_Expression(self, o): + return o + + def visit_list(self, o): + rebuilt = [self.visit(i) for i in o] + return rebuilt + + visit_tuple = visit_list + + def printAST(node, verbose=True): return PrintAST(verbose=verbose).visit(node) From 246b8e10113b8cb8fefe8b6a0952a867bc16a9bd Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 5 Mar 2018 11:06:48 +0000 Subject: [PATCH 19/25] dse: skewing: fix time dimension detection bug --- devito/dse/backends/advanced.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/devito/dse/backends/advanced.py b/devito/dse/backends/advanced.py index d060461edb..f03e70bd65 100644 --- a/devito/dse/backends/advanced.py +++ b/devito/dse/backends/advanced.py @@ -2,6 +2,7 @@ from collections import OrderedDict +from devito.dimension import TimeDimension from devito.ir import clusterize from devito.dse.aliases import collect from devito.dse.backends import BasicRewriter, dse_pass @@ -186,7 +187,10 @@ def _loop_skew(self, cluster, template, **kwargs): mapper[dim] = dim + skew_factor * t skews[dim] = (skew_factor, t) elif dim.is_Time: - t = dim.parent + if isinstance(dim, TimeDimension): + t = dim + elif isinstance(dim.parent, TimeDimension): + t = dim.parent if t is None: return cluster From 50a40bb3efca91e9131310111bb10a79b89a3671 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 9 Apr 2018 19:50:30 +0100 Subject: [PATCH 20/25] dle: blocking: visitor: hoist lower bound to avoid fmax problems --- devito/ir/iet/visitors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 3bb6a2ebe9..0bb8c4450d 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -850,10 +850,14 @@ def visit_Iteration(self, o): inter_block = Iteration([], dim, [outer_start, outer_finish, block_size]) self.inter_blocks.append(inter_block) - inner_start = Max(inter_block.dim, dim_start) + lower_bound = Max(inter_block.dim, dim_start) + inner_start = Scalar(name="%s_lb" % o.dim.name) + lb_expr = Expression(Eq(inner_start, lower_bound), np.dtype(np.int32)) + upper_bound = Min(inter_block.dim + block_size, dim_finish) inner_finish = Scalar(name="%s_ub" % o.dim.name) ub_expr = Expression(Eq(inner_finish, upper_bound), np.dtype(np.int32)) + if o.is_Parallel: properties = [p for p in o.properties if p != SEQUENTIAL] + [PARALLEL, self.TAG] else: @@ -862,7 +866,7 @@ def visit_Iteration(self, o): rebuilt = self.visit(o.children) i = o._rebuild(*rebuilt, limits=[inner_start, inner_finish, 1], offsets=None, properties=properties) - return List(body=(ub_expr, i)) + return List(body=(lb_expr, ub_expr, i)) def visit_Expression(self, o): return o From f5683864d523428c34eb9ee9efbe37fa70ea9277 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Wed, 30 May 2018 20:47:12 +0100 Subject: [PATCH 21/25] operator: always pop autotune arg --- devito/operator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/devito/operator.py b/devito/operator.py index d3118ea142..b842592b89 100644 --- a/devito/operator.py +++ b/devito/operator.py @@ -193,6 +193,7 @@ def arguments(self, **kwargs): dim_sizes.update(dle_arguments) autotune = autotune and kwargs.pop('autotune', False) + kwargs.pop('autotune', False) # Make sure we've used all arguments passed if len(kwargs) > 0: From 6d4e644c70525f3825b19fcc1f957e338effdbea Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Wed, 30 May 2018 15:40:56 +0100 Subject: [PATCH 22/25] autotuner: tweak for time-tiling --- devito/core/autotuning.py | 65 ++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/devito/core/autotuning.py b/devito/core/autotuning.py index ce90fbdf2e..c9fb51ea0e 100644 --- a/devito/core/autotuning.py +++ b/devito/core/autotuning.py @@ -53,17 +53,40 @@ def autotune(operator, arguments, tunable): # Attempted block sizes ... mapper = OrderedDict([(i.argument.symbolic_size.name, i) for i in tunable]) + time_dim = None + for i, d in mapper.items(): + if d.original_dim.is_Time: + time_dim = i + # ... Defaults (basic mode) - blocksizes = [OrderedDict([(i, v) for i in mapper]) for v in options['at_blocksize']] + blocksizes = [OrderedDict([(i, v) for i in mapper if not mapper[i].original_dim.is_Time]) for v in options['at_blocksize']] # cubes # ... Always try the entire iteration space (degenerate block) datashape = [at_arguments[mapper[i].original_dim.symbolic_end.name] - at_arguments[mapper[i].original_dim.symbolic_start.name] for i in mapper] blocksizes.append(OrderedDict([(i, mapper[i].iteration.extent(0, j)) - for i, j in zip(mapper, datashape)])) + for i, j in zip(mapper, datashape)])) # degenerate block # ... More attempts if auto-tuning in aggressive mode if configuration.core['autotuning'] == 'aggressive': + last_dim = None + innermost = iterations[-1].dim + for k, v in mapper.items(): + if v.original_dim == innermost: + last_dim = (k, blocksizes[-1][k]) + blocksizes = more_heuristic_attempts(blocksizes) + if last_dim: + info_at("Extending the innermost dimension, %s <%s>" % (last_dim[0], last_dim[1])) + intermediate_blocks = [OrderedDict([(i, v) for i in mapper if not (mapper[i].original_dim.is_Time or mapper[i].original_dim == innermost)]) + for v in options['at_blocksize']] + intermediate_blocks = more_heuristic_attempts(intermediate_blocks) + blocksizes += cross_time_tiles(intermediate_blocks, last_dim[0], [last_dim[1]]) + # TODO: don't extend this: run generator for 2 dims, then extend that + + if time_dim: + blocksizes = cross_time_tiles(blocksizes, time_dim, [1, 2, 4, 8, 16]) + + # How many temporaries are allocated on the stack? # Will drop block sizes that might lead to a stack overflow functions = FindSymbols('symbolics').visit(operator.body + @@ -74,7 +97,14 @@ def autotune(operator, arguments, tunable): # Note: there is only a single loop over 'blocksize' because only # square blocks are tested timings = OrderedDict() + fastest, timing = None, float("inf") + unique = [] + for bs in blocksizes: + if bs in unique: + continue + unique.append(bs) + illegal = False for k, v in at_arguments.items(): if k in bs: @@ -115,12 +145,16 @@ def autotune(operator, arguments, tunable): operator.cfunction(*list(at_arguments.values())) elapsed = sum(operator.profiler.timings.values()) timings[tuple(bs.items())] = elapsed + if elapsed < timing: + fastest = tuple(bs.items()) + timing = elapsed info_at("Block shape <%s> took %f (s) in %d time steps" % (','.join('%d' % i for i in bs.values()), elapsed, timesteps)) try: - best = dict(min(timings, key=timings.get)) - info("Auto-tuned block shape: %s" % best) + # best = dict(min(timings, key=timings.get)) + best = dict(fastest) + info("Auto-tuned block shape: %s; time: %f (s)" % (best, timing)) except ValueError: info("Auto-tuning request, but couldn't find legal block sizes") return arguments @@ -140,6 +174,7 @@ def autotune(operator, arguments, tunable): def more_heuristic_attempts(blocksizes): # Ramp up to higher block sizes handle = OrderedDict([(i, options['at_blocksize'][-1]) for i in blocksizes[0]]) + # insert more cubes for i in range(3): new_bs = OrderedDict([(k, v*2) for k, v in handle.items()]) blocksizes.insert(blocksizes.index(handle) + 1, new_bs) @@ -152,22 +187,30 @@ def more_heuristic_attempts(blocksizes): handle.append(OrderedDict(list(bs.items())[:-1] + [list(i.items())[-1]])) # Some more shuffling for all block sizes for bs in list(blocksizes): - ncombs = len(bs) + ncombs = len(bs) # dimensions to tile over for i in range(ncombs): for j in combinations(bs, i+1): item = [(k, bs[k]*2 if k in j else v) for k, v in bs.items()] handle.append(OrderedDict(item)) - unique = [] - for i in blocksizes + handle: - if i not in unique: - unique.append(i) + return blocksizes + handle + + +def extend_dimension(blocksizes, dim, size): + return blocksizes + [OrderedDict([(dim, size) if dim == d else (d, s) for d, s in bs.items()]) for bs in blocksizes] + + +def cross_time_tiles(blocksizes, dim, tiles): + extended = [] + for bs in blocksizes: + for tile in tiles: + extended.append(OrderedDict([(dim, tile)] + list(bs.items()))) - return unique + return extended options = { - 'at_squeezer': 5, + 'at_squeezer': 17, 'at_blocksize': sorted({8, 16, 24, 32, 40, 64, 128}), 'at_stack_limit': resource.getrlimit(resource.RLIMIT_STACK)[0] / 4 } From 26830067317a20440c924537bb19dbea07b8986b Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Wed, 30 May 2018 15:41:41 +0100 Subject: [PATCH 23/25] test_dse: force skewing factor for skewing test --- tests/test_dse.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_dse.py b/tests/test_dse.py index 2f43e900fd..303c21043a 100644 --- a/tests/test_dse.py +++ b/tests/test_dse.py @@ -5,7 +5,7 @@ import pytest from conftest import x, y, z, time, skipif_yask # noqa -from devito import Eq # noqa +from devito import configuration from devito.ir import Stencil, clusterize, TemporariesGraph from devito.dse import rewrite, common_subexprs_elimination, collect from devito.symbolics import (xreplace_constrained, iq_timeinvariant, iq_timevarying, @@ -135,22 +135,30 @@ def test_tti_rewrite_basic(tti_nodse): assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) +# Time-tiling vs dse=None,space-tiling @skipif_yask def test_dle_tiling(tti_nodse): + prev = configuration['skew_factor'] if 'skew_factor' in configuration else 0 + configuration['skew_factor'] = 2 operator = tti_operator(dse='skewing', dle='advanced') rec, u, v, _ = operator.forward() assert np.allclose(tti_nodse[0].data, v.data, atol=10e-3) assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) + configuration['skew_factor'] = prev +# Skewed only vs. dse=None,dle=advanced (i.e. space-tiling) @skipif_yask def test_tti_rewrite_skewing(tti_nodse): - operator = tti_operator(dse='skewing') + prev = configuration['skew_factor'] if 'skew_factor' in configuration else 0 + configuration['skew_factor'] = 2 + operator = tti_operator(dse='skewing', dle=None) rec, u, v, _ = operator.forward() - assert np.allclose(tti_nodse[0].data, v.data, atol=10e-3) - assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-3) + assert np.allclose(tti_nodse[0].data, v.data, atol=10e-1) + assert np.allclose(tti_nodse[1].data, rec.data, atol=10e-1) + configuration['skew_factor'] = prev @skipif_yask From 8524985d9b09e355a3c2b304bf8b7b30b1200e78 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 4 Jun 2018 18:46:43 +0100 Subject: [PATCH 24/25] visitor: BlockIterations: remember old loops Bugfix: would destroy information about previously-blocked loops, as `blocked` would get overwritten by an empty dict. --- devito/dle/backends/advanced.py | 2 +- devito/ir/iet/visitors.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/devito/dle/backends/advanced.py b/devito/dle/backends/advanced.py index 0d6941862e..567a02ba74 100644 --- a/devito/dle/backends/advanced.py +++ b/devito/dle/backends/advanced.py @@ -151,7 +151,7 @@ def _loop_blocking(self, nodes, state): condition = lambda i: (i in iterations) tag = len(mapper) - blocker = BlockIterations(tag, condition=condition) + blocker = BlockIterations(tag, blocked, condition=condition) intra_blocks = blocker.visit(root) inter_blocks = blocker.inter_blocks blocked = blocker.blocked diff --git a/devito/ir/iet/visitors.py b/devito/ir/iet/visitors.py index 0bb8c4450d..96a1021294 100644 --- a/devito/ir/iet/visitors.py +++ b/devito/ir/iet/visitors.py @@ -815,13 +815,13 @@ class BlockIterations(Visitor): Tile an iteration tree, given a condition. """ - def __init__(self, tag, condition=lambda _: True): + def __init__(self, tag, blocked, condition=lambda _: True): super(BlockIterations, self).__init__() self.TAG = tagger(tag) self.tag = tag self.condition = condition self.inter_blocks = [] - self.blocked = {} + self.blocked = blocked def visit_Block(self, o): #rebuilt = [self.visit(i) for i in o.children] From 63abe73a8719600439c02602e4cc53691c5c09c5 Mon Sep 17 00:00:00 2001 From: Nicholas Sim Date: Mon, 4 Jun 2018 19:37:39 +0100 Subject: [PATCH 25/25] test_dle: add test_time_blocking_edge_cases --- tests/test_dle.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_dle.py b/tests/test_dle.py index 210d21c858..0dcf05360e 100644 --- a/tests/test_dle.py +++ b/tests/test_dle.py @@ -378,6 +378,33 @@ def test_cache_blocking_edge_cases_highorder(shape, blockshape): assert np.equal(wo_blocking.data, w_blocking.data).all() +@skipif_yask +@pytest.mark.parametrize("shape,blockshape", [ + ((25, 25, 46), (None, None, None)), + ((25, 25, 46), (7, None, None)), + ((25, 25, 46), (None, None, 7)), + ((25, 25, 46), (None, 7, None)), + ((25, 25, 46), (5, None, 7)), + ((25, 25, 46), (10, 3, None)), + ((25, 25, 46), (None, 7, 11)), + ((25, 25, 46), (8, 2, 4)), + ((25, 25, 46), (2, 4, 8)), + ((25, 25, 46), (4, 8, 2)), + ((25, 46), (None, 7)), + ((25, 46), (7, None)) +]) +def test_time_blocking_edge_cases(shape, blockshape): + prev = configuration['skew_factor'] if 'skew_factor' in configuration else 0 + configuration['skew_factor'] = 2 + wo_blocking, _ = _new_operator2(shape, time_order=2, dle='noop') + w_blocking, _ = _new_operator2(shape, time_order=2, dse='skewing', + dle=('blocking,openmp', + {'blockshape': blockshape, + 'blockinner': True})) + assert np.equal(wo_blocking.data, w_blocking.data).all() + configuration['skew_factor'] = prev + + @skipif_yask @pytest.mark.parametrize("shape,blockshape", [ ((3, 3), (3, 4)),