merge conflict in operator

ZoeLeibowitz committed Jul 9, 2024
2 parents c74a29e + 5cff56d commit df13bd6
Showing 59 changed files with 1,246 additions and 754 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pytest-core-nompi.yml
@@ -68,14 +68,14 @@ jobs:
os: ubuntu-20.04
arch: "gcc-10"
language: "C"
sympy: "1.10"
sympy: "1.11"

- name: pytest-ubuntu-py312-gcc13-omp
python-version: '3.12'
os: ubuntu-24.04
arch: "gcc-13"
language: "openmp"
sympy: "1.11"
sympy: "1.13"

- name: pytest-ubuntu-py39-gcc9-omp
python-version: '3.9'
15 changes: 14 additions & 1 deletion devito/__init__.py
@@ -101,8 +101,21 @@ def reinit_compiler(val):
# optimisations.
configuration.add('safe-math', 0, [0, 1], preprocessor=bool, callback=reinit_compiler)


# Enable/disable automatic padding for allocated data
configuration.add('autopadding', False, [False, True])
def _preprocess_autopadding(v):
return {
'0': False,
'1': np.float32,
True: np.float32,
'fp16': np.float16,
'fp32': np.float32,
'fp64': np.float64
}.get(v, v)

configuration.add('autopadding', False, # noqa: E305
[False, True, 0, 1, np.float16, np.float32, np.float64],
preprocessor=_preprocess_autopadding)

# Select target device
configuration.add('deviceid', -1, preprocessor=int, impacts_jit=False)
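
For context, a minimal standalone sketch (not Devito code) of how the preprocessor added above normalizes user-supplied `autopadding` values; the mapping mirrors `_preprocess_autopadding`, with numpy as the only dependency:

import numpy as np

def preprocess_autopadding(v):
    # Strings and booleans become either False (disabled) or a numpy dtype;
    # anything else is passed through unchanged.
    return {
        '0': False,
        '1': np.float32,
        True: np.float32,
        'fp16': np.float16,
        'fp32': np.float32,
        'fp64': np.float64,
    }.get(v, v)

assert preprocess_autopadding('fp64') is np.float64   # string selects a dtype
assert preprocess_autopadding(True) is np.float32     # plain True defaults to fp32
assert preprocess_autopadding(False) is False         # stays disabled
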
8 changes: 6 additions & 2 deletions devito/arch/archinfo.py
@@ -19,6 +19,8 @@
'get_cuda_path', 'get_hip_path', 'check_cuda_runtime', 'get_m1_llvm_path',
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice',
# Brand-agnostic
'ANYCPU', 'ANYGPU',
# Intel CPUs
'INTEL64', 'SNB', 'IVB', 'HSW', 'BDW', 'KNL', 'KNL7210',
'SKX', 'KLX', 'CLX', 'CLK', 'SPR',
@@ -592,7 +594,7 @@ def get_platform():
pass

# Unable to detect platform. Stick to default...
return CPU64
return ANYCPU


class Platform:
@@ -893,7 +895,7 @@ def march(cls):


# CPUs
CPU64 = Cpu64('cpu64')
ANYCPU = Cpu64('cpu64')
CPU64_DUMMY = Intel64('cpu64-dummy', cores_logical=2, cores_physical=1, isa='sse')

INTEL64 = Intel64('intel64')
@@ -921,6 +923,8 @@ def march(cls):
POWER9 = Power('power9')

# Devices
ANYGPU = Cpu64('gpu')

NVIDIAX = NvidiaDevice('nvidiaX')

AMDGPUX = AmdDevice('amdgpuX')
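
The renaming above introduces brand-agnostic defaults (ANYCPU, ANYGPU) that `get_platform()` falls back to when detection fails. A hedged sketch of that fallback pattern, with illustrative names rather than Devito's actual detection code:

def get_platform_sketch(detectors, default):
    # Try each vendor-specific detector in turn; if none succeeds,
    # stick to the brand-agnostic default rather than raising.
    for detect in detectors:
        try:
            return detect()
        except Exception:
            continue
    return default

# With every detector failing, the generic platform is returned
ANYCPU_SKETCH = object()
assert get_platform_sketch([lambda: 1/0], ANYCPU_SKETCH) is ANYCPU_SKETCH
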
6 changes: 3 additions & 3 deletions devito/core/gpu.py
@@ -120,17 +120,17 @@ def _rcompile_wrapper(cls, **kwargs0):
options0 = kwargs0.pop('options')

def wrapper(expressions, mode='default', options=None, **kwargs1):
options = {**options0, **(options or {})}
kwargs = {**kwargs0, **kwargs1}

if mode == 'host':
par_disabled = options['par-disabled']
options = options or {}
target = {
'platform': 'cpu64',
'language': 'C' if par_disabled else 'openmp',
'language': 'C' if options0['par-disabled'] else 'openmp',
'compiler': 'custom'
}
else:
options = {**options0, **(options or {})}
target = None

return rcompile(expressions, kwargs, options, target=target)
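
The reshuffle above changes when the Operator-level options are merged with per-call ones. A small illustration (plain Python, not Devito code) of the merging convention used in the wrapper: per-call options override the defaults, and `options or {}` guards against `options=None`:

defaults = {'par-disabled': True, 'mpi': False}     # illustrative option values

merged = {**defaults, **(None or {})}               # no per-call options supplied
assert merged == defaults

merged = {**defaults, **({'mpi': True} or {})}      # per-call override wins
assert merged['mpi'] is True and merged['par-disabled'] is True
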
18 changes: 13 additions & 5 deletions devito/core/operator.py
@@ -1,14 +1,16 @@
from collections.abc import Iterable
from functools import cached_property

from devito.core.autotuning import autotune
from devito.exceptions import InvalidArgument, InvalidOperator
from devito.ir import FindSymbols
from devito.logger import warning
from devito.mpi.routines import mpi_registry
from devito.parameters import configuration
from devito.operator import Operator
from devito.tools import (as_tuple, is_integer, timed_pass,
UnboundTuple, UnboundedMultiTuple)
from devito.types import NThreads
from devito.types import NThreads, PThreadArray

__all__ = ['CoreOperator', 'CustomOperator',
# Optimization options
@@ -113,10 +115,10 @@ class BasicOperator(Operator):
stencil-like data accesses.
"""

INDEX_MODE = "int64"
INDEX_MODE = "int32"
"""
The type of the expression used to compute array indices. Either `int64`
(default) or `int32`.
The type of the expression used to compute array indices. Either `int32`
(default) or `int64`.
"""

ERRCTL = None
@@ -190,7 +192,7 @@ def _autotune(self, args, setup):

return args

@property
@cached_property
def nthreads(self):
nthreads = [i for i in self.input if isinstance(i, NThreads)]
if len(nthreads) == 0:
@@ -199,6 +201,12 @@ def nthreads(self):
assert len(nthreads) == 1
return nthreads.pop()

@cached_property
def npthreads(self):
symbols = FindSymbols().visit(self.body)
ptas = [i for i in symbols if isinstance(i, PThreadArray)]
return sum(i.size for i in ptas)


class CoreOperator(BasicOperator):
pass
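
Both `nthreads` and the new `npthreads` are now `functools.cached_property` rather than plain properties, so the symbol search runs once per Operator. A self-contained sketch of that caching behaviour (the class below is illustrative):

from functools import cached_property

class Example:
    calls = 0

    @cached_property
    def expensive(self):
        # Runs once; the result is then stored on the instance
        Example.calls += 1
        return sum(range(10))

e = Example()
assert e.expensive == 45
assert e.expensive == 45      # second access hits the cache
assert Example.calls == 1
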
5 changes: 3 additions & 2 deletions devito/data/data.py
@@ -116,7 +116,6 @@ def __array_finalize__(self, obj):
self._allocator = ALLOC_ALIGNED
elif obj._index_stash is not None:
# From `__getitem__`
self._is_distributed = obj._is_distributed
self._distributor = obj._distributor
glb_idx = obj._normalize_index(obj._index_stash)
self._modulo = tuple(m for i, m in zip(glb_idx, obj._modulo)
@@ -131,10 +130,12 @@ def __array_finalize__(self, obj):
decomposition.append(dec.reshape(i))
self._decomposition = tuple(decomposition)
self._allocator = obj._allocator
decomp = any(i is not None for i in self._decomposition)
self._is_distributed = decomp and obj._is_distributed
else:
self._is_distributed = obj._is_distributed
self._distributor = obj._distributor
self._allocator = obj._allocator
self._is_distributed = obj._is_distributed
if self.ndim == obj.ndim:
# E.g., from a ufunc, such as `np.add`
self._modulo = obj._modulo
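
The hunk above reorders how `_is_distributed` is derived inside `__array_finalize__`, the numpy hook that fires whenever a view or ufunc result of a `Data` object is created. A minimal, hedged sketch of that hook (the subclass and the `tag` attribute are illustrative, not Devito's):

import numpy as np

class TaggedArray(np.ndarray):
    def __array_finalize__(self, obj):
        if obj is None:
            return                       # constructed explicitly, nothing to copy
        # Propagate (or recompute) metadata from the parent array
        self.tag = getattr(obj, 'tag', None)

a = np.zeros(4).view(TaggedArray)
a.tag = 'distributed'
b = a[1:3]                               # slicing triggers __array_finalize__
assert b.tag == 'distributed'
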
2 changes: 1 addition & 1 deletion devito/finite_differences/derivative.py
@@ -245,7 +245,7 @@ def func(self, expr, *args, **kwargs):

def _subs(self, old, new, **hints):
# Basic case
if old == self:
if self == old:
return new
# Is it in expr?
if self.expr.has(old):
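
The one-liner above flips `old == self` into `self == old`, presumably so that the Derivative's own `__eq__` drives the comparison. Operand order can matter in Python because the left-hand object's `__eq__` is consulted first; the right-hand one only gets a say if the left returns NotImplemented, or if the right is a subclass overriding the comparison. A toy illustration with made-up classes:

class Loose:
    def __eq__(self, other):
        return True                  # matches anything

class Strict:
    def __eq__(self, other):
        return other is self         # identity only

assert (Loose() == Strict()) is True     # Loose.__eq__ decides
assert (Strict() == Loose()) is False    # Strict.__eq__ decides
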
8 changes: 6 additions & 2 deletions devito/finite_differences/differentiable.py
@@ -6,9 +6,13 @@
import sympy
from sympy.core.add import _addsort
from sympy.core.mul import _keep_coeff, _mulsort
from sympy.core.core import ordering_of_classes
from sympy.core.decorators import call_highest_priority
from sympy.core.evalf import evalf_table
try:
from sympy.core.core import ordering_of_classes
except ImportError:
# Moved in 1.13
from sympy.core.basic import ordering_of_classes

from devito.finite_differences.tools import make_shift_x0, coeff_priority
from devito.logger import warning
@@ -123,7 +127,7 @@ def _symbolic_functions(self):
@cached_property
def function(self):
if len(self._functions) == 1:
return self._functions.pop()
return set(self._functions).pop()
else:
return None

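
Two compatibility fixes above: `ordering_of_classes` moved to `sympy.core.basic` in SymPy 1.13, hence the guarded import, and `function` now pops from a copy of `_functions`. Assuming `_functions` is a frozenset (or a cached set that must not be mutated), the copy-then-pop idiom looks like this:

funcs = frozenset({'u'})             # illustrative stand-in for self._functions

# frozenset has no .pop(); popping a cached mutable set would shrink it.
only = set(funcs).pop()              # copy first, then pop the copy
# equivalently: only = next(iter(funcs))

assert only == 'u' and len(funcs) == 1
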
38 changes: 26 additions & 12 deletions devito/ir/clusters/algorithms.py
@@ -343,24 +343,31 @@ def rule(size, e):
# Reconstruct the Clusters
processed = []
for c in clusters:
exprs = c.exprs

sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = [i for i in sub_iterators[d] if i not in subiters]

# Apply substitutions to expressions
# Note: In an expression, there could be `u[t+1, ...]` and `v[t+1,
# ...]`, where `u` and `v` are TimeFunction with circular time
# buffers (save=None) *but* different modulo extent. The `t+1`
# indices above are therefore conceptually different, so they will
# be replaced with the proper ModuloDimension through two different
# calls to `xreplace_indices`
exprs = c.exprs
groups = as_mapper(mds, lambda d: d.modulo)
for size, v in groups.items():
subs = {md.origin: md for md in v}
func = partial(xreplace_indices, mapper=subs, key=partial(rule, size))
key = partial(rule, size)
if size == 1:
# Optimization -- avoid useless "% 1" ModuloDimensions
subs = {md.origin: 0 for md in v}
else:
subs = {md.origin: md for md in v}
sub_iterators[d].extend(v)

func = partial(xreplace_indices, mapper=subs, key=key)
exprs = [e.apply(func) for e in exprs]

# Augment IterationSpace
sub_iterators = dict(c.ispace.sub_iterators)
sub_iterators[d] = tuple(i for i in sub_iterators[d] + tuple(mds)
if i not in subiters)
ispace = IterationSpace(c.ispace.intervals, sub_iterators,
c.ispace.directions)

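
The `size == 1` branch added above rests on simple arithmetic: any index modulo 1 is 0, so a time buffer of depth 1 always lands in slot 0 and the `% 1` ModuloDimension can be replaced by the constant 0 outright. In plain Python:

# Every origin index reduces to slot 0 when the modulo (buffer depth) is 1
assert all((t + k) % 1 == 0 for t in range(10) for k in (-1, 0, 1))
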
@@ -433,7 +440,7 @@ def callback(self, clusters, prefix, seen=None):

key = lambda i: i in prefix[:-1] or i in hs.loc_indices
ispace = c.ispace.project(key)
# HaloTOuch are not parallel
# HaloTouches are not parallel
properties = c.properties.sequentialize()

halo_touch = c.rebuild(exprs=expr, ispace=ispace, properties=properties)
@@ -614,18 +621,25 @@ def _normalize_reductions_dense(cluster, sregistry, mapper):
# of the target backend
lhs, rhs = e.args

try:
f = rhs.function
except AttributeError:
f = None

if lhs.function.is_Array:
# Probably a compiler-generated reduction, e.g. via
# recursive compilation; it's an Array already, so nothing to do
processed.append(e)
elif rhs in mapper:
# Seen this RHS already, so reuse the Array that was created for it
processed.append(e.func(lhs, mapper[rhs].indexify()))
elif f and f.is_Array and sum(flatten(f._size_nodomain)) == 0:
# Special case: the RHS is an Array with no halo/padding, meaning
# that the written data values are contiguous in memory, hence
# we can simply reuse the Array itself as we're already in the
# desired memory layout
processed.append(e)
else:
# Here the LHS could be a Symbol or a user-level Function
# In the latter case we copy the data into a temporary Array
# because the Function might be padded, and reduction operations
# require, in general, the data values to be contiguous
name = sregistry.make_name()
try:
grid = cluster.grid
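
The new `elif` above reuses the reduction RHS directly whenever it is an Array with no halo or padding, on the grounds that such data is already contiguous in memory. A numpy illustration of the layout fact this relies on (sizes are illustrative):

import numpy as np

padded = np.zeros((12, 12))
interior = padded[2:-2, 2:-2]            # domain values of a padded function
assert not interior.flags['C_CONTIGUOUS']

plain = np.zeros((8, 8))                 # an Array with no halo/padding
assert plain.flags['C_CONTIGUOUS']
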
4 changes: 0 additions & 4 deletions devito/ir/clusters/cluster.py
@@ -187,10 +187,6 @@ def scope(self):
def functions(self):
return self.scope.functions

@cached_property
def has_increments(self):
return any(e.is_Increment for e in self.exprs)

@cached_property
def grid(self):
grids = set(f.grid for f in self.functions if f.is_AbstractFunction)
16 changes: 10 additions & 6 deletions devito/ir/iet/nodes.py
@@ -1273,21 +1273,25 @@ def __repr__(self):
class Pragma(Node):

"""
One or more pragmas floating in the IET constructed through a callback.
One or more pragmas floating in the IET.
"""

def __init__(self, callback, arguments=None):
def __init__(self, pragma, arguments=None):
super().__init__()

self.callback = callback
if not isinstance(pragma, str):
raise TypeError("Pragma name must be a string, not %s" % type(pragma))

self.pragma = pragma
self.arguments = as_tuple(arguments)

def __repr__(self):
return '<Pragmas>'
return '<Pragma>'

@cached_property
def pragmas(self):
return as_tuple(self.callback(*self.arguments))
def _generate(self):
# Subclasses may override this property to customize the pragma generation
return self.pragma % self.arguments


class Transfer:
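
The rewrite above changes `Pragma` from wrapping a callback to storing a printf-style string plus its arguments, rendered on demand by `_generate`. A tiny sketch of that contract (the pragma text is illustrative):

pragma = "acc parallel loop collapse(%d)"    # illustrative pragma text
arguments = (2,)

rendered = pragma % arguments
assert rendered == "acc parallel loop collapse(2)"
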
3 changes: 2 additions & 1 deletion devito/ir/iet/utils.py
@@ -106,7 +106,8 @@ def derive_parameters(iet, drop_locals=False, ordering='default'):
basics = FindSymbols('basics').visit(iet)
candidates.extend(i.function for i in basics)

# Filter off duplicates (e.g., `x_size` is extracted by both calls to FindSymbols)
# Filter off duplicates (e.g., `x_size` is extracted by both calls to
# FindSymbols)
candidates = filter_ordered(candidates)

# Filter off symbols which are defined somewhere within `iet`
15 changes: 7 additions & 8 deletions devito/ir/iet/visitors.py
@@ -480,7 +480,7 @@ def visit_Expression(self, o):
code = c.Assign(lhs, rhs)

if o.pragmas:
code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))

return code

@@ -489,7 +489,7 @@ def visit_AugmentedExpression(self, o):
c_rhs = ccode(o.expr.rhs, dtype=o.dtype, compiler=self._compiler)
code = c.Statement("%s %s= %s" % (c_lhs, o.op, c_rhs))
if o.pragmas:
code = c.Module(list(o.pragmas) + [code])
code = c.Module(self._visit(o.pragmas) + (code,))
return code

def visit_Call(self, o, nested_call=False):
@@ -555,15 +555,13 @@ def visit_Iteration(self, o):

# Attach pragmas, if any
if o.pragmas:
handle = c.Module(o.pragmas + (handle,))
pragmas = tuple(self._visit(i) for i in o.pragmas)
handle = c.Module(pragmas + (handle,))

return handle

def visit_Pragma(self, o):
if len(o.pragmas) == 1:
return o.pragmas[0]
else:
return c.Collection(o.pragmas)
return c.Pragma(o._generate)

def visit_While(self, o):
condition = ccode(o.condition)
@@ -1233,9 +1231,10 @@ def visit_Iteration(self, o):
nodes = self._visit(o.nodes)
dimension = uxreplace(o.dim, self.mapper)
limits = [uxreplace(i, self.mapper) for i in o.limits]
pragmas = self._visit(o.pragmas)
uindices = [uxreplace(i, self.mapper) for i in o.uindices]
return o._rebuild(nodes=nodes, dimension=dimension, limits=limits,
uindices=uindices)
pragmas=pragmas, uindices=uindices)

def visit_Definition(self, o):
try:
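
The visitor changes above route pragmas through `self._visit`, which now yields `cgen` objects that are grouped with the decorated statement. A hedged sketch of the final C emission, assuming the `cgen` package Devito already uses (the pragma and statement texts are illustrative):

import cgen as c

pragma = c.Pragma("omp simd")
stmt = c.Statement("a[i] = b[i] + 1")
block = c.Module((pragma, stmt))

print(block)    # "#pragma omp simd" followed by "a[i] = b[i] + 1;"
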
5 changes: 2 additions & 3 deletions devito/ir/stree/algorithms.py
@@ -126,9 +126,8 @@ def stree_build(clusters, profiler=None, **kwargs):
if i.is_Halo:
found = i
elif i.is_Sync:
if profiler._verbosity > 0 or not i.is_async:
attach_section(i)
section = None
attach_section(i)
section = None
break
elif i.is_Iteration:
if (i.dim.is_Time and SEQUENTIAL in i.properties):