prevent too much memory overlap in parallel

slimgroup · Jul 11, 2024 · 328490f · 328490f
1 parent 8582119
commit 328490f
Show file tree

Hide file tree

Showing 9 changed files with 49 additions and 62 deletions.
diff --git a/Project.toml b/Project.toml
@@ -17,6 +17,16 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SegyIO = "157a0f19-4d44-4de5-a0d0-07e2f0ac4dfa"
 
+[weakdeps]
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+
+[extensions]
+FluxJUDIExt = "Flux"
+JLD2JUDIExt = "JLD2"
+ZygoteJUDIExt = "Zygote"
+
 [compat]
 Aqua = "0.5"
 ChainRulesCore = "1"
@@ -32,11 +42,6 @@ SegyIO = "0.7.7 - 0.8.5"
 TimerOutputs = "0.5"
 julia = "1.6"
 
-[extensions]
-FluxJUDIExt = "Flux"
-JLD2JUDIExt = "JLD2"
-ZygoteJUDIExt = "Zygote"
-
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
@@ -47,8 +52,3 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 
 [targets]
 test = ["Aqua", "JLD2", "Printf", "Test", "TimerOutputs", "Flux"]
-
-[weakdeps]
-Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/docs/src/helper.md b/docs/src/helper.md
@@ -104,8 +104,6 @@ remove_out_of_bounds_receivers
 ```@docs
 devito_model
 setup_grid
-pad_sizes
-pad_array
 remove_padding
 convertToCell
 process_input_data

diff --git a/examples/scripts/fwi_example_2D.jl b/examples/scripts/fwi_example_2D.jl
@@ -4,7 +4,7 @@
 #
 
 using Statistics, Random, LinearAlgebra
-using JUDI, SlimOptim, HDF5, SegyIO, PyPlot
+using JUDI, HDF5, SegyIO, SlimOptim, SlimPlotting
 
 # Load starting model
 n,d,o,m0 = read(h5open("$(JUDI.JUDI_DATA)/overthrust_model.h5","r"), "n", "d", "o", "m0")
@@ -66,4 +66,6 @@ for j=1:niterations
     model0.m .= proj(model0.m .+ step .* p)
 end
 
-figure(); imshow(sqrt.(1f0./adjoint(model0.m))); title("FWI with SGD")
+figure()
+plot_velocity(model0.m'.^(-.5))
+title("FWI with SGD")
diff --git a/examples/scripts/modeling_basic_2D.jl b/examples/scripts/modeling_basic_2D.jl
@@ -9,7 +9,7 @@
 #' This example is converted to a markdown file for the documentation.
 
 #' # Import JUDI, Linear algebra utilities and Plotting
-using JUDI, PyPlot, LinearAlgebra, SlimPlotting
+using JUDI, LinearAlgebra, SlimPlotting, SegyIO, SlimOptim
 
 #+ echo = false; results = "hidden"
 close("all")

diff --git a/src/JUDI.jl b/src/JUDI.jl
@@ -178,9 +178,6 @@ function __init__()
     copy!(devito, pyimport("devito"))
     # Initialize lock at session start
     PYLOCK[] = ReentrantLock()
-
-    # Prevent autopadding to use external allocator
-    set_devito_config("autopadding", false)
 
     # Make sure there is no conflict for the cuda init thread with CUDA.jl
     if get(ENV, "DEVITO_PLATFORM", "") == "nvidiaX"

diff --git a/src/TimeModeling/Modeling/distributed.jl b/src/TimeModeling/Modeling/distributed.jl
@@ -21,12 +21,6 @@ end
     x
 end
 
-"""
-    safe_gc()
-
-Generic GC, compatible with different julia versions of it.
-"""
-safe_gc() = try Base.GC.gc(); catch; gc() end
 
 """
     local_reduce!(future, other)
@@ -64,9 +58,27 @@ Adapted from `DistributedOperations.jl` (MIT license). Striped from custom types
 with different reduction functions.
 """
 function reduce!(futures::Vector{_TFuture})
+    # Size of the worker pool returned by `_worker_pool()`
+    nwork = length(_worker_pool())
+    nf = length(futures)
+    # Batch size, capped at the worker count, so that finished tasks do not sit holding
+    # memory while waiting for the binary-tree reduction to reach their index.
+    bsize = min(nwork, nf)
+    # First batch seeds `res`. NOTE(review): assumes `futures` is non-empty — confirm
+    res = reduce_all_workers!(futures[1:bsize])
+    # Merge each remaining batch (the last one may be shorter) into `res`
+    for i = bsize+1:bsize:nf
+        last = min(nf, i + bsize - 1)
+        single_reduce!(res, reduce_all_workers!(futures[i:last]))
+    end
+    return res
+end
+
+
+function reduce_all_workers!(futures::Vector{_TFuture})
     # Get length a next power of two for binary reduction
     M = length(futures)
-    L = round(Int,log2(prevpow(2,M)))
+    L = round(Int, log2(prevpow(2,M)))
     m = 2^L
     # remainder
     R = M - m

diff --git a/src/TimeModeling/Utils/auxiliaryFunctions.jl b/src/TimeModeling/Utils/auxiliaryFunctions.jl
@@ -9,7 +9,7 @@ export ricker_wavelet, get_computational_nt, calculate_dt, setup_grid, setup_3D_
 export convertToCell, limit_model_to_receiver_area, remove_out_of_bounds_receivers, extend_gradient
 export remove_padding, subsample, process_input_data
 export generate_distribution, select_frequencies
-export devito_model, pad_sizes, pad_array
+export devito_model, pad_array
 export transducer, as_vec
 export Gardner
 
@@ -24,7 +24,6 @@ Parameters
 * `dm`: Squared slowness perturbation (optional), Array or PhysicalParameter.
 """
 function devito_model(model::MT, options::JUDIOptions, dm) where {MT<:AbstractModel}
-    pad = pad_sizes(model, options)
     # Set up Python model structure
     physpar = Dict((n, isa(v, PhysicalParameter) ? v.data : v) for (n, v) in _params(model))
 
@@ -39,31 +38,6 @@ devito_model(model::AbstractModel, options::JUDIOptions, dm::PhysicalParameter)
 devito_model(model::AbstractModel, options::JUDIOptions, dm::Vector{T}) where T = devito_model(model, options, reshape(dm, size(model)))
 devito_model(model::AbstractModel, options::JUDIOptions) = devito_model(model, options, nothing)
 
-"""
-    pad_sizes(model, options; so=nothing)
-
-Computes ABC padding sizes according to the model's numbr of abc points and spatial order
-
-Parameters
-* `model`: JUDI or Python side Model.
-* `options`: JUDI Options structure.
-* `so`: Space order (optional) defaults to options.space_order.
-"""
-function pad_sizes(model::PyObject, options; so=nothing)
-    isnothing(so) && (so = options.space_order)
-    N = model.grid.dim
-    return tuple([(nbl + so, nbr + so) for (nbl, nbr)=model.padsizes]...)
-end
-
-function pad_sizes(model::AbstractModel{T, N}, options; so=nothing) where {T, N}
-    isnothing(so) && (so = options.space_order)
-    padsizes = [(nbl(model) + so, nbl(model) + so) for i=1:N]
-    if options.free_surface
-        padsizes[end] = (so, nbl(model) + so)
-    end
-    return tuple(padsizes...)
-end
-
 """
     pad_array(m, nb; mode=:border)
 

diff --git a/src/pysource/models.py b/src/pysource/models.py
@@ -70,20 +70,21 @@ def damp_op(ndim, padsizes, abc_type, fs):
 
     Parameters
     ----------
-    padsize : List of tuple
+    ndim : int
+        Number of dimensions in the model.
+    padsizes : List of tuple
         Number of points in the damping layer for each dimension and side.
-    spacing :
-        Grid spacing coefficient.
-    mask : bool, optional
+    abc_type : str, "mask" or "damp"
         whether the dampening is a mask or layer.
         mask => 1 inside the domain and decreases in the layer
-        not mask => 0 inside the domain and increase in the layer
+        damp => 0 inside the domain and increases in the layer
+    fs : bool
+        Whether the model has a free surface (left-side layer skipped on the last dimension).
     """
     damp = Function(name="damp", grid=Grid(tuple([11]*ndim)), space_order=0)
     eqs = [Eq(damp, 1.0 if abc_type == "mask" else 0.0)]
     for (nbl, nbr), d in zip(padsizes, damp.dimensions):
         # 3 Point buffer to avoid weird interaction with abc
-        nbr = nbr - 3
         if not fs or d is not damp.dimensions[-1]:
             nbl = nbl - 3
             dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbl)
@@ -95,6 +96,7 @@ def damp_op(ndim, padsizes, abc_type, fs):
             val = -val if abc_type == "mask" else val
             eqs += [Inc(damp.subs({d: dim_l}), val/d.spacing)]
         # right
+        nbr = nbr - 3
         dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbr)
         dim_r = SubDimension.right(name='abc_%s_r' % d.name, parent=d,
                                    thickness=nbr)
@@ -205,6 +207,7 @@ def __init__(self, origin, spacing, shape, space_order=8, nbl=40, dtype=np.float
             self._m = self._gen_phys_param(m, 'm', space_order)
         # density
         self._init_density(rho, b, space_order)
+        # Perturbation for linearized modeling
         self._dm = self._gen_phys_param(dm, 'dm', space_order)
 
         # Model type
@@ -545,7 +548,7 @@ def __init_abox__(self, src, rec, fw=True):
     @cached_property
     def physical(self):
         if ABox is None:
-            return phys
+            return None
         else:
             return self._abox
 
@@ -631,7 +634,7 @@ def __init_abox__(self, src, rec, fw=True):
     @cached_property
     def physical(self):
         if ABox is None:
-            return phys
+            return None
         else:
             return self._abox
 

diff --git a/src/pysource/utils.py b/src/pysource/utils.py
@@ -8,7 +8,8 @@
 from sympy import sqrt
 
 from devito import configuration
-from devito.arch import Device, NvidiaCompiler, CudaCompiler
+from devito.arch import Device
+from devito.arch.compiler import NvidiaCompiler, CudaCompiler
 from devito.tools import as_tuple
 
 try:
@@ -152,7 +153,7 @@ def cleanup_wf(u):
     """
     Delete serialized snapshots
     """
-    for ui in u:
+    for ui in as_tuple(u):
         try:
             serialized = ui._parent._fnames
             basedir = '/'.join(str(serialized[0]).split('/')[:-1])