diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9b373f..8a5fac5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.0]
+
+### Added
+* Support for GPU-accelerated processing using CUDA.
 
 ## [0.3.0]
diff --git a/Dockerfile b/Dockerfile
index 899d007..954d99c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,8 @@ LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
 
 ARG DEBIAN_FRONTEND=noninteractive
 
+ENV USEGPU="false"
+
 ENV PYTHONDONTWRITEBYTECODE=true
 ENV PROC_HOME=/home/conda/back-projection
 ENV MYHOME=/home/conda
@@ -36,9 +38,9 @@ RUN groupadd -g "${CONDA_GID}" --system conda && \
     echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \
     echo "conda activate base" >> /home/conda/.profile
 
+SHELL ["/bin/bash", "-l", "-c"]
 USER ${CONDA_UID}
-SHELL ["/bin/bash", "-l", "-c"]
 
 WORKDIR /home/conda/
 
 RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \
@@ -47,10 +49,10 @@ RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BAC
     rm ./back-projection.tar.gz && \
     rm -rf ./back-projection/fft
 
-COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc_cpu.sh ./back-projection
+COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection
 RUN cd /home/conda/back-projection && \
-    chmod +x ./build_proc_cpu.sh && \
-    ./build_proc_cpu.sh && \
+    chmod +x ./build_proc.sh && \
+    ./build_proc.sh && \
     find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \
     cd /home/conda/
 
diff --git a/Dockerfile.gpu b/Dockerfile.gpu
new file mode 100644
index 0000000..9066135
--- /dev/null
+++ b/Dockerfile.gpu
@@ -0,0 +1,86 @@
+FROM nvidia/cuda:12.4.1-devel-ubuntu20.04
+
+# For opencontainers label definitions, see:
+# https://github.com/opencontainers/image-spec/blob/master/annotations.md
+LABEL org.opencontainers.image.title="HyP3 back-projection"
+LABEL org.opencontainers.image.description="HyP3 plugin for back-projection processing"
+LABEL org.opencontainers.image.vendor="Alaska Satellite Facility"
+LABEL org.opencontainers.image.authors="ASF Tools Team"
+LABEL org.opencontainers.image.licenses="BSD-3-Clause"
+LABEL org.opencontainers.image.url="https://github.com/ASFHyP3/hyp3-back-projection"
+LABEL org.opencontainers.image.source="https://github.com/ASFHyP3/hyp3-back-projection"
+LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
+
+ARG DEBIAN_FRONTEND=noninteractive
+ARG CONDA_UID=1000
+ARG CONDA_GID=1000
+ARG BACK_PROJECTION_TAG=0.2.0
+ARG FFTW_TAG=3.3.9
+ARG MINIFORGE_NAME=Miniforge3
+ARG MINIFORGE_VERSION=24.3.0-0
+
+# USEGPU environment variable used by build_proc.sh
+ENV USEGPU="true"
+ENV CONDA_DIR=/opt/conda
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=${CONDA_DIR}/bin:${PATH}
+ENV PYTHONDONTWRITEBYTECODE=true
+ENV PROC_HOME=/home/conda/back-projection
+ENV MYHOME=/home/conda
+
+# Conda setup
+RUN apt-get update > /dev/null && \
+    apt-get install --no-install-recommends --yes \
+        wget bzip2 ca-certificates \
+        git \
+        tini \
+        > /dev/null && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
+    /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
+    rm /tmp/miniforge.sh && \
+    conda clean --tarballs --index-cache --packages --yes && \
+    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
+    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
+    conda clean --force-pkgs-dirs --all --yes && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
+
+RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl build-essential gfortran libfftw3-dev && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN groupadd -g "${CONDA_GID}" --system conda && \
+    useradd -l -u "${CONDA_UID}" -g "${CONDA_GID}" --system -d /home/conda -m -s /bin/bash conda && \
+    chown -R conda:conda /opt && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> /home/conda/.profile && \
+    echo "conda activate base" >> /home/conda/.profile
+
+SHELL ["/bin/bash", "-l", "-c"]
+
+USER ${CONDA_UID}
+WORKDIR /home/conda/
+
+RUN curl -sL https://github.com/ASFHyP3/back-projection/archive/refs/tags/v${BACK_PROJECTION_TAG}.tar.gz > ./back-projection.tar.gz && \
+    mkdir -p ./back-projection && \
+    tar -xvf ./back-projection.tar.gz -C ./back-projection/ --strip=1 && \
+    rm ./back-projection.tar.gz && \
+    rm -rf ./back-projection/fft
+
+COPY --chown=${CONDA_UID}:${CONDA_GID} ./scripts/build_proc.sh ./back-projection
+RUN cd /home/conda/back-projection && \
+    chmod +x ./build_proc.sh && \
+    ./build_proc.sh && \
+    find $PROC_HOME -type f -name "*.py" -exec chmod +x {} + && \
+    cd /home/conda/
+
+COPY --chown=${CONDA_UID}:${CONDA_GID} . /hyp3-back-projection/
+
+RUN mamba env create -f /hyp3-back-projection/environment.yml && \
+    conda clean -afy && \
+    conda activate hyp3-back-projection && \
+    sed -i 's/conda activate base/conda activate hyp3-back-projection/g' /home/conda/.profile && \
+    python -m pip install --no-cache-dir /hyp3-back-projection
+
+ENTRYPOINT ["/hyp3-back-projection/src/hyp3_back_projection/etc/entrypoint.sh"]
+CMD ["-h"]
diff --git a/README.md b/README.md
index 1dd4320..933c3f2 100644
--- a/README.md
+++ b/README.md
@@ -51,3 +51,36 @@ Your credentials can be passed to the workflows via command-line options (`--esa
 
 If you haven't set up a `.netrc` file before,
 check out this [guide](https://harmony.earthdata.nasa.gov/docs#getting-started) to get started.
+
+## GPU Setup
+For Docker to use the host's GPU, the host must have the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) installed and configured.
+The setup process differs by operating system and Linux distribution. Instructions for the most common distributions, including Ubuntu,
+can be found [here](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuration). Make sure to follow the [Docker configuration steps](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuration) after installing the package.
+
+### EC2 Setup
+When running on an EC2 instance, the following setup is recommended:
+1. Create a [P3-family EC2 instance](https://aws.amazon.com/ec2/instance-types/p3/) with the [Amazon Linux 2 AMI with NVIDIA TESLA GPU Driver](https://aws.amazon.com/marketplace/pp/prodview-64e4rx3h733ru?sr=0-4&ref_=beagle&applicationId=AWSMPContessa)
+2. Install Docker and the nvidia-container-toolkit on the EC2 instance:
+```bash
+sudo yum-config-manager --disable amzn2-graphics
+curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
+sudo yum install docker -y
+sudo yum install nvidia-container-toolkit -y
+sudo yum-config-manager --enable amzn2-graphics
+```
+3. Optionally, configure Docker to run without `sudo` and to start automatically when the EC2 instance boots:
+```bash
+sudo systemctl start docker && \
+sudo usermod -a -G docker ec2-user && \
+sudo systemctl enable docker
+```
+4. Log out of the EC2 instance and log back in so that the new `docker` group membership takes effect
+5. To test the GPU setup, run the base NVIDIA container:
+```bash
+docker run -it --gpus all nvidia/cuda:12.4.1-devel-ubuntu20.04 nvidia-smi
+```
+6. Build the GPU-enabled container and run it:
+```bash
+docker build -t back-projection:gpu -f Dockerfile.gpu .
+docker run --gpus=all --rm -it back-projection:gpu ++process back_projection --help
+```
diff --git a/scripts/build_proc_cpu.sh b/scripts/build_proc.sh
similarity index 80%
rename from scripts/build_proc_cpu.sh
rename to scripts/build_proc.sh
index 544030f..08e080e 100644
--- a/scripts/build_proc_cpu.sh
+++ b/scripts/build_proc.sh
@@ -3,8 +3,10 @@
 MULTIARCH_DIR=/usr/lib/$(gcc -print-multiarch)
 FFTW_LIB=$MULTIARCH_DIR/libfftw3f.a
 echo 'using FFTW library:' $FFTW_LIB
+if [[ "$USEGPU" == "true" ]]; then
+  echo 'building with GPU support'
+fi
 
-# Works
 cd DEM
 gfortran -o mosaicDEM mosaicDEM.f90
 gfortran -o createspecialdem createspecialdem.f90
@@ -19,9 +21,10 @@ gfortran -c processsubcpu.f90 backprojectcpusub.f90 bounds.f90 orbitrangetime.f9
 gcc -o sentinel_raw_process_cpu sentinel_raw_process_cpu.o decode_line_memory.o processsubcpu.o backprojectcpusub.o azimuth_compress_cpu.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lgfortran -lgomp -lm -lrt -lpthread
 echo 'built sentinel_raw_process_cpu'
 
-# nvcc -o howmanygpus howmanygpus.cu
-#
-# echo 'built howmanygpus'
+if [[ "$USEGPU" == "true" ]]; then
+  nvcc -o howmanygpus howmanygpus.cu
+  echo 'built howmanygpus'
+fi
 
 cd geo2rdr
 gfortran -o estimatebaseline estimatebaseline.f90 intp_orbit.f90 latlon.f90 orbithermite.f -ffixed-line-length-none
@@ -80,20 +83,20 @@ cd ..
 
 echo 'built snaphu'
 
-# nvcc -o gpu_arch gpu_arch.cu
-# echo 'built gpu architecture probe'
-#
-# ./gpu_arch | cat > GPU_ARCH; source ./GPU_ARCH; rm GPU_ARCH
-
 cd sentinel
 gcc -c filelen.c io.c sentinel_raw_process.c decode_line_memory.c -lm -fopenmp
 echo 'built raw_process components in sentinel'
 
-# nvcc -gencode arch=compute_$GPU_ARCH,code=sm_$GPU_ARCH -c azimuth_compress.cu -Wno-deprecated-gpu-targets
+if [[ "$USEGPU" == "true" ]]; then
+  nvcc -gencode arch=compute_89,code=sm_89 -c azimuth_compress.cu -Wno-deprecated-gpu-targets
+fi
 
 gfortran -c processsub.f90 backprojectgpusub.f90 bounds.f90 orbitrangetime.f90 latlon.f90 intp_orbit.f90 radar_to_xyz.f90 unitvec.f90 tcnbasis.f90 curvature.f90 cross.f90 orbithermite.f sentineltimingsub.f90 getburststatevectors.f90 -ffixed-line-length-none -fopenmp
 
-# nvcc -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp
+if [[ "$USEGPU" == "true" ]]; then
+  nvcc -o sentinel_raw_process sentinel_raw_process.o decode_line_memory.o processsub.o backprojectgpusub.o azimuth_compress.o bounds.o orbitrangetime.o latlon.o intp_orbit.o radar_to_xyz.o unitvec.o tcnbasis.o curvature.o cross.o orbithermite.o filelen.o io.o sentineltimingsub.o getburststatevectors.o $FFTW_LIB -lstdc++ -lgfortran -lgomp
+fi
+
 cd ..
 
 
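One thing to flag in `build_proc.sh`: where the old, commented-out code probed the device with `gpu_arch.cu` and compiled for the detected `$GPU_ARCH`, the GPU build now hardcodes `compute_89`/`sm_89`, which targets Ada-generation GPUs. Code built with only those `-gencode` values will not run on other architectures; notably, the V100 GPUs in the P3 instances recommended in the README report compute capability 7.0. Below is a minimal sketch for checking what a device reports, assuming a driver new enough to support nvidia-smi's `compute_cap` query field; the helper is illustrative and is not part of the patch:

```python
import subprocess


def get_compute_capability(device: int = 0) -> str:
    """Return the compute capability (e.g. '8.9') reported by nvidia-smi.

    Assumes `nvidia-smi --query-gpu=compute_cap` is supported, which requires
    a reasonably recent NVIDIA driver.
    """
    result = subprocess.run(
        ['nvidia-smi', f'--id={device}', '--query-gpu=compute_cap', '--format=csv,noheader'],
        capture_output=True, text=True, check=True,
    )
    return result.stdout.strip()


if __name__ == '__main__':
    cc = get_compute_capability()
    # build_proc.sh compiles azimuth_compress.cu for compute_89/sm_89 only.
    if cc != '8.9':
        print(f'GPU reports compute capability {cc}; '
              'the -gencode flags in build_proc.sh may need adjusting.')
```

If the reported value disagrees with the hardcoded one, the `-gencode` flags in `build_proc.sh` need to match the target hardware before the GPU image is built.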
diff --git a/src/hyp3_back_projection/back_projection.py b/src/hyp3_back_projection/back_projection.py
index cbbe7f2..d702eef 100644
--- a/src/hyp3_back_projection/back_projection.py
+++ b/src/hyp3_back_projection/back_projection.py
@@ -4,6 +4,7 @@
 import argparse
 import logging
+import os
 import zipfile
 from pathlib import Path
 from typing import Iterable, Optional
 
@@ -30,26 +31,40 @@
         f.write('\n'.join(lines))
 
 
-def back_project_single_granule(granule_path: Path, orbit_path: Path, work_dir: Path) -> None:
-    """Back-project a single Sentinel-1 level-0 granule.
-
-    Args:
-        granule_path: Path to the granule to back-project
-        orbit_path: Path to the orbit file for the granule
-    """
-    required_files = ['elevation.dem', 'elevation.dem.rsc', 'params']
+def check_required_files(required_files: Iterable, work_dir: Path) -> None:
     for file in required_files:
         if not (work_dir / file).exists():
             raise FileNotFoundError(f'Missing required file: {file}')
 
-    args = [str(granule_path.with_suffix('')), str(orbit_path)]
-    utils.call_stanford_module('sentinel/sentinel_scene_cpu.py', args, work_dir=work_dir)
+
+def clean_up_after_back_projection(work_dir: Path) -> None:
     patterns = ['*hgt*', 'dem*', 'DEM*', 'q*', '*positionburst*']
     for pattern in patterns:
         [f.unlink() for f in work_dir.glob(pattern)]
 
 
-def create_product(work_dir):
+def back_project_granules(granule_orbit_pairs: Iterable, work_dir: Path, gpu: bool = False) -> None:
+    """Back-project a set of Sentinel-1 level-0 granules.
+
+    Args:
+        granule_orbit_pairs: List of tuples of granule and orbit file paths
+        work_dir: Working directory for processing
+    """
+    check_required_files(['elevation.dem', 'elevation.dem.rsc', 'params'], work_dir)
+
+    if gpu:
+        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
+        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+    cmd = 'sentinel/sentinel_scene_multigpu.py' if gpu else 'sentinel/sentinel_scene_cpu.py'
+    for granule_path, orbit_path in granule_orbit_pairs:
+        args = [str(granule_path.with_suffix('')), str(orbit_path)]
+        utils.call_stanford_module(cmd, args, work_dir=work_dir)
+
+    clean_up_after_back_projection(work_dir)
+
+
+def create_product(work_dir) -> Path:
     """Create a product zip file.
 
     Includes files needed for further processing (gslc, orbit, and parameter file).
@@ -88,7 +103,8 @@
     bucket: str = None,
     bucket_prefix: str = '',
     work_dir: Optional[Path] = None,
-) -> Path:
+    gpu: bool = False,
+):
     """Back-project a set of Sentinel-1 level-0 granules.
 
     Args:
@@ -100,6 +116,7 @@
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
         work_dir: Working directory for processing
+        gpu: Use the GPU-based version of the workflow
     """
     utils.set_creds('EARTHDATA', earthdata_username, earthdata_password)
     utils.set_creds('ESA', esa_username, esa_password)
@@ -108,19 +125,18 @@
 
     print('Downloading data...')
     bboxs = []
-    back_project_args = []
+    granule_orbit_pairs = []
     for granule in granules:
-        granule_path, granule_bbox = utils.download_raw_granule(granule, work_dir)
+        granule_path, granule_bbox = utils.download_raw_granule(granule, work_dir, unzip=True)
         orbit_path = utils.download_orbit(granule, work_dir)
         bboxs.append(granule_bbox)
-        back_project_args.append((granule_path, orbit_path))
+        granule_orbit_pairs.append((granule_path, orbit_path))
 
     full_bbox = unary_union(bboxs).buffer(0.1)
     dem_path = dem.download_dem_for_back_projection(full_bbox, work_dir)
     create_param_file(dem_path, dem_path.with_suffix('.dem.rsc'), work_dir)
 
-    for granule_path, orbit_path in back_project_args:
-        back_project_single_granule(granule_path, orbit_path, work_dir=work_dir)
+    back_project_granules(granule_orbit_pairs, work_dir=work_dir, gpu=gpu)
 
     utils.call_stanford_module('util/merge_slcs.py', work_dir=work_dir)
 
@@ -128,7 +144,7 @@
         zip_path = create_product(work_dir)
         upload_file_to_s3(zip_path, bucket, bucket_prefix)
 
-    print(f'Finish back-projection for {list(work_dir.glob("S1*.geo"))[0].with_suffix("").name}!')
+    print(f'Finished back-projection for {list(work_dir.glob("S1*.geo"))[0].with_suffix("").name}!')
 
 
 def main():
@@ -146,6 +162,7 @@
     parser.add_argument('--esa-password', default=None, help="Password for ESA's Copernicus Data Space Ecosystem")
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
+    parser.add_argument('--gpu', default=False, action='store_true', help='Use the GPU-based version of the workflow.')
     parser.add_argument('granules', nargs='+', help='Level-0 S1 granule to back-project.')
     args = parser.parse_args()
 
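For orientation, here is how the refactored pieces fit together: `back_project_granules` first validates the working directory with `check_required_files`, optionally pins processing to the first GPU via `CUDA_VISIBLE_DEVICES`, runs the appropriate Stanford script once per (granule, orbit) pair, and finishes with `clean_up_after_back_projection`. A minimal usage sketch, meant to run inside the processing container; the granule and orbit file names below are placeholders, not real products:

```python
from pathlib import Path

from hyp3_back_projection import back_projection

# work_dir must already contain elevation.dem, elevation.dem.rsc, and params,
# otherwise check_required_files() raises FileNotFoundError.
work_dir = Path('work')
granule_orbit_pairs = [
    # Placeholder names; real Sentinel-1 granule and orbit file names are much longer.
    (work_dir / 'S1A_EXAMPLE_GRANULE.SAFE', work_dir / 'S1A_EXAMPLE_ORBIT.EOF'),
]

# gpu=True switches to sentinel/sentinel_scene_multigpu.py and selects GPU 0.
back_projection.back_project_granules(granule_orbit_pairs, work_dir=work_dir, gpu=False)
```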
diff --git a/src/hyp3_back_projection/utils.py b/src/hyp3_back_projection/utils.py
index 5408a57..5994109 100644
--- a/src/hyp3_back_projection/utils.py
+++ b/src/hyp3_back_projection/utils.py
@@ -135,13 +135,14 @@ def get_earthdata_credentials() -> Tuple[str, str]:
     )
 
 
-def download_raw_granule(granule_name: str, output_dir: Path) -> Tuple[Path, Polygon]:
+def download_raw_granule(granule_name: str, output_dir: Path, unzip: bool = False) -> Tuple[Path, Polygon]:
     """Download a S1 granule using asf_search. Return its path and buffered extent.
 
     Args:
         granule_name: Name of the granule to download
         output_dir: Directory to save the granule in
+        unzip: Unzip the granule if it is a zip file
 
     Returns:
         Tuple of the granule path and its extent as a Polygon
 
@@ -155,17 +156,21 @@ def download_raw_granule(granule_name: str, output_dir: Path) -> Tuple[Path, Pol
     bbox = shape(result.geojson()['geometry'])
 
     zip_path = output_dir / f'{granule_name[:-4]}.zip'
-    out_path = output_dir / f'{granule_name[:-4]}.SAFE'
-
-    if not out_path.exists() and not zip_path.exists():
-        result.download(path=output_dir, session=session)
-
-    if not out_path.exists():
-        with ZipFile(zip_path, 'r') as zip_ref:
-            zip_ref.extractall('.')
-
-    if zip_path.exists():
-        zip_path.unlink()
+    if not unzip:
+        out_path = zip_path
+        if not out_path.exists():
+            result.download(path=output_dir, session=session)
+    else:
+        out_path = output_dir / f'{granule_name[:-4]}.SAFE'
+        if not out_path.exists() and not zip_path.exists():
+            result.download(path=output_dir, session=session)
+
+        if not out_path.exists():
+            with ZipFile(zip_path, 'r') as zip_ref:
+                zip_ref.extractall('.')
+
+        if zip_path.exists() and unzip:
+            zip_path.unlink()
 
     return out_path, bbox
 
@@ -201,3 +206,12 @@ def call_stanford_module(local_name, args: List = [], work_dir: Optional[Path] =
     args = [str(x) for x in args]
     print(f'Calling {local_name} {" ".join(args)} in directory {work_dir}')
     subprocess.run([script, *args], cwd=work_dir, check=True)
+
+
+def how_many_gpus():
+    """Get the number of GPUs available on the system using Stanford script."""
+    cmd = (get_proc_home() / 'sentinel' / 'howmanygpus').resolve()
+    proc = subprocess.Popen(str(cmd), stdout=subprocess.PIPE, shell=True)
+    (param, err) = proc.communicate()
+    ngpus = int(str(param, 'UTF-8').split()[0])
+    return ngpus
diff --git a/tests/test_back_projection.py b/tests/test_back_projection.py
index e116242..05ee50d 100644
--- a/tests/test_back_projection.py
+++ b/tests/test_back_projection.py
@@ -20,11 +20,11 @@ def test_create_param_file(tmp_path):
     assert lines[1] == str(dem_rsc_path)
 
 
-def test_back_project_single_granule(tmp_path, monkeypatch):
+def test_back_project_granules(tmp_path, monkeypatch):
     granule_path = tmp_path / 'granule.SAFE'
     orbit_path = tmp_path / 'orbit.xml'
     with pytest.raises(FileNotFoundError):
-        back_projection.back_project_single_granule(granule_path, orbit_path, tmp_path)
+        back_projection.back_project_granules([(granule_path, orbit_path)], tmp_path)
 
     for f in ['elevation.dem', 'elevation.dem.rsc', 'params']:
         (tmp_path / f).touch()
@@ -35,7 +35,7 @@ def test_back_project_single_granule(tmp_path, monkeypatch):
     with monkeypatch.context() as m:
         mock_call_stanford_module = mock.Mock()
         m.setattr(utils, 'call_stanford_module', mock_call_stanford_module)
-        back_projection.back_project_single_granule(granule_path, orbit_path, tmp_path)
+        back_projection.back_project_granules([(granule_path, orbit_path)], tmp_path)
         mock_call_stanford_module.assert_called_once_with(
             'sentinel/sentinel_scene_cpu.py',
             [str(granule_path.with_suffix('')), str(orbit_path)],
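A side note on the new `how_many_gpus` helper in `utils.py`: it invokes the compiled `howmanygpus` binary through a shell and parses the first whitespace-separated token of its output. The same call can be made without `shell=True`, as in this sketch; it assumes the GPU image, where `sentinel/howmanygpus` is compiled by `build_proc.sh` when `USEGPU=true`:

```python
import subprocess

from hyp3_back_projection.utils import get_proc_home

# Path to the Stanford helper that build_proc.sh compiles when USEGPU=true.
howmanygpus = (get_proc_home() / 'sentinel' / 'howmanygpus').resolve()

# howmanygpus prints the GPU count as the first whitespace-separated token.
result = subprocess.run([str(howmanygpus)], capture_output=True, text=True, check=True)
print('GPUs available:', int(result.stdout.split()[0]))
```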