From 9f5165f53d8da2e261695ea86deb90f9839c5601 Mon Sep 17 00:00:00 2001
From: caleb <etemesicaleb@gmail.com>
Date: Wed, 3 Jan 2024 18:06:02 +0300
Subject: [PATCH] crate/imageprocs: Add multithreaded resize

---
 crates/zune-imageprocs/src/resize.rs          | 173 +++++++++++++-----
 crates/zune-imageprocs/src/resize/bicubic.rs  |  69 +++++++
 crates/zune-imageprocs/src/resize/bilinear.rs |  53 +++++-
 3 files changed, 247 insertions(+), 48 deletions(-)
 create mode 100644 crates/zune-imageprocs/src/resize/bicubic.rs
diff --git a/crates/zune-imageprocs/src/resize.rs b/crates/zune-imageprocs/src/resize.rs
index 5172f3cd..07aa0193 100644
--- a/crates/zune-imageprocs/src/resize.rs
+++ b/crates/zune-imageprocs/src/resize.rs
@@ -17,6 +17,14 @@ use zune_image::traits::OperationsTrait;
 
 use crate::traits::NumOps;
 
+mod bicubic;
+mod bilinear;
+
+#[derive(Copy, Clone, Debug)]
+pub enum ResizeMethod {
+    Bilinear // the only method wired up; the Bicubic arm in `resize` is still disabled
+}
+
 /// Resize an image to a new width and height
 /// using the resize method specified
 #[derive(Copy, Clone)]
@@ -48,80 +56,155 @@ impl OperationsTrait for Resize {
         "Resize"
     }
 
+    #[allow(clippy::too_many_lines)] // the per-depth dispatch is repeated in both cfg branches
     fn execute_impl(&self, image: &mut Image) -> Result<(), ImageErrors> {
         let (old_w, old_h) = image.dimensions();
         let depth = image.depth().bit_type();
 
         let new_length = self.new_width * self.new_height * image.depth().size_of();
 
-        match depth {
-            BitType::U8 => {
+        #[cfg(feature = "threads")]
+        { // threaded path: every channel is resized on its own scoped thread
+            std::thread::scope(|f| { // scoped threads may borrow `image` and `self` directly
+                let mut errors = vec![]; // join handles, one per spawned channel thread
+
                 for old_channel in image.channels_mut(false) {
-                    let mut new_channel = Channel::new_with_bit_type(new_length, depth);
+                    let result = f.spawn(|| {
+                        let mut new_channel = Channel::new_with_bit_type(new_length, depth); // sized for the new dimensions
+                        match depth { // dispatch on the sample type; other depths return an error
+                            BitType::U8 => resize::<u8>(
+                                old_channel.reinterpret_as()?,
+                                new_channel.reinterpret_as_mut()?,
+                                self.method,
+                                old_w,
+                                old_h,
+                                self.new_width,
+                                self.new_height
+                            ),
+                            BitType::U16 => resize::<u16>(
+                                old_channel.reinterpret_as()?,
+                                new_channel.reinterpret_as_mut()?,
+                                self.method,
+                                old_w,
+                                old_h,
+                                self.new_width,
+                                self.new_height
+                            ),
 
-                    resize::<u8>(
-                        old_channel.reinterpret_as().unwrap(),
-                        new_channel.reinterpret_as_mut().unwrap(),
-                        self.method,
-                        old_w,
-                        old_h,
-                        self.new_width,
-                        self.new_height
-                    );
-                    *old_channel = new_channel;
+                            BitType::F32 => {
+                                resize::<f32>(
+                                    old_channel.reinterpret_as()?,
+                                    new_channel.reinterpret_as_mut()?,
+                                    self.method,
+                                    old_w,
+                                    old_h,
+                                    self.new_width,
+                                    self.new_height
+                                );
+                            }
+                            d => return Err(ImageErrors::ImageOperationNotImplemented("resize", d))
+                        }
+                        *old_channel = new_channel; // only reached when the resize above did not bail out
+                        Ok(())
+                    });
+                    errors.push(result);
                 }
-            }
-            BitType::U16 => {
-                for old_channel in image.channels_mut(true) {
-                    let mut new_channel = Channel::new_with_bit_type(new_length, depth);
+                errors
+                    .into_iter()
+                    .map(|x| x.join().unwrap()) // join() only fails if the thread panicked; re-panic here
+                    .collect::<Result<Vec<()>, ImageErrors>>() // the first Err, if any, becomes the result
+            })?;
+        }
 
-                    resize::<u16>(
-                        old_channel.reinterpret_as().unwrap(),
-                        new_channel.reinterpret_as_mut().unwrap(),
+        #[cfg(not(feature = "threads"))]
+        { // single-threaded fallback: same work, one channel after another
+            for old_channel in image.channels_mut(false) {
+                let mut new_channel = Channel::new_with_bit_type(new_length, depth);
+                match depth {
+                    BitType::U8 => resize::<u8>(
+                        old_channel.reinterpret_as()?,
+                        new_channel.reinterpret_as_mut()?,
                         self.method,
                         old_w,
                         old_h,
                         self.new_width,
                         self.new_height
-                    );
-                    *old_channel = new_channel;
-                }
-            }
-            BitType::F32 => {
-                for old_channel in image.channels_mut(true) {
-                    let mut new_channel = Channel::new_with_bit_type(new_length, depth);
-
-                    resize::<f32>(
-                        old_channel.reinterpret_as().unwrap(),
-                        new_channel.reinterpret_as_mut().unwrap(),
+                    ),
+                    BitType::U16 => resize::<u16>(
+                        old_channel.reinterpret_as()?,
+                        new_channel.reinterpret_as_mut()?,
                         self.method,
                         old_w,
                         old_h,
                         self.new_width,
                         self.new_height
-                    );
-                    *old_channel = new_channel;
+                    ),
+
+                    BitType::F32 => {
+                        resize::<f32>(
+                            old_channel.reinterpret_as()?,
+                            new_channel.reinterpret_as_mut()?,
+                            self.method,
+                            old_w,
+                            old_h,
+                            self.new_width,
+                            self.new_height
+                        );
+                    }
+                    d => return Err(ImageErrors::ImageOperationNotImplemented("resize", d))
                 }
+                *old_channel = new_channel; // swap in the resized samples for this channel
             }
-            d => return Err(ImageErrors::ImageOperationNotImplemented(self.name(), d))
         }
-
         image.set_dimensions(self.new_width, self.new_height);
 
         Ok(())
     }
     fn supported_types(&self) -> &'static [BitType] {
-        &[BitType::U8, BitType::U16]
+        &[BitType::U8, BitType::U16, BitType::F32] // the depths execute_impl dispatches on
     }
 }
-mod bilinear;
 
-#[derive(Copy, Clone, Debug)]
-pub enum ResizeMethod {
-    Bilinear
+/// Return aspect-preserving resize dimensions for a `new_w` x `new_h` request: both
+/// sides are divided by the bigger of the two shrink factors, then truncated.
+#[must_use]
+#[allow(
+    clippy::cast_precision_loss,
+    clippy::cast_possible_truncation,
+    clippy::cast_sign_loss
+)]
+pub fn ratio_dimensions_smaller(
+    old_w: usize, old_h: usize, new_w: usize, new_h: usize
+) -> (usize, usize) {
+    let (src_w, src_h) = (old_w as f64, old_h as f64);
+    let shrink_w = src_w / new_w as f64;
+    let shrink_h = src_h / new_h as f64;
+    // the side that has to shrink the most sets the common scale
+    let scale = if shrink_w > shrink_h { shrink_w } else { shrink_h };
+
+    ((src_w / scale) as usize, (src_h / scale) as usize)
 }
 
-/// Resize an image to new dimensions
+/// Return aspect-preserving resize dimensions that cover `new_w` x `new_h`, i.e. the
+/// scale is set by the side that shrinks least (an empty source yields `(0, 0)`).
+#[must_use]
+#[allow(
+    clippy::cast_precision_loss,
+    clippy::cast_possible_truncation,
+    clippy::cast_sign_loss
+)]
+pub fn ratio_dimensions_larger(
+    old_w: usize, old_h: usize, new_w: usize, new_h: usize
+) -> (usize, usize) {
+    if old_w == 0 || old_h == 0 {
+        return (0, 0); // a zero-sized side would make the chosen factor zero
+    }
+    let (ratio_w, ratio_h) = (old_w as f64 / new_w as f64, old_h as f64 / new_h as f64);
+    // smaller factor here; the larger one would just repeat `ratio_dimensions_smaller`
+    let percent = if ratio_h < ratio_w { ratio_h } else { ratio_w };
+    ((old_w as f64 / percent) as usize, (old_h as f64 / percent) as usize)
+}
+/// Resize an image **channel** to new dimensions
 ///
 /// # Arguments
 /// - in_image: A contiguous slice of a single channel of an image
@@ -139,13 +222,17 @@ pub fn resize<T>(
     out_width: usize, out_height: usize
 ) where
     T: Copy + NumOps<T>,
-    f64: std::convert::From<T>
+    f32: std::convert::From<T>
 {
     match method {
         ResizeMethod::Bilinear => {
             bilinear::bilinear_impl(
                 in_image, out_image, in_width, in_height, out_width, out_height
             );
-        }
+        } // ResizeMethod::Bicubic => {
+          //     bicubic::resize_image_bicubic(
+          //         in_image, out_image, in_width, in_height, out_width, out_height
+          //     );
+          // }
     }
 }
diff --git a/crates/zune-imageprocs/src/resize/bicubic.rs b/crates/zune-imageprocs/src/resize/bicubic.rs
new file mode 100644
index 00000000..af8ffc76
--- /dev/null
+++ b/crates/zune-imageprocs/src/resize/bicubic.rs
@@ -0,0 +1,69 @@
+#![allow(dead_code)]
+
+use crate::traits::NumOps;
+/// Resizes one channel with bicubic (Catmull-Rom) interpolation.
+///
+/// Reads `width * height` row-major samples from `pixels`, writes `new_width * new_height`
+/// to `output`, and clamps taps outside the source to the nearest edge sample.
+/// Panics if the source is empty while the target is not, or if a slice is too short.
+#[allow(
+    clippy::cast_precision_loss,
+    clippy::cast_sign_loss,
+    clippy::cast_possible_truncation
+)]
+pub fn resize_image_bicubic<T>(
+    pixels: &[T], output: &mut [T], width: usize, height: usize, new_width: usize,
+    new_height: usize
+) where
+    T: Copy + NumOps<T>,
+    f32: std::convert::From<T>
+{
+    if new_width == 0 || new_height == 0 {
+        return;
+    }
+    assert!(width > 0 && height > 0, "cannot resize an empty channel");
+    // Keys' cubic convolution with a = -0.5 is the Catmull-Rom spline
+    let a = -0.5;
+    let x_ratio = width as f32 / new_width as f32;
+    let y_ratio = height as f32 / new_height as f32;
+
+    for y in 0..new_height {
+        let src_y = y as f32 * y_ratio;
+        // source coordinates are never negative, so the cast is an exact floor()
+        let base_y = src_y as usize;
+        let weights_y = cubic_weights(src_y - base_y as f32, a);
+        for x in 0..new_width {
+            let src_x = x as f32 * x_ratio;
+            let base_x = src_x as usize;
+            let weights_x = cubic_weights(src_x - base_x as f32, a);
+            let mut sum = 0.0;
+            for (j, wy) in weights_y.iter().enumerate() {
+                // taps cover rows base_y - 1 ..= base_y + 2, clamped into the image
+                let row = (base_y + j).saturating_sub(1).min(height - 1) * width;
+                for (i, wx) in weights_x.iter().enumerate() {
+                    let col = (base_x + i).saturating_sub(1).min(width - 1);
+                    sum += f32::from(pixels[row + col]) * wx * wy;
+                }
+            }
+            // NOTE(review): cubic overshoot can leave T's range; confirm from_f32 saturates
+            output[y * new_width + x] = T::from_f32(sum);
+        }
+    }
+}
+
+/// Weights of the taps at offsets -1, 0, 1 and 2 for a sample `frac` past tap 0.
+fn cubic_weights(frac: f32, a: f32) -> [f32; 4] {
+    [frac + 1.0, frac, 1.0 - frac, 2.0 - frac].map(|d| calculate_cubic_coefficient(d, a))
+}
+
+/// Keys' cubic convolution kernel with sharpness `a`, evaluated at distance `x`.
+fn calculate_cubic_coefficient(x: f32, a: f32) -> f32 {
+    let x = x.abs();
+    if x <= 1.0 {
+        (a + 2.0) * x * x * x - (a + 3.0) * x * x + 1.0
+    } else if x < 2.0 {
+        a * x * x * x - 5.0 * a * x * x + 8.0 * a * x - 4.0 * a
+    } else {
+        0.0
+    }
+}
diff --git a/crates/zune-imageprocs/src/resize/bilinear.rs b/crates/zune-imageprocs/src/resize/bilinear.rs
index ad036f68..3b455c83 100644
--- a/crates/zune-imageprocs/src/resize/bilinear.rs
+++ b/crates/zune-imageprocs/src/resize/bilinear.rs
@@ -1,5 +1,7 @@
 use crate::traits::NumOps;
 
+/// Bilinear interpolation of a single image channel (one plane of samples), writing
+/// `out_width * out_height` samples to `out_channel`; edge taps clamp to the last row/column.
 ///
 #[allow(
     clippy::cast_precision_loss,
@@ -7,12 +9,53 @@ use crate::traits::NumOps;
     clippy::cast_possible_truncation
 )]
 pub fn bilinear_impl<T>(
-    _in_image: &[T], _out_image: &mut [T], _in_width: usize, _in_height: usize, _out_width: usize,
-    _out_height: usize
+    in_channel: &[T], out_channel: &mut [T], in_width: usize, in_height: usize, out_width: usize,
+    out_height: usize
 ) where
     T: Copy + NumOps<T>,
-    f64: std::convert::From<T>
+    f32: std::convert::From<T>
 {
-    // stump
-    return;
+    if out_width == 0 || out_height == 0 {
+        return;
+    }
+    assert!(in_width > 0 && in_height > 0, "cannot resize an empty channel");
+
+    let w_ratio = 1.0 / out_width as f32 * in_width as f32;
+    let h_ratio = 1.0 / out_height as f32 * in_height as f32;
+    let max_x = in_width - 1;
+    let max_y = in_height - 1;
+
+    for y in 0..out_height {
+        let new_y = y as f32 * h_ratio;
+        // coordinates are never negative, so the cast truncates exactly like floor()
+        //
+        // Always clamp both taps: x0 + 1 / y0 + 1 step past the last column/row whenever
+        // an axis is not strictly shrinking (same size, or growing on one axis only),
+        // which previously read the wrong pixel or indexed out of bounds.
+        let y0 = (new_y as usize).min(max_y);
+        let y1 = (y0 + 1).min(max_y);
+        let b = new_y - y0 as f32;
+        // both source rows are fixed for this output row, so index them once
+        let row0 = y0 * in_width;
+        let row1 = y1 * in_width;
+
+        for x in 0..out_width {
+            let new_x = x as f32 * w_ratio;
+            let x0 = (new_x as usize).min(max_x);
+            let x1 = (x0 + 1).min(max_x);
+            let a = new_x - x0 as f32;
+
+            let p00 = f32::from(in_channel[row0 + x0]);
+            let p10 = f32::from(in_channel[row0 + x1]);
+            let p01 = f32::from(in_channel[row1 + x0]);
+            let p11 = f32::from(in_channel[row1 + x1]);
+
+            let interpolated_pixel = p00 * (1.0 - a) * (1.0 - b)
+                + p10 * a * (1.0 - b)
+                + p01 * (1.0 - a) * b
+                + p11 * a * b;
+
+            out_channel[y * out_width + x] = T::from_f32(interpolated_pixel);
+        }
+    }
 }