[MediaVision] Add new inference APIs (#6316)

* [MediaVision] Add new inference APIs
Samsung · Sep 19, 2024 · e7acef9 · e7acef9
1 parent 37c3f0a
commit e7acef9
Show file tree

Hide file tree

Showing 12 changed files with 1,490 additions and 1 deletion.
diff --git a/src/Tizen.Multimedia.Vision/Interop/Interop.Libraries.cs b/src/Tizen.Multimedia.Vision/Interop/Interop.Libraries.cs
@@ -20,12 +20,17 @@ internal static partial class Libraries
     {
         public const string MediaVisionCommon = "libmv_common.so";
         public const string MediaVisionFace = "libmv_face.so";
-        public const string MediaVisionInference = "libmv_inference.so";
         public const string MediaVisionImage = "libmv_image.so";
         public const string MediaVisionSurveillance = "libmv_surveillance.so";
         public const string MediaVisionBarcodeDetector = "libmv_barcode_detector.so";
         public const string MediaVisionBarcodeGenerator = "libmv_barcode_generator.so";
         public const string MediaVisionRoiTracker = "libmv_roi_tracker.so";
         public const string MediaVisionFaceRecognition = "libmv_face_recognition.so"; // It's based on machine learning
+        public const string MediaVisionInference = "libmv_inference.so";
+        public const string MediaVisionInferenceImageClassification = "libmv_image_classification.so"; // Inference image classification
+        public const string MediaVisionInferenceObjectDetection = "libmv_object_detection.so";
+        public const string MediaVisionInferenceFaceDetection = MediaVisionInferenceObjectDetection; // Inference object detection and face detection
+        public const string MediaVisionInferenceFacialLandmarkDetection = "libmv_landmark_detection.so";
+        public const string MediaVisionInferencePoseLandmarkDetection = "libmv_landmark_detection.so"; // Inference facial landmark detection and pose landmark detection
     }
 }
diff --git a/src/Tizen.Multimedia.Vision/Interop/Interop.MediaVision.Inference.cs b/src/Tizen.Multimedia.Vision/Interop/Interop.MediaVision.Inference.cs
@@ -112,5 +112,145 @@ internal static extern MediaVisionError DetectFacialLandmark(IntPtr source, IntP
             internal static extern MediaVisionError DetectPoseLandmark(IntPtr source, IntPtr inference,
                 IntPtr roi, PoseLandmarkDetectedCallback callback, IntPtr userData = default(IntPtr)); // Deprecated in API 12
         }
+
+        // P/Invoke bindings for the native mv_image_classification_* entry points, all resolved from
+        // Libraries.MediaVisionInferenceImageClassification. Every binding returns a MediaVisionError.
+        internal static partial class InferenceImageClassification
+        {
+            // Newly added inference APIs
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_create")]
+            internal static extern MediaVisionError Create(out IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_destroy")]
+            internal static extern MediaVisionError Destroy(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_configure")]
+            internal static extern MediaVisionError Configure(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_prepare")]
+            internal static extern MediaVisionError Prepare(IntPtr handle);
+
+            // source is passed as a raw native handle (IntPtr), not as a managed object.
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_inference")]
+            internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);
+
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_inference_async")]
+            internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);
+
+            // NOTE(review): requestOrder is marshalled as a 64-bit ulong. If the native parameter is declared
+            // as `unsigned long`, it is narrower on 32-bit targets - confirm against the native header.
+            // The sibling bindings in this file name the equivalent out parameter requestId.
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_get_result_count")]
+            internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);
+
+            // Despite the plural managed name, this binds the singular mv_image_classification_get_label:
+            // each call takes one index and yields one label pointer.
+            // NOTE(review): label is a raw IntPtr; who owns the pointed-to memory is not visible here - confirm before freeing.
+            [DllImport(Libraries.MediaVisionInferenceImageClassification, EntryPoint = "mv_image_classification_get_label")]
+            internal static extern MediaVisionError GetLabels(IntPtr handle, uint index, out IntPtr label);
+        }
+
+        // P/Invoke bindings for the native mv_face_detection_* entry points. Every binding returns a
+        // MediaVisionError. Libraries.MediaVisionInferenceFaceDetection is defined as an alias of
+        // Libraries.MediaVisionInferenceObjectDetection, so these symbols are resolved from the
+        // object detection library.
+        internal static partial class InferenceFaceDetection
+        {
+            // Newly added inference APIs
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_create")]
+            internal static extern MediaVisionError Create(out IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_destroy")]
+            internal static extern MediaVisionError Destroy(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_configure")]
+            internal static extern MediaVisionError Configure(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_prepare")]
+            internal static extern MediaVisionError Prepare(IntPtr handle);
+
+            // source is passed as a raw native handle (IntPtr), not as a managed object.
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_inference")]
+            internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);
+
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_inference_async")]
+            internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);
+
+            // NOTE(review): requestId is marshalled as a 64-bit ulong. If the native parameter is declared
+            // as `unsigned long`, it is narrower on 32-bit targets - confirm against the native header.
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_get_result_count")]
+            internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);
+
+            // Despite the plural managed name, this binds the singular mv_face_detection_get_bound_box:
+            // each call takes one index and yields one box as four int edges (left, top, right, bottom).
+            [DllImport(Libraries.MediaVisionInferenceFaceDetection, EntryPoint = "mv_face_detection_get_bound_box")]
+            internal static extern MediaVisionError GetBoundingBoxes(IntPtr handle, uint index, out int left, out int top, out int right, out int bottom);
+        }
+
+        // P/Invoke bindings for the native mv_object_detection_* entry points, all resolved from
+        // Libraries.MediaVisionInferenceObjectDetection. Every binding returns a MediaVisionError.
+        internal static partial class InferenceObjectDetection
+        {
+            // Newly added inference APIs
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_create")]
+            internal static extern MediaVisionError Create(out IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_destroy")]
+            internal static extern MediaVisionError Destroy(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_configure")]
+            internal static extern MediaVisionError Configure(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_prepare")]
+            internal static extern MediaVisionError Prepare(IntPtr handle);
+
+            // source is passed as a raw native handle (IntPtr), not as a managed object.
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_inference")]
+            internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);
+
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_inference_async")]
+            internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);
+
+            // NOTE(review): requestId is marshalled as a 64-bit ulong. If the native parameter is declared
+            // as `unsigned long`, it is narrower on 32-bit targets - confirm against the native header.
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_get_result_count")]
+            internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);
+
+            // Despite the plural managed name, this binds the singular mv_object_detection_get_bound_box:
+            // each call takes one index and yields one box as four int edges (left, top, right, bottom).
+            [DllImport(Libraries.MediaVisionInferenceObjectDetection, EntryPoint = "mv_object_detection_get_bound_box")]
+            internal static extern MediaVisionError GetBoundingBoxes(IntPtr handle, uint index, out int left, out int top, out int right, out int bottom);
+        }
+
+        // P/Invoke bindings for the native mv_facial_landmark_* entry points, all resolved from
+        // Libraries.MediaVisionInferenceFacialLandmarkDetection. Every binding returns a MediaVisionError.
+        // That constant holds the same library file name as the pose landmark detection constant.
+        internal static partial class InferenceFacialLandmarkDetection
+        {
+            // Newly added inference APIs
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_create")]
+            internal static extern MediaVisionError Create(out IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_destroy")]
+            internal static extern MediaVisionError Destroy(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_configure")]
+            internal static extern MediaVisionError Configure(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_prepare")]
+            internal static extern MediaVisionError Prepare(IntPtr handle);
+
+            // source is passed as a raw native handle (IntPtr), not as a managed object.
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_inference")]
+            internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);
+
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_inference_async")]
+            internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);
+
+            // NOTE(review): requestId is marshalled as a 64-bit ulong. If the native parameter is declared
+            // as `unsigned long`, it is narrower on 32-bit targets - confirm against the native header.
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_get_result_count")]
+            internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);
+
+            // Despite the plural managed name, this binds the singular mv_facial_landmark_get_position:
+            // each call takes one index and yields one (posX, posY) pair as unsigned integers.
+            [DllImport(Libraries.MediaVisionInferenceFacialLandmarkDetection, EntryPoint = "mv_facial_landmark_get_position")]
+            internal static extern MediaVisionError GetPoints(IntPtr handle, uint index, out uint posX, out uint posY);
+        }
+
+        // P/Invoke bindings for the native mv_pose_landmark_* entry points, all resolved from
+        // Libraries.MediaVisionInferencePoseLandmarkDetection. Every binding returns a MediaVisionError.
+        // That constant holds the same library file name as the facial landmark detection constant.
+        internal static partial class InferencePoseLandmarkDetection
+        {
+            // Newly added inference APIs
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_create")]
+            internal static extern MediaVisionError Create(out IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_destroy")]
+            internal static extern MediaVisionError Destroy(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_configure")]
+            internal static extern MediaVisionError Configure(IntPtr handle);
+
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_prepare")]
+            internal static extern MediaVisionError Prepare(IntPtr handle);
+
+            // source is passed as a raw native handle (IntPtr), not as a managed object.
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_inference")]
+            internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);
+
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_inference_async")]
+            internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);
+
+            // NOTE(review): requestId is marshalled as a 64-bit ulong. If the native parameter is declared
+            // as `unsigned long`, it is narrower on 32-bit targets - confirm against the native header.
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_get_result_count")]
+            internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);
+
+            // Despite the plural managed name, this binds the singular mv_pose_landmark_get_position:
+            // each call takes one index and yields one (posX, posY) pair as unsigned integers.
+            [DllImport(Libraries.MediaVisionInferencePoseLandmarkDetection, EntryPoint = "mv_pose_landmark_get_position")]
+            internal static extern MediaVisionError GetPoints(IntPtr handle, uint index, out uint posX, out uint posY);
+        }
     }
 }
diff --git a/src/Tizen.Multimedia.Vision/MediaVision/InferenceFaceDetector.cs b/src/Tizen.Multimedia.Vision/MediaVision/InferenceFaceDetector.cs
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the License);
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using InteropFD = Interop.MediaVision.InferenceFaceDetection;
+
+namespace Tizen.Multimedia.Vision
+{
+    /// <summary>
+    /// Provides the ability to detect faces.
+    /// </summary>
+    /// <feature>http://tizen.org/feature/vision.inference</feature>
+    /// <feature>http://tizen.org/feature/vision.inference.face</feature>
+    /// <since_tizen> 12 </since_tizen>
+    public class InferenceFaceDetector : IDisposable
+    {
+        // Native face detection handle; reset to IntPtr.Zero once it has been destroyed.
+        private IntPtr _handle;
+        private bool _disposed;
+
+        // Managed-side counter handed out by RequestInference; starts at 1 and is incremented
+        // after each request that the native layer accepted.
+        private ulong _requestId = 1;
+
+        /// <summary>Initializes a new instance of the <see cref="InferenceFaceDetector"/> class.</summary>
+        /// <exception cref="NotSupportedException">The required features are not supported.</exception>
+        /// <since_tizen> 12 </since_tizen>
+        public InferenceFaceDetector()
+        {
+            ValidationUtil.ValidateFeatureSupported(VisionFeatures.Inference);
+            ValidationUtil.ValidateFeatureSupported(VisionFeatures.InferenceFace);
+
+            InteropFD.Create(out _handle).Validate("Failed to create inference face detector.");
+
+            try
+            {
+                InteropFD.Configure(_handle).Validate("Failed to configure inference face detector.");
+                InteropFD.Prepare(_handle).Validate("Failed to prepare inference face detector.");
+            }
+            catch (Exception e)
+            {
+                Log.Error(MediaVisionLog.Tag, e.ToString());
+                InteropFD.Destroy(_handle);
+
+                // The finalizer still runs for an object whose constructor threw. Clearing the
+                // handle keeps Dispose(false) from destroying the same native handle twice.
+                _handle = IntPtr.Zero;
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Finalizes an instance of the InferenceFaceDetector class.
+        /// </summary>
+        ~InferenceFaceDetector()
+        {
+            Dispose(false);
+        }
+
+        /// <summary>
+        /// Detects faces on the source image synchronously.
+        /// </summary>
+        /// <remarks>
+        /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.
+        /// </remarks>
+        /// <param name="source">The image data to detect faces.</param>
+        /// <returns>The bounding boxes of the detected faces.</returns>
+        /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
+        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+        /// <since_tizen> 12 </since_tizen>
+        public InferenceFaceDetectorResult Inference(MediaVisionSource source)
+        {
+            ValidateNotDisposed();
+
+            if (source == null)
+            {
+                throw new ArgumentNullException(nameof(source));
+            }
+
+            InteropFD.Inference(_handle, source.Handle).Validate("Failed to inference face detection.");
+
+            return new InferenceFaceDetectorResult(_handle);
+        }
+
+        /// <summary>
+        /// Detects faces on the source image asynchronously.
+        /// </summary>
+        /// <remarks>
+        /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.<br/>
+        /// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
+        /// </remarks>
+        /// <param name="source">The image data to detect faces.</param>
+        /// <returns>A task that represents the asynchronous operation. The task result contains the bounding boxes of the detected faces.</returns>
+        /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
+        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+        /// <since_tizen> 12 </since_tizen>
+        public async Task<InferenceFaceDetectorResult> InferenceAsync(MediaVisionSource source)
+        {
+            // This is an async method, so the exceptions thrown below are delivered through the returned task.
+            ValidateNotDisposed();
+
+            if (source == null)
+            {
+                throw new ArgumentNullException(nameof(source));
+            }
+
+            InteropFD.InferenceAsync(_handle, source.Handle).Validate("Failed to inference face detection.");
+
+            // The result is built on a dedicated (LongRunning) thread rather than on the caller's thread.
+            // ConfigureAwait(false): nothing after the await needs the caller's synchronization context.
+            return await Task.Factory.StartNew(() => new InferenceFaceDetectorResult(_handle),
+                CancellationToken.None,
+                TaskCreationOptions.DenyChildAttach | TaskCreationOptions.LongRunning,
+                TaskScheduler.Default).ConfigureAwait(false);
+        }
+
+        /// <summary>
+        /// Requests detecting faces to get their bounding boxes asynchronously.
+        /// </summary>
+        /// <remarks>
+        /// This function does not guarantee that inference is done when this method returns. The user can get the result by using <see cref="GetRequestResults"/>.<br/>
+        /// If the user calls this method again before the previous one is finished internally, the call will be ignored.<br/>
+        /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.<br/>
+        /// Note that this method could use about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
+        /// </remarks>
+        /// <param name="source">The image data to detect faces.</param>
+        /// <returns>The request ID that indicates the order of requests.</returns>
+        /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
+        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+        /// <seealso cref="GetRequestResults"/>
+        /// <since_tizen> 12 </since_tizen>
+        public ulong RequestInference(MediaVisionSource source)
+        {
+            ValidateNotDisposed();
+
+            if (source == null)
+            {
+                throw new ArgumentNullException(nameof(source));
+            }
+
+            InteropFD.InferenceAsync(_handle, source.Handle).Validate("Failed to inference face detection.");
+
+            // Post-increment: the caller receives the current ID and the next request gets the following one.
+            return _requestId++;
+        }
+
+        /// <summary>
+        /// Gets the bounding boxes as a result of <see cref="RequestInference"/>.
+        /// </summary>
+        /// <remarks>
+        /// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> will be empty.<br/>
+        /// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
+        /// </remarks>
+        /// <returns>The bounding boxes of the detected faces.</returns>
+        /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
+        /// <seealso cref="RequestInference"/>
+        /// <since_tizen> 12 </since_tizen>
+        public InferenceFaceDetectorResult GetRequestResults()
+        {
+            // Without this check a disposed detector would hand IntPtr.Zero to the native result getters.
+            ValidateNotDisposed();
+
+            return new InferenceFaceDetectorResult(_handle);
+        }
+
+        /// <summary>
+        /// Releases the unmanaged resources used by the InferenceFaceDetector.
+        /// </summary>
+        /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
+        /// <since_tizen> 12 </since_tizen>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!_disposed)
+            {
+                if (disposing)
+                {
+                    // to be used if there are any other disposable objects
+                }
+
+                // Destroy the native handle at most once; the zero check also covers a failed constructor.
+                if (_handle != IntPtr.Zero)
+                {
+                    InteropFD.Destroy(_handle);
+                    _handle = IntPtr.Zero;
+                }
+
+                _disposed = true;
+            }
+        }
+
+        /// <summary>
+        /// Releases all resources used by the InferenceFaceDetector.
+        /// </summary>
+        /// <since_tizen> 12 </since_tizen>
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+
+        // Logs and throws ObjectDisposedException once Dispose has run on this instance.
+        internal void ValidateNotDisposed()
+        {
+            if (_disposed)
+            {
+                Log.Error(MediaVisionLog.Tag, "InferenceFaceDetector handle is disposed.");
+                throw new ObjectDisposedException(nameof(InferenceFaceDetector));
+            }
+        }
+    }
+}