Skip to content

Commit

Permalink
[MediaVision] Add new inference APIs (#6316)
Browse files Browse the repository at this point in the history
* [MediaVision] Add new inference APIs
  • Loading branch information
hsgwon authored Sep 19, 2024
1 parent 37c3f0a commit e7acef9
Show file tree
Hide file tree
Showing 12 changed files with 1,490 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/Tizen.Multimedia.Vision/Interop/Interop.Libraries.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ internal static partial class Libraries
{
// Native MediaVision shared libraries referenced by the DllImport declarations.
public const string MediaVisionCommon = "libmv_common.so";
public const string MediaVisionFace = "libmv_face.so";
public const string MediaVisionImage = "libmv_image.so";
public const string MediaVisionSurveillance = "libmv_surveillance.so";
public const string MediaVisionBarcodeDetector = "libmv_barcode_detector.so";
public const string MediaVisionBarcodeGenerator = "libmv_barcode_generator.so";
public const string MediaVisionRoiTracker = "libmv_roi_tracker.so";
public const string MediaVisionFaceRecognition = "libmv_face_recognition.so"; // It's based on machine learning

// Declared exactly once: a second declaration of the same constant in this class does not compile.
public const string MediaVisionInference = "libmv_inference.so";

// Inference image classification
public const string MediaVisionInferenceImageClassification = "libmv_image_classification.so";

// Inference object detection and face detection (both use the same library name)
public const string MediaVisionInferenceObjectDetection = "libmv_object_detection.so";
public const string MediaVisionInferenceFaceDetection = MediaVisionInferenceObjectDetection;

// Inference facial landmark detection and pose landmark detection (both use the same library name)
public const string MediaVisionInferenceFacialLandmarkDetection = "libmv_landmark_detection.so";
public const string MediaVisionInferencePoseLandmarkDetection = MediaVisionInferenceFacialLandmarkDetection;
}
}
140 changes: 140 additions & 0 deletions src/Tizen.Multimedia.Vision/Interop/Interop.MediaVision.Inference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,145 @@ internal static extern MediaVisionError DetectFacialLandmark(IntPtr source, IntP
internal static extern MediaVisionError DetectPoseLandmark(IntPtr source, IntPtr inference,
IntPtr roi, PoseLandmarkDetectedCallback callback, IntPtr userData = default(IntPtr)); // Deprecated in API 12
}

/// <summary>
/// P/Invoke declarations for the native <c>mv_image_classification_*</c> entry points.
/// </summary>
internal static partial class InferenceImageClassification
{
    // Newly added inference APIs

    // Every binding in this class resolves against the same native library.
    private const string NativeLibrary = Libraries.MediaVisionInferenceImageClassification;

    /// <summary>Maps to the native <c>mv_image_classification_create</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_create")]
    internal static extern MediaVisionError Create(out IntPtr handle);

    /// <summary>Maps to the native <c>mv_image_classification_destroy</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_destroy")]
    internal static extern MediaVisionError Destroy(IntPtr handle);

    /// <summary>Maps to the native <c>mv_image_classification_configure</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_configure")]
    internal static extern MediaVisionError Configure(IntPtr handle);

    /// <summary>Maps to the native <c>mv_image_classification_prepare</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_prepare")]
    internal static extern MediaVisionError Prepare(IntPtr handle);

    /// <summary>Maps to the native <c>mv_image_classification_inference</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_inference")]
    internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_image_classification_inference_async</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_inference_async")]
    internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_image_classification_get_result_count</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_get_result_count")]
    internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestOrder, out uint count);

    /// <summary>
    /// Maps to the native <c>mv_image_classification_get_label</c> entry point.
    /// The label is returned as a raw pointer; no string marshalling is done here.
    /// </summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_image_classification_get_label")]
    internal static extern MediaVisionError GetLabels(IntPtr handle, uint index, out IntPtr label);
}

/// <summary>
/// P/Invoke declarations for the native <c>mv_face_detection_*</c> entry points.
/// </summary>
internal static partial class InferenceFaceDetection
{
    // Newly added inference APIs

    // Every binding in this class resolves against the same native library.
    private const string NativeLibrary = Libraries.MediaVisionInferenceFaceDetection;

    /// <summary>Maps to the native <c>mv_face_detection_create</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_create")]
    internal static extern MediaVisionError Create(out IntPtr handle);

    /// <summary>Maps to the native <c>mv_face_detection_destroy</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_destroy")]
    internal static extern MediaVisionError Destroy(IntPtr handle);

    /// <summary>Maps to the native <c>mv_face_detection_configure</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_configure")]
    internal static extern MediaVisionError Configure(IntPtr handle);

    /// <summary>Maps to the native <c>mv_face_detection_prepare</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_prepare")]
    internal static extern MediaVisionError Prepare(IntPtr handle);

    /// <summary>Maps to the native <c>mv_face_detection_inference</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_inference")]
    internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_face_detection_inference_async</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_inference_async")]
    internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_face_detection_get_result_count</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_get_result_count")]
    internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);

    /// <summary>
    /// Maps to the native <c>mv_face_detection_get_bound_box</c> entry point,
    /// which reports the box at <paramref name="index"/> as four edge values.
    /// </summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_face_detection_get_bound_box")]
    internal static extern MediaVisionError GetBoundingBoxes(IntPtr handle, uint index,
        out int left, out int top, out int right, out int bottom);
}

/// <summary>
/// P/Invoke declarations for the native <c>mv_object_detection_*</c> entry points.
/// </summary>
internal static partial class InferenceObjectDetection
{
    // Newly added inference APIs

    // Every binding in this class resolves against the same native library.
    private const string NativeLibrary = Libraries.MediaVisionInferenceObjectDetection;

    /// <summary>Maps to the native <c>mv_object_detection_create</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_create")]
    internal static extern MediaVisionError Create(out IntPtr handle);

    /// <summary>Maps to the native <c>mv_object_detection_destroy</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_destroy")]
    internal static extern MediaVisionError Destroy(IntPtr handle);

    /// <summary>Maps to the native <c>mv_object_detection_configure</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_configure")]
    internal static extern MediaVisionError Configure(IntPtr handle);

    /// <summary>Maps to the native <c>mv_object_detection_prepare</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_prepare")]
    internal static extern MediaVisionError Prepare(IntPtr handle);

    /// <summary>Maps to the native <c>mv_object_detection_inference</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_inference")]
    internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_object_detection_inference_async</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_inference_async")]
    internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_object_detection_get_result_count</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_get_result_count")]
    internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);

    /// <summary>
    /// Maps to the native <c>mv_object_detection_get_bound_box</c> entry point,
    /// which reports the box at <paramref name="index"/> as four edge values.
    /// </summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_object_detection_get_bound_box")]
    internal static extern MediaVisionError GetBoundingBoxes(IntPtr handle, uint index,
        out int left, out int top, out int right, out int bottom);
}

/// <summary>
/// P/Invoke declarations for the native <c>mv_facial_landmark_*</c> entry points.
/// </summary>
internal static partial class InferenceFacialLandmarkDetection
{
    // Newly added inference APIs

    // Every binding in this class resolves against the same native library.
    private const string NativeLibrary = Libraries.MediaVisionInferenceFacialLandmarkDetection;

    /// <summary>Maps to the native <c>mv_facial_landmark_create</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_create")]
    internal static extern MediaVisionError Create(out IntPtr handle);

    /// <summary>Maps to the native <c>mv_facial_landmark_destroy</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_destroy")]
    internal static extern MediaVisionError Destroy(IntPtr handle);

    /// <summary>Maps to the native <c>mv_facial_landmark_configure</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_configure")]
    internal static extern MediaVisionError Configure(IntPtr handle);

    /// <summary>Maps to the native <c>mv_facial_landmark_prepare</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_prepare")]
    internal static extern MediaVisionError Prepare(IntPtr handle);

    /// <summary>Maps to the native <c>mv_facial_landmark_inference</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_inference")]
    internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_facial_landmark_inference_async</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_inference_async")]
    internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_facial_landmark_get_result_count</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_get_result_count")]
    internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);

    /// <summary>
    /// Maps to the native <c>mv_facial_landmark_get_position</c> entry point,
    /// which reports the X/Y position for the landmark at <paramref name="index"/>.
    /// </summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_facial_landmark_get_position")]
    internal static extern MediaVisionError GetPoints(IntPtr handle, uint index,
        out uint posX, out uint posY);
}

/// <summary>
/// P/Invoke declarations for the native <c>mv_pose_landmark_*</c> entry points.
/// </summary>
internal static partial class InferencePoseLandmarkDetection
{
    // Newly added inference APIs

    // Every binding in this class resolves against the same native library.
    private const string NativeLibrary = Libraries.MediaVisionInferencePoseLandmarkDetection;

    /// <summary>Maps to the native <c>mv_pose_landmark_create</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_create")]
    internal static extern MediaVisionError Create(out IntPtr handle);

    /// <summary>Maps to the native <c>mv_pose_landmark_destroy</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_destroy")]
    internal static extern MediaVisionError Destroy(IntPtr handle);

    /// <summary>Maps to the native <c>mv_pose_landmark_configure</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_configure")]
    internal static extern MediaVisionError Configure(IntPtr handle);

    /// <summary>Maps to the native <c>mv_pose_landmark_prepare</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_prepare")]
    internal static extern MediaVisionError Prepare(IntPtr handle);

    /// <summary>Maps to the native <c>mv_pose_landmark_inference</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_inference")]
    internal static extern MediaVisionError Inference(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_pose_landmark_inference_async</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_inference_async")]
    internal static extern MediaVisionError InferenceAsync(IntPtr handle, IntPtr source);

    /// <summary>Maps to the native <c>mv_pose_landmark_get_result_count</c> entry point.</summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_get_result_count")]
    internal static extern MediaVisionError GetResultCount(IntPtr handle, out ulong requestId, out uint count);

    /// <summary>
    /// Maps to the native <c>mv_pose_landmark_get_position</c> entry point,
    /// which reports the X/Y position for the landmark at <paramref name="index"/>.
    /// </summary>
    [DllImport(NativeLibrary, EntryPoint = "mv_pose_landmark_get_position")]
    internal static extern MediaVisionError GetPoints(IntPtr handle, uint index,
        out uint posX, out uint posY);
}
}
}
208 changes: 208 additions & 0 deletions src/Tizen.Multimedia.Vision/MediaVision/InferenceFaceDetector.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
/*
* Copyright (c) 2024 Samsung Electronics Co., Ltd All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Threading;
using System.Threading.Tasks;
using InteropFD = Interop.MediaVision.InferenceFaceDetection;

namespace Tizen.Multimedia.Vision
{
/// <summary>
/// Provides the ability to detect faces.
/// </summary>
/// <feature>http://tizen.org/feature/vision.inference</feature>
/// <feature>http://tizen.org/feature/vision.inference.face</feature>
/// <since_tizen> 12 </since_tizen>
public class InferenceFaceDetector : IDisposable
{
    private IntPtr _handle;
    private bool _disposed;

    // Value handed out by the next RequestInference call; starts at 1 and is incremented per request.
    private ulong _requestId = 1;

    /// <summary>Initializes a new instance of the <see cref="InferenceFaceDetector"/> class.</summary>
    /// <exception cref="NotSupportedException">The required features are not supported.</exception>
    /// <since_tizen> 12 </since_tizen>
    public InferenceFaceDetector()
    {
        ValidationUtil.ValidateFeatureSupported(VisionFeatures.Inference);
        ValidationUtil.ValidateFeatureSupported(VisionFeatures.InferenceFace);

        InteropFD.Create(out _handle).Validate("Failed to create inference face detector.");

        try
        {
            InteropFD.Configure(_handle).Validate("Failed to configure inference face detector.");
            InteropFD.Prepare(_handle).Validate("Failed to prepare inference face detector.");
        }
        catch (Exception e)
        {
            Log.Error(MediaVisionLog.Tag, e.ToString());
            InteropFD.Destroy(_handle);

            // The finalizer still runs for an object whose constructor threw.
            // Clear the handle so Dispose(false) does not destroy it a second time.
            _handle = IntPtr.Zero;
            throw;
        }
    }

    /// <summary>
    /// Finalizes an instance of the InferenceFaceDetector class.
    /// </summary>
    ~InferenceFaceDetector()
    {
        Dispose(false);
    }

    /// <summary>
    /// Detects faces on the source image synchronously.
    /// </summary>
    /// <remarks>
    /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.
    /// </remarks>
    /// <param name="source">The image data to detect faces.</param>
    /// <returns>The BoundingBoxes of detected face.</returns>
    /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <since_tizen> 12 </since_tizen>
    public InferenceFaceDetectorResult Inference(MediaVisionSource source)
    {
        ValidateNotDisposed();

        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        InteropFD.Inference(_handle, source.Handle).Validate("Failed to inference face detection.");

        return new InferenceFaceDetectorResult(_handle);
    }

    /// <summary>
    /// Detects faces on the source image asynchronously.
    /// </summary>
    /// <remarks>
    /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.<br/>
    /// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
    /// </remarks>
    /// <param name="source">The image data to detect faces.</param>
    /// <returns>A task that represents the asynchronous detection. The task result contains the detection result.</returns>
    /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <since_tizen> 12 </since_tizen>
    public async Task<InferenceFaceDetectorResult> InferenceAsync(MediaVisionSource source)
    {
        ValidateNotDisposed();

        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        InteropFD.InferenceAsync(_handle, source.Handle).Validate("Failed to inference face detection.");

        // The result object is created on a dedicated (LongRunning) task on the default scheduler.
        // ConfigureAwait(false): nothing after the await needs the caller's synchronization context,
        // so a caller that blocks on the returned task cannot deadlock on it.
        return await Task.Factory.StartNew(() => new InferenceFaceDetectorResult(_handle),
            CancellationToken.None,
            TaskCreationOptions.DenyChildAttach | TaskCreationOptions.LongRunning,
            TaskScheduler.Default).ConfigureAwait(false);
    }

    /// <summary>
    /// Requests detecting faces to get their bounding boxes asynchronously.
    /// </summary>
    /// <remarks>
    /// This function does not guarantee that inference is done when this method returns. The user can get the result by using <see cref="GetRequestResults"/>.<br/>
    /// If the user calls this method again before the previous one is finished internally, the call will be ignored.<br/>
    /// <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> can be empty, if there's no detected face.<br/>
    /// Note that this method could use about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
    /// </remarks>
    /// <param name="source">The image data to detect faces.</param>
    /// <returns>The request ID that indicates the order of requests.</returns>
    /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <seealso cref="GetRequestResults"/>
    /// <since_tizen> 12 </since_tizen>
    public ulong RequestInference(MediaVisionSource source)
    {
        ValidateNotDisposed();

        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        InteropFD.InferenceAsync(_handle, source.Handle).Validate("Failed to inference face detection.");

        return _requestId++;
    }

    /// <summary>
    /// Gets the bounding boxes as a result of <see cref="RequestInference"/>.
    /// </summary>
    /// <remarks>
    /// If there's no detected face, <see cref="InferenceFaceDetectorResult.BoundingBoxes"/> will be empty.<br/>
    /// This method uses about twice as much memory as <see cref="InferenceFaceDetector.Inference"/>.
    /// </remarks>
    /// <returns>The bounding boxes of detected face.</returns>
    /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
    /// <seealso cref="RequestInference"/>
    /// <since_tizen> 12 </since_tizen>
    public InferenceFaceDetectorResult GetRequestResults()
    {
        // Fail with the documented ObjectDisposedException instead of handing a cleared handle onward.
        ValidateNotDisposed();

        return new InferenceFaceDetectorResult(_handle);
    }

    /// <summary>
    /// Releases the unmanaged resources used by the InferenceFaceDetector.
    /// </summary>
    /// <param name="disposing">true to release both managed and unmanaged resources; false to release only unmanaged resources.</param>
    /// <since_tizen> 12 </since_tizen>
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        if (disposing)
        {
            // to be used if there are any other disposable objects
        }

        if (_handle != IntPtr.Zero)
        {
            InteropFD.Destroy(_handle);
            _handle = IntPtr.Zero;
        }

        _disposed = true;
    }

    /// <summary>
    /// Releases all resources used by the InferenceFaceDetector.
    /// </summary>
    /// <since_tizen> 12 </since_tizen>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Throws if this instance has already been disposed.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The InferenceFaceDetector already has been disposed.</exception>
    internal void ValidateNotDisposed()
    {
        if (_disposed)
        {
            Log.Error(MediaVisionLog.Tag, "InferenceFaceDetector handle is disposed.");
            throw new ObjectDisposedException(nameof(InferenceFaceDetector));
        }
    }
}
}
Loading

0 comments on commit e7acef9

Please sign in to comment.