fix: abstract image preprocessing in C++ computer vision (#376)

JakubGonera · web-flow · commit 1dc5b347d296 · 2025-06-10T10:40:14.000+02:00
## Description

Abstract image loading into an ExecuTorch tensor in C++ computer vision native code.

### Type of change

- [x] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] Documentation update (improves or adds clarity to existing documentation)

### Tested on

- [x] iOS
- [x] Android

### Related issues

#374

### Checklist

- [x] I have performed a self-review of my code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have updated the documentation accordingly
- [x] My changes generate no new warnings
diff --git a/apps/computer-vision/ios/Podfile.lock b/apps/computer-vision/ios/Podfile.lock
@@ -2356,7 +2356,7 @@ SPEC CHECKSUMS:
   React-logger: 8edfcedc100544791cd82692ca5a574240a16219
   React-Mapbuffer: c3f4b608e4a59dd2f6a416ef4d47a14400194468
   React-microtasksnativemodule: 054f34e9b82f02bd40f09cebd4083828b5b2beb6
-  react-native-executorch: 30047a5076fa3c91119618147627d895d87af51b
+  react-native-executorch: 53f918e0e3905243cc39d2d1a9df018bcd49c77b
   react-native-image-picker: 8a3f16000e794f5381a7fe47bb48fd8d06741e47
   react-native-safe-area-context: 562163222d999b79a51577eda2ea8ad2c32b4d06
   react-native-skia: b6cb66e99a953dae6880348c92cfb20a76d90b4f
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.cpp
@@ -111,20 +111,43 @@ cv::Mat readImage(const std::string &imageURI) {
     throw std::runtime_error("Read image error: invalid argument");
   }
 
-  cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
   return image;
 }
 
-TensorPtr getTensorFromMatrix(const std::vector<int32_t> &sizes,
+TensorPtr getTensorFromMatrix(const std::vector<int32_t> &tensorDims,
                               const cv::Mat &matrix) {
   std::vector<float> inputVector = colorMatToVector(matrix);
-  return executorch::extension::make_tensor_ptr(sizes, inputVector);
+  return executorch::extension::make_tensor_ptr(tensorDims, inputVector);
 }
 
 cv::Mat getMatrixFromTensor(cv::Size size, const Tensor &tensor) {
   auto resultData = static_cast<const float *>(tensor.const_data_ptr());
   return bufferToColorMat(std::span<const float>(resultData, tensor.numel()),
                           size);
 }
+
+// Loads the image at `path`, resizes it to the width/height given by the last
+// two entries of `tensorDims`, converts BGR -> RGB and packs it into a tensor.
+// Returns the tensor together with the image's size before resizing; throws
+// std::runtime_error when `tensorDims` has fewer than two dimensions.
+std::pair<TensorPtr, cv::Size>
+readImageToTensor(const std::string &path,
+                  const std::vector<int32_t> &tensorDims) {
+  // Validate the cheap precondition first so no image is decoded needlessly.
+  if (tensorDims.size() < 2) {
+    throw std::runtime_error(
+        "Unexpected tensor size, expected at least 2 dimensions but got: " +
+        std::to_string(tensorDims.size()) + ".");
+  }
+  const cv::Size tensorSize(tensorDims[tensorDims.size() - 1],
+                            tensorDims[tensorDims.size() - 2]);
+
+  cv::Mat input = imageprocessing::readImage(path);
+  const cv::Size imageSize = input.size();
+  cv::resize(input, input, tensorSize);
+  cv::cvtColor(input, input, cv::COLOR_BGR2RGB);
+
+  return {imageprocessing::getTensorFromMatrix(tensorDims, input), imageSize};
+}
 } // namespace imageprocessing
 } // namespace rnexecutorch
diff --git a/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.h b/packages/react-native-executorch/common/rnexecutorch/data_processing/ImageProcessing.h
@@ -1,12 +1,15 @@
 #pragma once
 
-#include <executorch/extension/tensor/tensor.h>
-#include <executorch/extension/tensor/tensor_ptr.h>
-#include <opencv2/opencv.hpp>
+#include <optional>
 #include <span>
 #include <string>
 #include <vector>
 
+#include <executorch/extension/tensor/tensor.h>
+#include <executorch/extension/tensor/tensor_ptr.h>
+
+#include <opencv2/opencv.hpp>
+
 namespace rnexecutorch::imageprocessing {
 using executorch::aten::Tensor;
 using executorch::extension::TensorPtr;
@@ -21,9 +24,15 @@ std::vector<float> colorMatToVector(const cv::Mat &mat);
 cv::Mat bufferToColorMat(const std::span<const float> &buffer,
                          cv::Size matSize);
 std::string saveToTempFile(const cv::Mat &image);
+/// @brief Read an image into a cv::Mat in BGR format.
 cv::Mat readImage(const std::string &imageURI);
-TensorPtr getTensorFromMatrix(const std::vector<int32_t> &sizes,
+TensorPtr getTensorFromMatrix(const std::vector<int32_t> &tensorDims,
                               const cv::Mat &mat);
 cv::Mat getMatrixFromTensor(cv::Size size, const Tensor &tensor);
+/// @brief Read image, resize it, convert it to RGB and copy it to an ET tensor.
+/// @return The tensor pointer and the size of the image before resizing.
+std::pair<TensorPtr, cv::Size>
+readImageToTensor(const std::string &path,
+                  const std::vector<int32_t> &tensorDims);
 
 } // namespace rnexecutorch::imageprocessing
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp
@@ -30,9 +30,10 @@ Classification::Classification(const std::string &modelSource,
 
 std::unordered_map<std::string_view, float>
 Classification::forward(std::string imageSource) {
-  auto tensor = preprocess(imageSource);
+  auto inputTensor =
+      imageprocessing::readImageToTensor(imageSource, getInputShape()[0]).first;
 
-  auto forwardResult = forwardET(tensor);
+  auto forwardResult = forwardET(inputTensor);
   if (!forwardResult.ok()) {
     throw std::runtime_error(
         "Failed to forward, error: " +
@@ -42,13 +43,6 @@ Classification::forward(std::string imageSource) {
   return postprocess(forwardResult->at(0).toTensor());
 }
 
-TensorPtr Classification::preprocess(const std::string &imageSource) {
-  cv::Mat image = imageprocessing::readImage(imageSource);
-  cv::resize(image, image, modelImageSize);
-
-  return imageprocessing::getTensorFromMatrix(getInputShape()[0], image);
-}
-
 std::unordered_map<std::string_view, float>
 Classification::postprocess(const Tensor &tensor) {
   std::span<const float> resultData(
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h b/packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h
@@ -18,7 +18,6 @@ class Classification : public BaseModel {
   std::unordered_map<std::string_view, float> forward(std::string imageSource);
 
 private:
-  TensorPtr preprocess(const std::string &imageSource);
   std::unordered_map<std::string_view, float> postprocess(const Tensor &tensor);
 
   cv::Size modelImageSize{0, 0};
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp
@@ -37,7 +37,8 @@ std::shared_ptr<jsi::Object>
 ImageSegmentation::forward(std::string imageSource,
                            std::set<std::string, std::less<>> classesOfInterest,
                            bool resize) {
-  auto [inputTensor, originalSize] = preprocess(imageSource);
+  auto [inputTensor, originalSize] =
+      imageprocessing::readImageToTensor(imageSource, getInputShape()[0]);
 
   auto forwardResult = forwardET(inputTensor);
   if (!forwardResult.ok()) {
@@ -50,19 +51,6 @@ ImageSegmentation::forward(std::string imageSource,
                      classesOfInterest, resize);
 }
 
-std::pair<TensorPtr, cv::Size>
-ImageSegmentation::preprocess(const std::string &imageSource) {
-  cv::Mat input = imageprocessing::readImage(imageSource);
-  cv::Size inputSize = input.size();
-
-  cv::resize(input, input, modelImageSize);
-
-  std::vector<float> inputVector = imageprocessing::colorMatToVector(input);
-  return {
-      executorch::extension::make_tensor_ptr(getInputShape()[0], inputVector),
-      inputSize};
-}
-
 std::shared_ptr<jsi::Object> ImageSegmentation::postprocess(
     const Tensor &tensor, cv::Size originalSize,
     std::set<std::string, std::less<>> classesOfInterest, bool resize) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h b/packages/react-native-executorch/common/rnexecutorch/models/image_segmentation/ImageSegmentation.h
@@ -26,7 +26,6 @@ class ImageSegmentation : public BaseModel {
           std::set<std::string, std::less<>> classesOfInterest, bool resize);
 
 private:
-  std::pair<TensorPtr, cv::Size> preprocess(const std::string &imageSource);
   std::shared_ptr<jsi::Object>
   postprocess(const Tensor &tensor, cv::Size originalSize,
               std::set<std::string, std::less<>> classesOfInterest,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
@@ -25,16 +25,6 @@ ObjectDetection::ObjectDetection(
                             modelInputShape[modelInputShape.size() - 2]);
 }
 
-std::pair<TensorPtr, cv::Size>
-ObjectDetection::preprocess(const std::string &imageSource) {
-  cv::Mat image = imageprocessing::readImage(imageSource);
-  auto originalSize = image.size();
-  cv::resize(image, image, modelImageSize);
-
-  return {imageprocessing::getTensorFromMatrix(getInputShape()[0], image),
-          originalSize};
-}
-
 std::vector<Detection>
 ObjectDetection::postprocess(const std::vector<EValue> &tensors,
                              cv::Size originalSize, double detectionThreshold) {
@@ -77,9 +67,10 @@ ObjectDetection::postprocess(const std::vector<EValue> &tensors,
 
 std::vector<Detection> ObjectDetection::forward(std::string imageSource,
                                                 double detectionThreshold) {
-  auto [tensor, originalSize] = preprocess(imageSource);
+  auto [inputTensor, originalSize] =
+      imageprocessing::readImageToTensor(imageSource, getInputShape()[0]);
 
-  auto forwardResult = forwardET(tensor);
+  auto forwardResult = forwardET(inputTensor);
   if (!forwardResult.ok()) {
     throw std::runtime_error(
         "Failed to forward, error: " +
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h
@@ -21,7 +21,6 @@ class ObjectDetection : public BaseModel {
                                  double detectionThreshold);
 
 private:
-  std::pair<TensorPtr, cv::Size> preprocess(const std::string &imageSource);
   std::vector<Detection> postprocess(const std::vector<EValue> &tensors,
                                      cv::Size originalSize,
                                      double detectionThreshold);
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.cpp
@@ -33,16 +33,6 @@ StyleTransfer::StyleTransfer(const std::string &modelSource,
                             modelInputShape[modelInputShape.size() - 2]);
 }
 
-std::pair<TensorPtr, cv::Size>
-StyleTransfer::preprocess(const std::string &imageSource) {
-  cv::Mat image = imageprocessing::readImage(imageSource);
-  auto originalSize = image.size();
-  cv::resize(image, image, modelImageSize);
-
-  return {imageprocessing::getTensorFromMatrix(getInputShape()[0], image),
-          originalSize};
-}
-
 std::string StyleTransfer::postprocess(const Tensor &tensor,
                                        cv::Size originalSize) {
   cv::Mat mat = imageprocessing::getMatrixFromTensor(modelImageSize, tensor);
@@ -52,9 +42,10 @@ std::string StyleTransfer::postprocess(const Tensor &tensor,
 }
 
 std::string StyleTransfer::forward(std::string imageSource) {
-  auto [tensor, originalSize] = preprocess(imageSource);
+  auto [inputTensor, originalSize] =
+      imageprocessing::readImageToTensor(imageSource, getInputShape()[0]);
 
-  auto forwardResult = forwardET(tensor);
+  auto forwardResult = forwardET(inputTensor);
   if (!forwardResult.ok()) {
     throw std::runtime_error(
         "Failed to forward, error: " +
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h b/packages/react-native-executorch/common/rnexecutorch/models/style_transfer/StyleTransfer.h
@@ -22,7 +22,6 @@ class StyleTransfer : public BaseModel {
   std::string forward(std::string imageSource);
 
 private:
-  std::pair<TensorPtr, cv::Size> preprocess(const std::string &imageSource);
   std::string postprocess(const Tensor &tensor, cv::Size originalSize);
 
   cv::Size modelImageSize{0, 0};