Skip to content

Commit 3814de0

Browse files
cdtwigg authored and facebook-github-bot committed
Support rendering text in the rasterizer. (#786)
Summary: I find myself wanting to add text labels to things in the image; currently the only way to do that is to fork out to OpenCV. It would be neat if we could write text to the image with support for z-buffer checking, and if we didn't have to fork out to a separate library. Reviewed By: jeongseok-meta, cstollmeta. Differential Revision: D85690037
1 parent f6f1ecd commit 3814de0

File tree

7 files changed

+1019
-0
lines changed

7 files changed

+1019
-0
lines changed

cmake/build_variables.bzl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,7 @@ rasterizer_public_headers = [
700700
"rasterizer/image.h",
701701
"rasterizer/rasterizer.h",
702702
"rasterizer/tensor.h",
703+
"rasterizer/text_rasterizer.h",
703704
"rasterizer/utility.h",
704705
]
705706

@@ -708,12 +709,14 @@ rasterizer_sources = [
708709
"rasterizer/geometry.cpp",
709710
"rasterizer/rasterizer.cpp",
710711
"rasterizer/image.cpp",
712+
"rasterizer/text_rasterizer.cpp",
711713
]
712714

713715
rasterizer_test_sources = [
714716
"test/rasterizer/test_camera.cpp",
715717
"test/rasterizer/test_geometry.cpp",
716718
"test/rasterizer/test_software_rasterizer.cpp",
719+
"test/rasterizer/test_text_rasterizer.cpp",
717720
]
718721

719722
#===========

momentum/rasterizer/text_rasterizer.cpp

Lines changed: 500 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#pragma once
9+
10+
#include <momentum/rasterizer/camera.h>
11+
#include <momentum/rasterizer/fwd.h>
12+
#include <momentum/rasterizer/rasterizer.h>
13+
#include <Eigen/Core>
14+
#include <gsl/span>
15+
#include <string>
16+
17+
namespace momentum::rasterizer {
18+
19+
/// Horizontal alignment options for text rendering.
///
/// Selected through the `horizontalAlignment` parameter of rasterizeText() and
/// rasterizeText2D(), where it describes how the text is aligned relative to the
/// requested position. Both functions default to `Left`.
/// NOTE(review): the exact anchor each value implies (e.g. whether `Left` puts the
/// text's left edge at the position) is decided in text_rasterizer.cpp, which is not
/// shown here -- confirm before relying on it.
20+
enum class HorizontalAlignment {
21+
Left,
22+
Center,
23+
Right,
24+
};
25+
26+
/// Vertical alignment options for text rendering.
///
/// Selected through the `verticalAlignment` parameter of rasterizeText() and
/// rasterizeText2D(), where it describes how the text is aligned relative to the
/// requested position. Both functions default to `Top`.
/// NOTE(review): the exact anchor each value implies (e.g. whether `Top` puts the
/// text's top edge at the position) is decided in text_rasterizer.cpp, which is not
/// shown here -- confirm before relying on it.
27+
enum class VerticalAlignment {
28+
Top,
29+
Center,
30+
Bottom,
31+
};
32+
33+
/// Rasterize text at 3D world positions
34+
///
35+
/// Projects 3D positions to image space using the camera and renders text strings at those
36+
/// locations. Uses an embedded bitmap font for rendering.
/// NOTE(review): the docs say one string is rendered "at each position", so positionsWorld and
/// texts presumably must have the same length -- confirm how a mismatch is handled.
37+
///
38+
/// @param positionsWorld 3D positions in world coordinates where text should be rendered
39+
/// @param texts Text strings to render at each position
40+
/// @param camera Camera to render from
41+
/// @param modelMatrix Model transformation matrix
42+
/// @param nearClip Near clipping distance
43+
/// @param color RGB color for the text
44+
/// @param textScale Integer scaling factor for text size (1 = 1 pixel per font pixel)
45+
/// @param zBuffer Input/output depth buffer (SIMD-aligned)
46+
/// @param rgbBuffer Optional input/output RGB color buffer
/// @param depthOffset Offset applied to the depth values (defaults to 0)
47+
/// @param imageOffset Pixel offset for positioning
48+
/// @param horizontalAlignment Horizontal text alignment relative to position
49+
/// @param verticalAlignment Vertical text alignment relative to position
50+
void rasterizeText(
51+
gsl::span<const Eigen::Vector3f> positionsWorld,
52+
gsl::span<const std::string> texts,
53+
const Camera& camera,
54+
const Eigen::Matrix4f& modelMatrix,
55+
float nearClip,
56+
const Eigen::Vector3f& color,
57+
int textScale,
58+
Span2f zBuffer,
59+
Span3f rgbBuffer = {},
60+
float depthOffset = 0,
61+
const Eigen::Vector2f& imageOffset = {0, 0},
62+
HorizontalAlignment horizontalAlignment = HorizontalAlignment::Left,
63+
VerticalAlignment verticalAlignment = VerticalAlignment::Top);
64+
65+
/// Rasterize text directly in 2D image space
66+
///
67+
/// Renders text at 2D image positions without camera projection or depth testing.
/// In contrast to rasterizeText(), the RGB buffer is the required output here and the
/// depth buffer is the optional one.
/// NOTE(review): the docs say one string is rendered "at each position", so positionsImage and
/// texts presumably must have the same length -- confirm how a mismatch is handled.
68+
///
69+
/// @param positionsImage 2D positions in image coordinates where text should be rendered
70+
/// @param texts Text strings to render at each position
71+
/// @param color RGB color for the text
72+
/// @param textScale Integer scaling factor for text size (1 = 1 pixel per font pixel)
73+
/// @param rgbBuffer Input/output RGB color buffer
74+
/// @param zBuffer Optional depth buffer (fills with zeros when provided)
75+
/// @param imageOffset Pixel offset for positioning
76+
/// @param horizontalAlignment Horizontal text alignment relative to position
77+
/// @param verticalAlignment Vertical text alignment relative to position
78+
void rasterizeText2D(
79+
gsl::span<const Eigen::Vector2f> positionsImage,
80+
gsl::span<const std::string> texts,
81+
const Eigen::Vector3f& color,
82+
int textScale,
83+
Span3f rgbBuffer,
84+
Span2f zBuffer = {},
85+
const Eigen::Vector2f& imageOffset = {0, 0},
86+
HorizontalAlignment horizontalAlignment = HorizontalAlignment::Left,
87+
VerticalAlignment verticalAlignment = VerticalAlignment::Top);
88+
89+
} // namespace momentum::rasterizer
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include <momentum/rasterizer/camera.h>
9+
#include <momentum/rasterizer/image.h>
10+
#include <momentum/rasterizer/text_rasterizer.h>
11+
12+
#include <gtest/gtest.h>
#include <cfloat>
#include <memory>
#include <string>
#include <vector>
13+
14+
using namespace momentum::rasterizer;
15+
16+
// Smoke test for rasterizeText(): draws "Hello" in red at world position (0, 0, 1.5) with an
// identity model matrix, then checks color and depth on every pixel that received a depth value.
TEST(TextRasterizer, BasicText3D) {
17+
const int width = 200;
18+
const int height = 100;
19+
20+
OpenCVDistortionParametersT<float> distortionParams;
21+
// NOTE(review): arguments look like (width, height, fx, fy, cx, cy, distortion) -- confirm
// against the OpenCVIntrinsicsModel constructor.
auto intrinsics = std::make_shared<OpenCVIntrinsicsModel>(
22+
width, height, width / 2.0f, height / 2.0f, width / 2.0f, height / 2.0f, distortionParams);
23+
24+
Camera camera(intrinsics);
25+
26+
auto zBuffer = makeRasterizerZBuffer(camera);
27+
auto rgbBuffer = makeRasterizerRGBBuffer(camera);
28+
29+
std::vector<Eigen::Vector3f> positions = {Eigen::Vector3f(0.0f, 0.0f, 1.5f)};
30+
std::vector<std::string> texts = {"Hello"};
31+
32+
// Positional arguments after the camera: modelMatrix, nearClip, color, textScale, zBuffer,
// rgbBuffer. depthOffset, imageOffset and both alignments keep their defaults.
rasterizeText(
33+
positions,
34+
texts,
35+
camera,
36+
Eigen::Matrix4f::Identity(),
37+
0.1f,
38+
Eigen::Vector3f(1.0f, 0.0f, 0.0f),
39+
1,
40+
zBuffer.view(),
41+
rgbBuffer.view());
42+
43+
int pixelsSet = 0;
44+
for (int y = 0; y < height; ++y) {
45+
for (int x = 0; x < width; ++x) {
46+
// A pixel is treated as "written by the text" when its depth is below FLT_MAX; this
// presumably relies on makeRasterizerZBuffer() initializing depths to FLT_MAX -- TODO confirm.
if (zBuffer(y, x) < FLT_MAX) {
47+
pixelsSet++;
48+
EXPECT_NEAR(rgbBuffer(y, x, 0), 1.0f, 1e-5f);
49+
EXPECT_NEAR(rgbBuffer(y, x, 1), 0.0f, 1e-5f);
50+
EXPECT_NEAR(rgbBuffer(y, x, 2), 0.0f, 1e-5f);
51+
// Expected depth equals the z component of the input position.
EXPECT_NEAR(zBuffer(y, x), 1.5f, 1e-5f);
52+
}
53+
}
54+
}
55+
56+
// Without this the per-pixel checks above would pass vacuously if nothing was drawn.
EXPECT_GT(pixelsSet, 0);
57+
}
58+
59+
// Smoke test for rasterizeText2D(): draws "Test" in green at image position (10, 10) with both
// an RGB buffer and a depth buffer, then checks color and depth on every written pixel.
TEST(TextRasterizer, BasicText2D) {
60+
const int width = 200;
61+
const int height = 100;
62+
63+
OpenCVDistortionParametersT<float> distortionParams;
64+
// The camera is only used to size the buffers; rasterizeText2D() itself takes no camera.
auto intrinsics = std::make_shared<OpenCVIntrinsicsModel>(
65+
width, height, width / 2.0f, height / 2.0f, width / 2.0f, height / 2.0f, distortionParams);
66+
67+
Camera camera(intrinsics);
68+
69+
auto zBuffer = makeRasterizerZBuffer(camera);
70+
auto rgbBuffer = makeRasterizerRGBBuffer(camera);
71+
72+
std::vector<Eigen::Vector2f> positions = {Eigen::Vector2f(10.0f, 10.0f)};
73+
std::vector<std::string> texts = {"Test"};
74+
75+
rasterizeText2D(
76+
positions, texts, Eigen::Vector3f(0.0f, 1.0f, 0.0f), 1, rgbBuffer.view(), zBuffer.view());
77+
78+
int pixelsSet = 0;
79+
for (int y = 0; y < height; ++y) {
80+
for (int x = 0; x < width; ++x) {
81+
// A pixel is treated as "written by the text" when its depth is below FLT_MAX; this
// presumably relies on makeRasterizerZBuffer() initializing depths to FLT_MAX -- TODO confirm.
if (zBuffer(y, x) < FLT_MAX) {
82+
pixelsSet++;
83+
EXPECT_NEAR(rgbBuffer(y, x, 0), 0.0f, 1e-5f);
84+
EXPECT_NEAR(rgbBuffer(y, x, 1), 1.0f, 1e-5f);
85+
EXPECT_NEAR(rgbBuffer(y, x, 2), 0.0f, 1e-5f);
86+
// rasterizeText2D() is documented to fill the optional depth buffer with zeros.
EXPECT_NEAR(zBuffer(y, x), 0.0f, 1e-5f);
87+
}
88+
}
89+
}
90+
91+
// Without this the per-pixel checks above would pass vacuously if nothing was drawn.
EXPECT_GT(pixelsSet, 0);
92+
}
93+
94+
// Checks that textScale changes the rendered size: the glyph "A" is drawn at scale 1 and at
// scale 2 into separate RGB buffers, and the lit-pixel counts are compared.
TEST(TextRasterizer, TextScaling) {
95+
const int width = 400;
96+
const int height = 200;
97+
98+
OpenCVDistortionParametersT<float> distortionParams;
99+
// The camera is only used to size the buffers; rasterizeText2D() itself takes no camera.
auto intrinsics = std::make_shared<OpenCVIntrinsicsModel>(
100+
width, height, width / 2.0f, height / 2.0f, width / 2.0f, height / 2.0f, distortionParams);
101+
102+
Camera camera(intrinsics);
103+
104+
auto rgbBuffer1 = makeRasterizerRGBBuffer(camera);
105+
auto rgbBuffer2 = makeRasterizerRGBBuffer(camera);
106+
107+
std::vector<Eigen::Vector2f> positions = {Eigen::Vector2f(10.0f, 10.0f)};
108+
std::vector<std::string> texts = {"A"};
109+
110+
rasterizeText2D(positions, texts, Eigen::Vector3f(1.0f, 1.0f, 1.0f), 1, rgbBuffer1.view());
111+
112+
// Count lit pixels via the red channel of the white text.
// NOTE(review): assumes makeRasterizerRGBBuffer() starts with red <= 0.5 everywhere -- confirm.
int pixelsScale1 = 0;
113+
for (int y = 0; y < height; ++y) {
114+
for (int x = 0; x < width; ++x) {
115+
if (rgbBuffer1(y, x, 0) > 0.5f) {
116+
pixelsScale1++;
117+
}
118+
}
119+
}
120+
121+
rasterizeText2D(positions, texts, Eigen::Vector3f(1.0f, 1.0f, 1.0f), 2, rgbBuffer2.view());
122+
123+
int pixelsScale2 = 0;
124+
for (int y = 0; y < height; ++y) {
125+
for (int x = 0; x < width; ++x) {
126+
if (rgbBuffer2(y, x, 0) > 0.5f) {
127+
pixelsScale2++;
128+
}
129+
}
130+
}
131+
// Fatal assertion: the ratio below divides by pixelsScale1, so stop here if nothing was drawn
// instead of reporting a second, misleading inf/NaN failure.
ASSERT_GT(pixelsScale1, 0);
132+
EXPECT_GT(pixelsScale2, pixelsScale1);
133+
// Doubling an integer per-font-pixel scale should roughly quadruple the covered area; the
// tolerance of 1.0 accepts any ratio in [3, 5].
EXPECT_NEAR(static_cast<float>(pixelsScale2) / pixelsScale1, 4.0f, 1.0f);
134+
}
136+
137+
// Renders two strings in one rasterizeText2D() call ("Line1" at (10, 10), "Line2" at (10, 30))
// in magenta and checks the color of every written pixel.
// NOTE(review): only the total written-pixel count is checked, so the test would still pass if
// just one of the two strings were drawn -- consider asserting on each region separately.
TEST(TextRasterizer, MultipleTexts) {
138+
const int width = 400;
139+
const int height = 200;
140+
141+
OpenCVDistortionParametersT<float> distortionParams;
142+
// The camera is only used to size the buffers; rasterizeText2D() itself takes no camera.
auto intrinsics = std::make_shared<OpenCVIntrinsicsModel>(
143+
width, height, width / 2.0f, height / 2.0f, width / 2.0f, height / 2.0f, distortionParams);
144+
145+
Camera camera(intrinsics);
146+
147+
auto zBuffer = makeRasterizerZBuffer(camera);
148+
auto rgbBuffer = makeRasterizerRGBBuffer(camera);
149+
150+
std::vector<Eigen::Vector2f> positions = {
151+
Eigen::Vector2f(10.0f, 10.0f), Eigen::Vector2f(10.0f, 30.0f)};
152+
std::vector<std::string> texts = {"Line1", "Line2"};
153+
154+
rasterizeText2D(
155+
positions, texts, Eigen::Vector3f(1.0f, 0.0f, 1.0f), 1, rgbBuffer.view(), zBuffer.view());
156+
157+
int pixelsSet = 0;
158+
for (int y = 0; y < height; ++y) {
159+
for (int x = 0; x < width; ++x) {
160+
// A pixel is treated as "written by the text" when its depth is below FLT_MAX; this
// presumably relies on makeRasterizerZBuffer() initializing depths to FLT_MAX -- TODO confirm.
if (zBuffer(y, x) < FLT_MAX) {
161+
pixelsSet++;
162+
EXPECT_NEAR(rgbBuffer(y, x, 0), 1.0f, 1e-5f);
163+
EXPECT_NEAR(rgbBuffer(y, x, 1), 0.0f, 1e-5f);
164+
EXPECT_NEAR(rgbBuffer(y, x, 2), 1.0f, 1e-5f);
165+
}
166+
}
167+
}
168+
169+
// Without this the per-pixel checks above would pass vacuously if nothing was drawn.
EXPECT_GT(pixelsSet, 0);
170+
}

pymomentum/renderer/renderer_pybind.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <momentum/character/skeleton_state.h>
1616
#include <momentum/rasterizer/camera.h>
1717
#include <momentum/rasterizer/rasterizer.h>
18+
#include <momentum/rasterizer/text_rasterizer.h>
1819

1920
#include <pybind11/eigen.h>
2021
#include <pybind11/pybind11.h>
@@ -645,6 +646,18 @@ PYBIND11_MODULE(renderer, m) {
645646
.value("Ambient", momentum::rasterizer::LightType::Ambient)
646647
.value("Directional", momentum::rasterizer::LightType::Directional)
647648
.value("Point", momentum::rasterizer::LightType::Point);
649+
650+
// Expose momentum::rasterizer::HorizontalAlignment on module `m` as `HorizontalAlignment`,
// with one Python value per C++ enumerator (Left, Center, Right).
py::enum_<momentum::rasterizer::HorizontalAlignment>(
651+
m, "HorizontalAlignment", "Horizontal text alignment options.")
652+
.value("Left", momentum::rasterizer::HorizontalAlignment::Left)
653+
.value("Center", momentum::rasterizer::HorizontalAlignment::Center)
654+
.value("Right", momentum::rasterizer::HorizontalAlignment::Right);
655+
656+
// Expose momentum::rasterizer::VerticalAlignment on module `m` as `VerticalAlignment`,
// with one Python value per C++ enumerator (Top, Center, Bottom).
py::enum_<momentum::rasterizer::VerticalAlignment>(
657+
m, "VerticalAlignment", "Vertical text alignment options.")
658+
.value("Top", momentum::rasterizer::VerticalAlignment::Top)
659+
.value("Center", momentum::rasterizer::VerticalAlignment::Center)
660+
.value("Bottom", momentum::rasterizer::VerticalAlignment::Bottom);
648661
py::class_<momentum::rasterizer::Light>(
649662
m,
650663
"Light",
@@ -1459,4 +1472,65 @@ This is useful for rendering shadows using the classic projection shadows techni
14591472
py::arg("light"),
14601473
py::arg("plane_normal") = std::optional<Eigen::Vector3f>{},
14611474
py::arg("plane_origin") = std::optional<Eigen::Vector3f>{});
1475+
1476+
// Python binding `rasterize_text` on module `m`. positions, texts, camera, z_buffer and
// rgb_buffer may be passed positionally; everything after the py::kw_only() marker below is
// keyword-only.
// NOTE(review): `rasterizeText` is unqualified and the defaults use std::optional<at::Tensor> /
// std::optional<Eigen::...>, so this presumably binds a tensor-based wrapper declared elsewhere
// rather than momentum::rasterizer::rasterizeText directly -- confirm.
m.def(
1477+
"rasterize_text",
1478+
&rasterizeText,
1479+
R"(Rasterize text at 3D world positions.
1480+
1481+
Projects 3D positions to image space using the camera and renders text strings at those locations using an embedded bitmap font.
1482+
1483+
:param positions: (nTexts x 3) torch.Tensor of 3D positions in world coordinates.
1484+
:param texts: List of strings to render at each position.
1485+
:param camera: Camera to render from.
1486+
:param z_buffer: Z-buffer to render geometry onto; can be reused for multiple renders.
1487+
:param rgb_buffer: Optional RGB-buffer to render geometry onto.
1488+
:param color: RGB color for the text. Defaults to white (1, 1, 1).
1489+
:param text_scale: Integer scaling factor for text size (1 = 1 pixel per font pixel). Defaults to 1.
1490+
:param horizontal_alignment: Horizontal text alignment (Left, Center, or Right). Defaults to Left.
1491+
:param vertical_alignment: Vertical text alignment (Top, Center, or Bottom). Defaults to Top.
1492+
:param model_matrix: Additional matrix to apply to the model. Defaults to identity matrix.
1493+
:param near_clip: Clip any text closer than this depth. Defaults to 0.1.
1494+
:param depth_offset: Offset the depth values. Defaults to 0.
1495+
:param image_offset: Offset by (x, y) pixels in image space.
1496+
)",
1497+
py::arg("positions"),
1498+
py::arg("texts"),
1499+
py::arg("camera"),
1500+
py::arg("z_buffer"),
1501+
py::arg("rgb_buffer") = std::optional<at::Tensor>{},
1502+
py::kw_only(),
1503+
py::arg("color") = Eigen::Vector3f(1.0f, 1.0f, 1.0f),
1504+
py::arg("text_scale") = 1,
1505+
py::arg("horizontal_alignment") = momentum::rasterizer::HorizontalAlignment::Left,
1506+
py::arg("vertical_alignment") = momentum::rasterizer::VerticalAlignment::Top,
1507+
py::arg("model_matrix") = std::optional<Eigen::Matrix4f>{},
1508+
py::arg("near_clip") = 0.1f,
1509+
py::arg("depth_offset") = 0.0f,
1510+
py::arg("image_offset") = std::optional<Eigen::Vector2f>{});
1511+
1512+
// Python binding `rasterize_text_2d` on module `m`. Only positions, texts and rgb_buffer are
// required; the remaining arguments have defaults.
// NOTE(review): unlike the `rasterize_text` binding, there is no py::kw_only() marker here, so
// color, text_scale, the alignments, z_buffer and image_offset can all be passed positionally --
// confirm this asymmetry is intended.
// NOTE(review): `rasterizeText2D` is unqualified and the defaults use std::optional<at::Tensor>,
// so this presumably binds a tensor-based wrapper declared elsewhere -- confirm.
m.def(
1513+
"rasterize_text_2d",
1514+
&rasterizeText2D,
1515+
R"(Rasterize text directly in 2D image space without camera projection or depth testing.
1516+
1517+
:param positions: (nTexts x 2) torch.Tensor of 2D positions in image coordinates.
1518+
:param texts: List of strings to render at each position.
1519+
:param rgb_buffer: RGB-buffer to render geometry onto.
1520+
:param color: RGB color for the text. Defaults to white (1, 1, 1).
1521+
:param text_scale: Integer scaling factor for text size (1 = 1 pixel per font pixel). Defaults to 1.
1522+
:param horizontal_alignment: Horizontal text alignment (Left, Center, or Right). Defaults to Left.
1523+
:param vertical_alignment: Vertical text alignment (Top, Center, or Bottom). Defaults to Top.
1524+
:param z_buffer: Optional Z-buffer to write zeros to for alpha matting.
1525+
:param image_offset: Offset by (x, y) pixels in image space.
1526+
)",
1527+
py::arg("positions"),
1528+
py::arg("texts"),
1529+
py::arg("rgb_buffer"),
1530+
py::arg("color") = Eigen::Vector3f(1.0f, 1.0f, 1.0f),
1531+
py::arg("text_scale") = 1,
1532+
py::arg("horizontal_alignment") = momentum::rasterizer::HorizontalAlignment::Left,
1533+
py::arg("vertical_alignment") = momentum::rasterizer::VerticalAlignment::Top,
1534+
py::arg("z_buffer") = std::optional<at::Tensor>{},
1535+
py::arg("image_offset") = std::optional<Eigen::Vector2f>{});
14621536
}

0 commit comments

Comments
 (0)