Skip to content

Commit eab604a

Browse files
authored
Implement automatic text sizing based on frame dimensions and boundin… (#26)
The text-scale-ratio property allows users to override the automatic text sizing while maintaining proportional sizing as a baseline. Values > 1.0 make text larger, < 1.0 make text smaller, providing better control than the previous percentage-based approach.
1 parent f4d022c commit eab604a

File tree

3 files changed

+112
-46
lines changed

3 files changed

+112
-46
lines changed

README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,8 @@ Key features:
529529
- Draws bounding boxes for object detection and visual anomaly detection results (from VideoRegionOfInterestMeta)
530530
- Displays class labels with confidence scores
531531
- Supports wide range of video formats
532+
- Automatic text sizing: Calculates optimal font size based on frame dimensions and bounding box sizes
533+
- Text scale control: Use the `text-scale-ratio` property to fine-tune text size (0.1x to 5.0x scaling)
532534

533535
Properties:
534536
1. `stroke-width` (integer):
@@ -551,10 +553,11 @@ Properties:
551553
- Font family to use for text rendering
552554
- Default: "Sans"
553555

554-
5. `font-size-percentage` (double):
555-
- Font size as percentage of output image height
556-
- Range: 0.0 - 1.0 (0% - 100%, where 0.1 = 10%)
557-
- Default: 0.09 (9%)
556+
5. `text-scale-ratio` (double):
557+
- Scale factor for text size. Values > 1.0 make text larger, < 1.0 make text smaller
558+
- Range: 0.1 - 5.0
559+
- Default: 1.0 (no scaling)
560+
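- **Note:** This property scales the automatic text sizing; it does not replace it. The element first calculates an optimal font size based on frame/bbox dimensions, then multiplies it by this scale factor. The scaled size is still capped (at most 20 px and never taller than the box for bounding-box labels, 30 px for classification text).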
558561

559562
Example pipeline:
560563
```bash
@@ -564,7 +567,7 @@ gst-launch-1.0 avfvideosrc ! \
564567
videoscale ! \
565568
video/x-raw,format=RGB,width=384,height=384 ! \
566569
edgeimpulsevideoinfer ! \
567-
edgeimpulseoverlay stroke-width=3 font-size-percentage=0.12 text-color=0x00FF00 background-color=0x000000 ! \
570+
edgeimpulseoverlay stroke-width=3 text-scale-ratio=1.5 text-color=0x00FF00 background-color=0x000000 ! \
568571
autovideosink sync=false
569572

570573
# EIM mode (legacy)
@@ -573,7 +576,7 @@ gst-launch-1.0 avfvideosrc ! \
573576
videoscale ! \
574577
video/x-raw,format=RGB,width=384,height=384 ! \
575578
edgeimpulsevideoinfer model-path=<path-to-model> ! \
576-
edgeimpulseoverlay stroke-width=3 font-size-percentage=0.12 text-color=0x00FF00 background-color=0x000000 ! \
579+
edgeimpulseoverlay stroke-width=3 text-scale-ratio=1.5 text-color=0x00FF00 background-color=0x000000 ! \
577580
autovideosink sync=false
578581
```
579582

@@ -688,7 +691,7 @@ cargo run --example video_inference \
688691
cargo run --example video_inference \
689692
--width 224 \
690693
--height 224 \
691-
--font-size-percentage 0.12 \
694+
--text-scale-ratio 1.5 \
692695
--stroke-width 3 \
693696
--text-color 0x00FF00 \
694697
--background-color 0x000000
@@ -781,7 +784,7 @@ cargo run --example image_inference \
781784
--image input.jpg \
782785
--width 224 \
783786
--height 224 \
784-
--font-size-percentage 0.12 \
787+
--text-scale-ratio 1.5 \
785788
--stroke-width 3 \
786789
--text-color 0x00FF00 \
787790
--background-color 0x000000
@@ -790,7 +793,7 @@ cargo run --example image_inference \
790793
cargo run --example image_inference \
791794
--image input.jpg \
792795
--output output_with_overlay.png \
793-
--font-size-percentage 0.10
796+
--text-scale-ratio 0.8
794797

795798
# EIM mode (legacy)
796799
cargo run --example image_inference \

examples/video_inference.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,6 @@ struct VideoClassifyParams {
5656
#[arg(long)]
5757
perf: bool,
5858

59-
/// Font size percentage for overlay (0.0-1.0, where 0.1 = 10%)
60-
#[arg(long, default_value = "0.09")]
61-
font_size_percentage: f64,
62-
6359
/// Stroke width for bounding boxes
6460
#[arg(long, default_value = "2")]
6561
stroke_width: i32,
@@ -71,6 +67,10 @@ struct VideoClassifyParams {
7167
/// Background color in hex format (e.g., 0x000000 for black)
7268
#[arg(long, default_value = "0x000000")]
7369
background_color: String,
70+
71+
/// Text scale ratio (0.1 to 5.0). Values > 1.0 make text larger, < 1.0 make text smaller
72+
#[arg(long, default_value = "1.0")]
73+
text_scale_ratio: f64,
7474
}
7575

7676
// Performance tracking structure
@@ -352,7 +352,6 @@ fn create_pipeline(args: &VideoClassifyParams) -> Result<gst::Pipeline, Box<dyn
352352
.expect("Could not create queue element.");
353353

354354
let overlay = gst::ElementFactory::make("edgeimpulseoverlay")
355-
.property("font-size-percentage", &args.font_size_percentage)
356355
.property("stroke-width", &args.stroke_width)
357356
.property(
358357
"text-color",
@@ -362,6 +361,7 @@ fn create_pipeline(args: &VideoClassifyParams) -> Result<gst::Pipeline, Box<dyn
362361
"background-color",
363362
&u32::from_str_radix(&args.background_color[2..], 16).unwrap_or(0x000000),
364363
)
364+
.property("text-scale-ratio", &args.text_scale_ratio)
365365
.build()
366366
.expect("Could not create edgeimpulseoverlay element.");
367367

src/overlay/imp.rs

Lines changed: 95 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
//! Configurable via GStreamer properties:
3939
//! - stroke-width: Line width for boxes
4040
//! - text-color: Override text color
41-
//! - font-size-percentage: Font size as percentage of output image height
41+
//! - text-scale-ratio: Scale factor for text size (0.1 to 5.0, default 1.0)
4242
//! - font-type: Font selection
4343
//! - text-position: Label placement
4444
//! - show-labels: Toggle labels
@@ -93,12 +93,12 @@ pub struct Settings {
9393
pub stroke_width: i32,
9494
pub text_color: u32,
9595
pub background_color: u32,
96-
pub font_size_percentage: f64,
9796
pub font_type: String,
9897
pub text_position: String,
9998
pub show_labels: bool,
10099
pub model_input_width: i32,
101100
pub model_input_height: i32,
101+
pub text_scale_ratio: f64,
102102
}
103103

104104
impl Default for Settings {
@@ -107,12 +107,12 @@ impl Default for Settings {
107107
stroke_width: 2,
108108
text_color: 0xFFFFFF,
109109
background_color: 0x000000,
110-
font_size_percentage: 0.09, // 9% of image height as default
111110
font_type: "Sans".to_string(),
112111
text_position: "top-left".to_string(),
113112
show_labels: true,
114113
model_input_width: 160,
115114
model_input_height: 160,
115+
text_scale_ratio: 1.0, // Default scale ratio (no scaling)
116116
}
117117
}
118118
}
@@ -133,6 +133,8 @@ struct TextParams {
133133
x: i32,
134134
y: i32,
135135
settings: Settings,
136+
bbox_color: Option<(u8, u8, u8)>, // Optional bounding box color for text background
137+
bbox_height: Option<i32>, // Optional bounding box height for font size calculation
136138
}
137139

138140
#[derive(Default)]
@@ -170,15 +172,7 @@ impl ObjectImpl for EdgeImpulseOverlay {
170172
.blurb("Color of the text background in RGB format (0xRRGGBB)")
171173
.default_value(0x000000)
172174
.build(),
173-
glib::ParamSpecDouble::builder("font-size-percentage")
174-
.nick("Font Size Percentage")
175-
.blurb(
176-
"Font size as percentage of output image height (0.0-1.0, where 0.1 = 10%)",
177-
)
178-
.minimum(0.0)
179-
.maximum(1.0)
180-
.default_value(0.09)
181-
.build(),
175+
182176
glib::ParamSpecString::builder("font-type")
183177
.nick("Font Type")
184178
.blurb("Type of font to use")
@@ -206,6 +200,13 @@ impl ObjectImpl for EdgeImpulseOverlay {
206200
.minimum(1)
207201
.default_value(160)
208202
.build(),
203+
glib::ParamSpecDouble::builder("text-scale-ratio")
204+
.nick("Text Scale Ratio")
205+
.blurb("Scale factor for text size. Values > 1.0 make text larger, < 1.0 make text smaller. Default is 1.0 (no scaling)")
206+
.minimum(0.1)
207+
.maximum(5.0)
208+
.default_value(1.0)
209+
.build(),
209210
]
210211
});
211212
PROPERTIES.as_ref()
@@ -223,9 +224,7 @@ impl ObjectImpl for EdgeImpulseOverlay {
223224
"background-color" => {
224225
settings.background_color = value.get().unwrap();
225226
}
226-
"font-size-percentage" => {
227-
settings.font_size_percentage = value.get().unwrap();
228-
}
227+
229228
"font-type" => {
230229
settings.font_type = value.get().unwrap();
231230
}
@@ -241,6 +240,9 @@ impl ObjectImpl for EdgeImpulseOverlay {
241240
"model-input-height" => {
242241
settings.model_input_height = value.get().unwrap();
243242
}
243+
"text-scale-ratio" => {
244+
settings.text_scale_ratio = value.get().unwrap();
245+
}
244246
_ => unimplemented!(),
245247
}
246248
}
@@ -251,12 +253,13 @@ impl ObjectImpl for EdgeImpulseOverlay {
251253
"stroke-width" => settings.stroke_width.to_value(),
252254
"text-color" => settings.text_color.to_value(),
253255
"background-color" => settings.background_color.to_value(),
254-
"font-size-percentage" => settings.font_size_percentage.to_value(),
256+
255257
"font-type" => settings.font_type.to_value(),
256258
"text-position" => settings.text_position.to_value(),
257259
"show-labels" => settings.show_labels.to_value(),
258260
"model-input-width" => settings.model_input_width.to_value(),
259261
"model-input-height" => settings.model_input_height.to_value(),
262+
"text-scale-ratio" => settings.text_scale_ratio.to_value(),
260263
_ => unimplemented!(),
261264
}
262265
}
@@ -519,8 +522,9 @@ impl VideoFilterImpl for EdgeImpulseOverlay {
519522
// Draw classification text
520523
let text = format!("{} {:.1}%", label, confidence * 100.0);
521524

522-
// Calculate dynamic font size based on frame height
523-
let dynamic_font_size = self.calculate_font_size(&settings, frame.height() as i32);
525+
// Calculate dynamic font size based on frame height (no bounding box for classification)
526+
let dynamic_font_size =
527+
self.calculate_font_size(&settings, frame.height() as i32, None);
524528

525529
let text_x = if settings.text_position == "top-left"
526530
|| settings.text_position == "bottom-left"
@@ -556,6 +560,8 @@ impl VideoFilterImpl for EdgeImpulseOverlay {
556560
x: text_x,
557561
y: text_y,
558562
settings: settings.clone(),
563+
bbox_color: None, // No bounding box for classification text
564+
bbox_height: None, // No bounding box for classification text
559565
},
560566
&video_info,
561567
) {
@@ -696,6 +702,8 @@ impl VideoFilterImpl for EdgeImpulseOverlay {
696702
x: text_x,
697703
y: text_y,
698704
settings: settings.clone(),
705+
bbox_color: Some(color), // Use bounding box color for text background
706+
bbox_height: Some(height), // Use actual bounding box height for font size
699707
},
700708
&video_info,
701709
) {
@@ -725,14 +733,6 @@ impl VideoFilterImpl for EdgeImpulseOverlay {
725733

726734
// Implementation of element specific methods
727735
impl EdgeImpulseOverlay {
728-
/// Calculate dynamic font size based on output image height and percentage
729-
fn calculate_font_size(&self, settings: &Settings, frame_height: i32) -> i32 {
730-
let font_size = (frame_height as f64 * settings.font_size_percentage) as i32;
731-
// Ensure minimum font size of 4px and maximum of 48px for readability
732-
// Lower minimum allows for very small percentages to work as expected
733-
font_size.clamp(4, 48)
734-
}
735-
736736
/// Renders a bounding box with colored borders and semi-transparent fill.
737737
/// Used for both object detection boxes and anomaly grid cells.
738738
///
@@ -890,8 +890,9 @@ impl EdgeImpulseOverlay {
890890
let mut font_desc = pango::FontDescription::new();
891891
font_desc.set_family(&params.settings.font_type);
892892

893-
// Calculate dynamic font size based on frame height
894-
let dynamic_font_size = self.calculate_font_size(&params.settings, height);
893+
// Calculate dynamic font size based on bounding box height if available
894+
let dynamic_font_size =
895+
self.calculate_font_size(&params.settings, height, params.bbox_height);
895896
// Scale up the font size for high resolution
896897
font_desc.set_absolute_size(dynamic_font_size as f64 * pango::SCALE as f64);
897898
if height < 200 {
@@ -910,11 +911,23 @@ impl EdgeImpulseOverlay {
910911
total_width = text_width + (bg_padding * 2.0);
911912
total_height = text_height + (bg_padding * 2.0);
912913

913-
// Draw background rectangle at high resolution using user-specified background color
914+
// Draw background rectangle at high resolution
915+
// Use bounding box color if available, otherwise use user-specified background color
914916
cr.rectangle(params.x as f64, params.y as f64, total_width, total_height);
915-
let bg_r = ((params.settings.background_color >> 16) & 0xFF) as f64 / 255.0;
916-
let bg_g = ((params.settings.background_color >> 8) & 0xFF) as f64 / 255.0;
917-
let bg_b = (params.settings.background_color & 0xFF) as f64 / 255.0;
917+
let (bg_r, bg_g, bg_b) = if let Some(bbox_color) = params.bbox_color {
918+
// Use bounding box color for text background
919+
(
920+
bbox_color.0 as f64 / 255.0,
921+
bbox_color.1 as f64 / 255.0,
922+
bbox_color.2 as f64 / 255.0,
923+
)
924+
} else {
925+
// Use user-specified background color
926+
let bg_r = ((params.settings.background_color >> 16) & 0xFF) as f64 / 255.0;
927+
let bg_g = ((params.settings.background_color >> 8) & 0xFF) as f64 / 255.0;
928+
let bg_b = (params.settings.background_color & 0xFF) as f64 / 255.0;
929+
(bg_r, bg_g, bg_b)
930+
};
918931
cr.set_source_rgba(bg_r, bg_g, bg_b, 0.7);
919932
cr.fill()
920933
.map_err(|e| gst::loggable_error!(CAT, "Cairo fill failed: {}", e))?;
@@ -1040,6 +1053,56 @@ impl EdgeImpulseOverlay {
10401053
}
10411054
}
10421055

1056+
/// Calculates font size based on bounding box dimensions or frame height.
1057+
/// For bounding box labels, uses a size proportional to the box height.
1058+
/// For classification text, uses a size proportional to frame height.
1059+
/// Ensures minimum readable size based on screen dimensions.
1060+
fn calculate_font_size(
1061+
&self,
1062+
settings: &Settings,
1063+
frame_height: i32,
1064+
bbox_height: Option<i32>,
1065+
) -> i32 {
1066+
// Calculate minimum readable font size based on screen dimensions
1067+
// For small screens (e.g., mobile), use larger minimum
1068+
// For large screens, we can use smaller minimum
1069+
let min_font_size = if frame_height <= 240 {
1070+
9 // Small screens (e.g., 320x240)
1071+
} else if frame_height <= 480 {
1072+
8 // Medium screens (e.g., 640x480)
1073+
} else if frame_height <= 720 {
1074+
6 // Large screens (e.g., 1280x720)
1075+
} else {
1076+
5 // Very large screens (e.g., 1920x1080)
1077+
};
1078+
1079+
let base_size = match bbox_height {
1080+
Some(height) => {
1081+
// For bounding box labels: use 6% of bounding box height
1082+
let base_size = (height as f64 * 0.06) as i32;
1083+
// Ensure minimum readable size, but don't exceed bounding box height
1084+
base_size.max(min_font_size).min(height.min(14))
1085+
}
1086+
None => {
1087+
// For classification text: use 2.5% of frame height
1088+
let base_size = (frame_height as f64 * 0.025) as i32;
1089+
// Ensure minimum readable size with reasonable maximum
1090+
base_size.max(min_font_size).min(20)
1091+
}
1092+
};
1093+
1094+
// Apply the text scale ratio
1095+
let scaled_size = (base_size as f64 * settings.text_scale_ratio) as i32;
1096+
1097+
// Ensure the scaled size doesn't go below minimum or above reasonable maximum
1098+
let max_size = match bbox_height {
1099+
Some(height) => height.min(20), // For bounding box labels
1100+
None => 30, // For classification text
1101+
};
1102+
1103+
scaled_size.max(1).min(max_size) // Ensure at least 1px and not more than max
1104+
}
1105+
10431106
/// Reads a pixel's color from the video frame.
10441107
/// Used for transparency calculations when filling boxes.
10451108
/// Handles different color formats (RGB, NV12/NV21).

0 commit comments

Comments
 (0)