On 2021-06-17 11:31, Lingjiang Fang wrote:
---
  doc/filters.texi     |  7 +++++++
  libavfilter/vf_ocr.c | 30 +++++++++++++++++++++++++++++-
  2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index da8f7d7726..9c650a2a5a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -15451,6 +15451,13 @@ Set character whitelist.
@item blacklist
  Set character blacklist.
+
+@item x, y
+Set top point position of subregion, not support expression now

--> Set position of the top-left corner of the subregion, in pixels.

+
+@item w, h
+Set Width and height of subregion

s/Width/width


+
  @end table
The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
index 6de474025a..7beb101679 100644
--- a/libavfilter/vf_ocr.c
+++ b/libavfilter/vf_ocr.c
@@ -33,6 +33,8 @@ typedef struct OCRContext {
      char *language;
      char *whitelist;
      char *blacklist;
+    int x, y;
+    int w, h;
TessBaseAPI *tess;
  } OCRContext;
@@ -45,6 +47,10 @@ static const AVOption ocr_options[] = {
      { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, 
{.str="eng"}, 0, 0, FLAGS },
      { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, 
{.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ 
"}, 0, 0, FLAGS },
      { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, 
{.str=""},    0, 0, FLAGS },
+    { "x",         "top x of sub region",     OFFSET(x),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "y",         "top y of sub region",     OFFSET(y),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "w",         "width of sub region",     OFFSET(w),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
+    { "h",         "height of sub region",    OFFSET(h),         
AV_OPT_TYPE_INT,    {.i64=0},     0, INT_MAX, FLAGS },
      { NULL }
  };
@@ -73,6 +79,19 @@ static av_cold int init(AVFilterContext *ctx)
      return 0;
  }
+static int config_input(AVFilterLink *inlink)
+{
+    OCRContext  *s = inlink->dst->priv;
+
+    // may call many times, we don't check w/h here
+    if (s->x < 0 || s->y < 0) {
+        s->x = 0;
+        s->y = 0;

These are AV_OPT_TYPE_INT options with their range set to 0..INT_MAX, so the option parser already rejects negative values; this check is unnecessary.

Regards,
Gyan

+    }
+
+    return 0;
+}
+
  static int query_formats(AVFilterContext *ctx)
  {
      static const enum AVPixelFormat pix_fmts[] = {
@@ -101,9 +120,17 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
      OCRContext *s = ctx->priv;
      char *result;
      int *confs;
+    int w = s->w;
+    int h = s->h;
+
+    if (w <= 0 || h <= 0) {
+        w = in->width;
+        h = in->height;
+    }
+ av_log(s, AV_LOG_ERROR, "x=%d, y=%d, w=%d, h=%d\n", s->x, s->y, w, h);
      result = TessBaseAPIRect(s->tess, in->data[0], 1,
-                             in->linesize[0], 0, 0, in->width, in->height);
+                             in->linesize[0], s->x, s->y, w, h);
      confs = TessBaseAPIAllWordConfidences(s->tess);
      av_dict_set(metadata, "lavfi.ocr.text", result, 0);
      for (int i = 0; confs[i] != -1; i++) {
@@ -134,6 +161,7 @@ static const AVFilterPad ocr_inputs[] = {
          .name         = "default",
          .type         = AVMEDIA_TYPE_VIDEO,
          .filter_frame = filter_frame,
+        .config_props = config_input,
      },
      { NULL }
  };

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to