Currently, the LSTM engine does not provide font info. ShreeDevi ____________________________________________________________ भजन - कीर्तन - आरती @ http://bhajans.ramparivar.com
On Wed, Sep 13, 2017 at 6:52 PM, Supriya Das <[email protected]> wrote: > *Hi All,* > > *When i am trying to get information of word wise font attributes not > getting result. I choose ocr engine as a OEM_LSTM_ONLY. If i > choose OEM_TESSERACT_ONLY then getting correct result. Please suggest. > Thanks in advance. * > > > tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); > // Initialize tesseract-ocr with English, without specifying tessdata path > if(api->Init(NULL, "eng", tesseract::OEM_LSTM_ONLY)) { > fprintf(stderr, "Could not initialize tesseract.\n"); > exit(1); > } > //api->SetVariable("save_blob_choices", "T"); > //api->SetVariable("tessedit_char_whitelist"," > 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ./-"); > //api->SetVariable("tessedit_ocr_engine_mode","10"); > api->SetPageSegMode(tesseract::PSM_AUTO_ONLY); > api->SetVariable("chop_enable", "1"); > api->SetImage((uchar*)Result.data,Result.cols,Result.rows,1,Result.cols); > > > time(& Start); > char* out = api->GetUTF8Text(); > time(& End); > Result.release(); > dif = difftime(End,Start); > printf("\nTesseract Processing Time %lf\n",dif); > // Tesseract bounding box finding Word Wise > time(& Start); > tesseract::ResultIterator* ri = api->GetIterator(); > char* word; > const char *font_name; > bool bold, italic, underlined, monospace, serif, smallcaps; > int pointsize, font_id; > if(ri != 0){ > do{ > //tesseract::Orientation orientation; > //tesseract::WritingDirection writing_direction; > //tesseract::TextlineOrder textline_order; > //float deskew_angle; > //ri->Orientation(&orientation,&writing_ > direction,&textline_order,&deskew_angle); > word = ri->GetUTF8Text(tesseract::RIL_WORD); > > if(word != 0 && word[0] != '\0' && word[1] != ' ' && word[0] != ' '){ > float conf = ri->Confidence(tesseract::RIL_WORD); > int x1, y1, x2, y2; > if(conf > 0) > { > font_name = ri->WordFontAttributes(&bold, &italic, &underlined, > &monospace, &serif, > &smallcaps, &pointsize, > &font_id); 
> ri->BoundingBox(tesseract::RIL_WORD, &x1, &y1, &x2, &y2); > fprintf(fpout,"%s %d %d %d %d %d %d %d %d %d %d %d %d %s\n",word, x1, y1, > x2, y2,pointsize,bold,italic,underlined,monospace,serif, > smallcaps,font_id,font_name); > } > } > } while((ri->Next(tesseract::RIL_WORD))); > } > > -- > You received this message because you are subscribed to the Google Groups > "tesseract-ocr" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to [email protected]. > To post to this group, send email to [email protected]. > Visit this group at https://groups.google.com/group/tesseract-ocr. > To view this discussion on the web visit https://groups.google.com/d/ > msgid/tesseract-ocr/ab0377f5-46ab-4bfe-97a2-7dfac5836a82% > 40googlegroups.com > <https://groups.google.com/d/msgid/tesseract-ocr/ab0377f5-46ab-4bfe-97a2-7dfac5836a82%40googlegroups.com?utm_medium=email&utm_source=footer> > . > For more options, visit https://groups.google.com/d/optout. > -- You received this message because you are subscribed to the Google Groups "tesseract-ocr" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. Visit this group at https://groups.google.com/group/tesseract-ocr. To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/CAG2NduUcxUGbgRG%3DotP9FzCQFp9LAuu7b1un1Qrz3kTh1TBJdQ%40mail.gmail.com. For more options, visit https://groups.google.com/d/optout.

