HI,

After I added all your code, the project builds successfully. However, the
output is always a tilde (~) character, for every image I try.

Please guide me on how to get the correct output text.

Thanks in advance.

Code
>>>>>>>>>>>>>>>>>>>>>>>
using System; 
using System.Collections.Generic; 
using System.Text; 
using System.Drawing; 
using System.Runtime.InteropServices; 
using System.Drawing.Imaging; 
using System.IO; 

[StructLayout(LayoutKind.Sequential)] 
public struct CharDescription                 /*single character */ 
{ 

    public UInt16 char_code;              /*character itself */ 
    public Int16 left;                    /*of char (-1) */ 
    public Int16 right;                   /*of char (-1) */ 
    public Int16 top;                     /*of char (-1) */ 
    public Int16 bottom;                  /*of char (-1) */ 
    public Int16 font_index;              /*what font (0) */ 
    public Byte confidence;              /*0=perfect, 100=reject 
(0/100) */ 
    public Byte point_size;              /*of char, 72=i inch, (10) */ 
    public SByte blanks;                   /*no of spaces before this 
char (1) */ 
    public Byte formatting;              /*char formatting (0) */ 

}                  /*single character */ 

[StructLayout(LayoutKind.Sequential)] 
public struct GeneralInfos     /*output header */ 
{ 
    public Int16 count;                   /*chars in this buffer(0) */ 
    public Int16 progress;                /*percent complete 
increasing (0-100) */ 
    public SByte more_to_come;             /*true if not last */ 
    public SByte ocr_alive;                /*ocr sets to 1, HP 0 */ 
    public SByte err_code;                 /*for errcode use */ 
    public Byte cancelSignature; 
    public IntPtr cancel;            /*returns true to cancel */ 
    public IntPtr cancel_this;             /*this or other data for 
cancel*/ 
    public Int32 end_time;              /*time to stop if not 0*/ 
} 

/// <summary> 
/// Used for returning a managed structure contains OCR extraction results 
/// </summary> 
[StructLayout(LayoutKind.Sequential)] 
public class AnalysisResults 
{ 
    public GeneralInfos generalInfos=new GeneralInfos(); 
    public List<CharDescription> charactersFound = new 
List<CharDescription>(); 
    public string stringFound = ""; 
} 

namespace tesseractOCR 
{ 
    /// <summary> 
    /// A wrapper that expose some fonctionnalies of TesseractOCR 
    /// </summary> 
    class WrapperTesseract 
    { 
        private static bool firstRun = true; 

        [DllImport("tessdll.dll", 
CallingConvention=CallingConvention.Cdecl)] 
        private unsafe static extern int 
TessDllBeginPageUpright(UInt32 xsize, UInt32 ysize, byte* buf, string 
lang); 

        [DllImport("tessdll.dll", CallingConvention = 
CallingConvention.Cdecl)] 
        private unsafe static extern int 
TessDllBeginPageUprightBPP(UInt32 xsize, UInt32 ysize, byte* buf, 
string lang, Byte bpp); 

        [DllImport("tessdll.dll", CallingConvention = 
CallingConvention.Cdecl)] 
        private static extern IntPtr TessDllRecognize_all_Words(); 

        [DllImport("tessdll.dll", CallingConvention = 
CallingConvention.Cdecl)] 
        private static extern void TessDllEndPage(); 

        [DllImport("tessdll.dll", CallingConvention = 
CallingConvention.Cdecl)] 
        private static extern void TessDllRelease(); 

        [DllImport("tessdll.dll", CallingConvention = 
CallingConvention.Cdecl, SetLastError=true)] 
        private static unsafe extern IntPtr 
ExtractDataWithExeLogic(UInt32 xsize, UInt32 ysize, byte* buf, string 
lang, Byte bpp); 


        /// <summary> 
        /// Converts the image to byte array without Strides data.. 
        /// </summary> 
        /// <param name="imageToConvert">The image to convert.</param> 
        /// <returns>A array of bytes</returns> 
        private static byte[] ConvertImageToByteArray(System.Drawing.Bitmap 
imageToConvert, Byte bpp) 
        { 
            //Create a BitmapData and Lock all pixels to be read 
            BitmapData bmpData = imageToConvert.LockBits(new Rectangle(0, 
0, imageToConvert.Width, imageToConvert.Height), 
                   ImageLockMode.ReadOnly, imageToConvert.PixelFormat); 

            byte[] imgBytes = new byte[bmpData.Height * bmpData.Width * 3]; 

            unsafe 
            { 
                IntPtr imgPtr = (bmpData.Scan0); 

                int indexImg = 0; 

                int pointerOffset = 0; 

                for (int i = 0; i < bmpData.Height; i++) 
                { 
                    for (int j = 0; j < bmpData.Width; j++) 
                    { 
                        imgBytes[indexImg] = 
Marshal.ReadByte(imgPtr,pointerOffset); 
                        indexImg++; 
                        pointerOffset += (bpp / 8) / 3; 

                        imgBytes[indexImg] = 
Marshal.ReadByte(imgPtr,pointerOffset); 
                        indexImg++; 
                        pointerOffset += (bpp / 8) / 3; 

                        imgBytes[indexImg] = 
Marshal.ReadByte(imgPtr,pointerOffset); 
                        indexImg++; 
                        pointerOffset += (bpp / 8) / 3; 

                    } 
                    pointerOffset += bmpData.Stride - bmpData.Width * 
(bpp/8); 
                } 
            } 

            //Unlock the pixels 
            imageToConvert.UnlockBits(bmpData); 

            return imgBytes; 
        } 

        /// <summary> 
        /// Take an image and extract any text found using the specified 
language with TesseractOCR DLL logic. 
        /// </summary> 
        /// <param name="bmp">The image to analyse.</param> 
        /// <param name="language">The language to be used to extract 
text.</param> 
        /// <param name="bpp">The number of bits per pixel(bpp) of the 
image.</param> 
        /// <returns>The extracted string </returns> 
        public static String RecognizeCharsDll(System.Drawing.Bitmap 
bmp, string language) 
        { 

            if (firstRun) 
                firstRun = false; 
            else 
                TessDllEndPage(); 

            Byte bpp = 0; 
            AnalysisResults tmp = new AnalysisResults(); 
            IntPtr data = new IntPtr(); 


            switch (bmp.PixelFormat) 
            { 
                case PixelFormat.Format1bppIndexed: bpp = 1; break; 
                case PixelFormat.Format8bppIndexed: bpp = 8; break; 
                case PixelFormat.Format48bppRgb: bpp = 48; break; 
                case PixelFormat.Format32bppRgb: bpp = 32; break; 
                case PixelFormat.Format24bppRgb: bpp = 24; break; 
                default: throw new BadImageFormatException("Please use an 
image format of.... TODO");//TODO 

            }


            unsafe
            {
                byte[] imgArray = ConvertImageToByteArray(bmp, bpp);

                fixed (byte* ptr = &imgArray[0])
                {
                    try
                    {
                        data = (IntPtr)ptr;
                        int a = TessDllBeginPageUpright((uint)bmp.Width, 
(uint)bmp.Height, ptr, language);

                        data = TessDllRecognize_all_Words();
                        tmp = BuildAnalysisResults(data);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e.ToString());
                    }

                }
            }


            //TODO: Character analysis... 


            return (BuildAnalysisResults(data)).stringFound; 
        } 

        /// <summary> 
        /// Take an image and extract any text found using the specified 
language with TesseractOCR EXE Logic. 
        /// </summary> 
        /// <param name="bmp">The image to analyse.</param> 
        /// <param name="language">The language to be used to extract 
text.</param> 
        /// <param name="bpp">The number of bits per pixel(bpp) of the 
image.</param> 
        /// <returns>The extracted string </returns> 
        public static String RecognizeChars(System.Drawing.Bitmap bmp, 
string language) 
        { 
            IntPtr res= new IntPtr(); 
            Byte bpp = 0; 

            switch (bmp.PixelFormat) 
            { 
                case PixelFormat.Format1bppIndexed: bpp = 1; break; 
                case PixelFormat.Format8bppIndexed: bpp = 8; break; 
                case PixelFormat.Format48bppRgb: bpp = 48; break; 
                case PixelFormat.Format32bppRgb: bpp = 32; break; 
                case PixelFormat.Format24bppRgb: bpp = 24; break; 
                //TEMPORARY TEST 
                case PixelFormat.Format32bppArgb: bpp = 32; break; 
                default: throw new BadImageFormatException("Please use an 
image format of.... TODO");//TODO 

            } 

            unsafe 
            { 
                byte[] imgArray = ConvertImageToByteArray(bmp, bpp); 

                fixed (byte* ptr = &imgArray[0]) 
                { 
                    res = ExtractDataWithExeLogic((uint)bmp.Width, 
(uint)bmp.Height, ptr, language, bpp); 
                } 
            } 

            //TODO: Character analysis... 

            return PointerToString(res); 
        } 


        /// <summary> 
        /// Convert a pointer to a String. 
        /// </summary> 
        /// <param name="data">The pointer to be converted.</param> 
        /// <returns>The string converted.</returns> 
        private static String PointerToString(IntPtr data) 
        { 
            String result = ""; 
            int i=0; 

            if (data != IntPtr.Zero) 
            { 
                char characterExtracted = (char)Marshal.ReadByte(data, 
i); 

                while (characterExtracted.ToString() != "\0") 
                { 
                    result += characterExtracted.ToString(); 
                    i++; 
                    characterExtracted = (char)Marshal.ReadByte(data, 
i); 
                } 

                //Unaffect the pointer 
                data = IntPtr.Zero; 
            } 

            return result; 



        } 
        /// <summary> 
        /// Build and return a AnalysisResults object that contains the 
results of the Tesseract 
        /// extraction. 
        /// </summary> 
        /// <param name="data">A pointer to the data to be 
transformed.</param> 
        /// <returns>A AnalysisResults structure that contains the results 
of the Tesseract 
        /// extraction.</returns> 
        private static AnalysisResults BuildAnalysisResults(IntPtr 
data) 
        { 
            AnalysisResults analysisResults = new AnalysisResults(); 

            if (data != IntPtr.Zero) 
            { 


                //Retreives the structure starting at the address of the 
pointer 
                analysisResults.generalInfos = (GeneralInfos) 
(Marshal.PtrToStructure(data, typeof(GeneralInfos))); 

                //Move to the text data (next data is actually at + 20 not 
4, but 4 will be 
                //add up with 16 in the for loop (and make 20) 
                data = new IntPtr(data.ToInt32() + 4); 
                for (int i = 0; i < 
analysisResults.generalInfos.count; i++) 
                { 
                    //Move to the next character 
                    data = new IntPtr(data.ToInt32() + 16); 

                    CharDescription charInfos = (CharDescription) 
(Marshal.PtrToStructure(data, typeof(CharDescription))); 
                    analysisResults.charactersFound.Add(charInfos); 

                    //TODO: ADD CHARACTER ANALYSIS TO FORMAT THE STRING 
OR/AND TO REMOVE UNWANTED CHARACTERS 

                    //Lets add all the white space detected 
                    for (int j = 0; j < charInfos.blanks; j++) 
                        analysisResults.stringFound += " "; 

                    //Now lets add the char found 
                    analysisResults.stringFound += 
(char)charInfos.char_code; 

                } 

                //Unaffect the pointer 
                data = IntPtr.Zero; 
            } 
            return analysisResults; 

        } 
    } 
} 

>>>>>>>>>>>>>>>>>>>>>>>

On Sunday, 2 March 2008 18:33:28 UTC-5, EricD wrote:
>
> Hi all, 
>
> once again I'm having some difficulties while trying to use tessdll 
> from C#. 
>
> I don't know if I'm doing the marshaling correctly. Let me present the 
> code for my wrapper before I start explaining my problem: 
> <CODE> 
> using System; 
> using System.Collections.Generic; 
> using System.Text; 
> using System.Drawing; 
> using System.Runtime.InteropServices; 
> using System.Drawing.Imaging; 
> using System.IO; 
>
> [StructLayout(LayoutKind.Sequential)] 
> public struct EANYCODE_CHAR                 /*single character */ 
> { 
>     UInt16 char_code;              /*character itself */ 
>     Int16 left;                    /*of char (-1) */ 
>     Int16 right;                   /*of char (-1) */ 
>     Int16 top;                     /*of char (-1) */ 
>     Int16 bottom;                  /*of char (-1) */ 
>     Int16 font_index;              /*what font (0) */ 
>     byte confidence;              /*0=perfect, 100=reject (0/100) */ 
>     byte point_size;              /*of char, 72=i inch, (10) */ 
>     sbyte blanks;                   /*no of spaces before this char 
> (1) */ 
>     byte formatting;              /*char formatting (0) */ 
>
> }                  /*single character */ 
>
> [StructLayout(LayoutKind.Sequential)] 
> public struct ETEXT_DESC     /*output header */ 
> { 
>   Int16 count;                   /*chars in this buffer(0) */ 
>   Int16 progress;                /*percent complete increasing (0-100) 
> */ 
>   sbyte more_to_come;             /*true if not last */ 
>   sbyte ocr_alive;                /*ocr sets to 1, HP 0 */ 
>   sbyte err_code;                 /*for errcode use */ 
>   unsafe void* cancel;            /*returns true to cancel */ 
>   unsafe void* cancel_this;             /*this or other data for 
> cancel*/ 
>   Int32 end_time;              /*time to stop if not 0*/ 
>   EANYCODE_CHAR text;         /*character data */ 
> } 
>
> namespace testingWrapper 
> { 
>     class WrapperTesseract 
>     { 
>         [DllImport("tessdll.dll")] 
>         private unsafe static extern int 
> TessDllBeginPageUpright(UInt32 xsize, UInt32 ysize, byte* buf, string 
> lang); 
>
>         [DllImport("tessdll.dll")] 
>         private unsafe static extern IntPtr 
> TessDllRecognize_all_Words(); 
>
>         private static byte[] 
> ConvertImageToByteArray(System.Drawing.Image imageToConvert, 
> ImageFormat formatOfImage) 
>         { 
>             byte[] Ret; 
>             using (MemoryStream ms = new MemoryStream()) 
>             { 
>                 imageToConvert.Save(ms, formatOfImage); 
>                 Ret = ms.ToArray(); 
>             } 
>             return Ret; 
>         } 
>
>         public static void recognizeChars(System.Drawing.Bitmap bmp, 
> string language) 
>         { 
>             //This is a test image to see if i have some output as the 
> dlltest 
>             bmp = new Bitmap("test5.tif"); 
>
>             //Convertion from image to a byte array 
>             byte[] byteBmp = ConvertImageToByteArray(bmp, 
> bmp.RawFormat); 
>
>             unsafe 
>             { 
>                 //Convertion from a byte array to a pointer 
>                 fixed (byte* imgPtr = &byteBmp[0]) 
>                 { 
>
>                     BitmapData bmd1 = bmp.LockBits(new Rectangle(0, 0, 
> bmp.Size.Width, bmp.Size.Height), 
> System.Drawing.Imaging.ImageLockMode.ReadWrite, bmp.PixelFormat); 
>                     ETEXT_DESC d = new ETEXT_DESC(); 
>
>                     TessDllBeginPageUpright((uint)bmp.Width, 
> (uint)bmp.Height, imgPtr, "eng"); //(void*)bmd1.Scan0.ToPointer(), 
> "eng"); 
>
>                     IntPtr data = TessDllRecognize_all_Words(); 
>
>                     d = (ETEXT_DESC)(Marshal.PtrToStructure(data, 
> typeof(ETEXT_DESC))); 
>                     data = IntPtr.Zero; 
>
>
>                 } 
>             } 
>         } 
>     } 
> } 
>
> </CODE> 
>
> Alright, so what I've done is redefine ETEXT_DESC as a managed 
> struct (I've also specified [StructLayout(LayoutKind.Sequential)] to 
> keep the fields in the order I defined them), but the difference is that it 
> contains a single EANYCODE_CHAR text and not an array (I can't seem to find 
> out how to do that without getting an AccessViolation crash). 
>
> I also had to find a way to convert a C# image into an "unsigned char*", 
> so I thought that a byte* would be my best candidate. But I'm not sure 
> whether I've done it correctly. 
>
> Right now the results from dlltess and my wrapper for the same TIFF 
> are different, and the text variable never seems to be correct. 
>
>
> Anyone could tell me what i'm doing wrong? 
>
> If i manage to write the code for this wrapper, ill post the solution 
> and the .dll on the group. 
>
>
> Thanks for any help. 
>
>  

> Regards,

George 

-- 
You received this message because you are subscribed to the Google
Groups "tesseract-ocr" group.
To post to this group, send email to tesseract-ocr@googlegroups.com
To unsubscribe from this group, send email to
tesseract-ocr+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/tesseract-ocr?hl=en

Reply via email to