Like I said, I didn't test it :) Apparently you need to seek to zero after
tesseract has run, and not right after Image.open, because tesseract is still
reading the file at that point. Fixed version (which I actually tested):
# -*- coding: utf-8 -*-
import pytesseract
from PIL import Image
from pydal.validators import Validator, ValidationError
class IMG_HAS_TEXT(Validator):
    """Validate that an uploaded image contains required words, using OCR.

    The upload's file object is read by pytesseract and then rewound to
    offset 0 so that whatever consumes the upload next does not start at
    end-of-file (which is what produced the 0-byte stored images).

    Args:
        check_tokens: iterable of words to look for in the OCR text.
            Matching is case-insensitive. Defaults to no tokens, in which
            case any non-empty OCR text passes.
        error_message: message carried by the raised ValidationError.
        require_all: when True (default, the historical behaviour) every
            token must be present; when False a single token is enough.
    """

    def __init__(self, check_tokens=None,
                 error_message="Image doesn't have the required text",
                 require_all=True):
        self.error_message = error_message
        self.check_tokens = check_tokens or []
        self.require_all = require_all

    def _extract_text(self, value):
        """Return the lower-cased OCR text of ``value.file`` and rewind it."""
        stream = value.file
        try:
            return pytesseract.image_to_string(Image.open(stream)).lower()
        finally:
            # The rewind must happen after tesseract has consumed the stream,
            # and also when OCR fails, so the upload is never left at EOF.
            stream.seek(0)

    def validate(self, value, record_id=None):
        """Return ``value`` if the image text matches, else raise.

        Raises:
            ValidationError: if the upload cannot be read as an image, the
                OCR yields no text, or the required tokens are missing.
        """
        try:
            text = self._extract_text(value)
        except Exception:
            # Unreadable or non-image uploads are reported as a validation
            # failure rather than as a server error.
            raise ValidationError(self.translator(self.error_message))

        # The OCR text is lower-cased, so the tokens must be as well.
        hits = [token.lower() in text for token in self.check_tokens]
        # getattr keeps subclasses that assign attributes without calling
        # this __init__ working with the historical "all tokens" rule.
        if getattr(self, "require_all", True):
            matched = all(hits)
        else:
            matched = any(hits) or not hits
        if not text or not matched:
            raise ValidationError(self.translator(self.error_message))
        return value


class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT):
    """Accept an uploaded image whose text mentions "receipt" or "invoice".

    Either word is sufficient; requiring both would reject ordinary
    receipts that never mention "invoice" and vice versa.
    """

    def __init__(self,
                 error_message="The uploaded file is not a receipt or invoice!"):
        # Explicit base-class call (instead of zero-argument super()) keeps
        # the snippet usable on Python 2 based web2py installs.
        IMG_HAS_TEXT.__init__(
            self,
            check_tokens=("receipt", "invoice"),
            error_message=error_message,
            require_all=False,
        )
A sábado, 18 de março de 2023 à(s) 07:20:04 UTC, mostwanted escreveu:
> Hey Leonel, thanks for your reply but the above code is still saving
> empty (0byte) images
>
> On Friday, March 17, 2023 at 5:22:07 PM UTC+2 Leonel Câmara wrote:
>
>> What's happening here is that your validator is reading the file and it's
>> not "rewinding" it afterwards, so when the next step gets it the file
>> descriptor is at the end of the file, hence the 0-byte size.
>>
>> I would recommend rewriting it to something like this (the important part
>> being the value.file.seek(0)) note that I haven't tested this code at all.
>>
>> import pytesseract
>> from PIL import Image
>> from pydal.validators import Validator, ValidationError
>>
>> class IMG_HAS_TEXT(Validator):
>>
>> def __init__(self, check_tokens=None, error_message="Image doesn't
>> have the required text"):
>> self.error_message = error_message
>> self.check_tokens = check_tokens or []
>>
>> def validate(self, value, record_id=None):
>> try:
>> image = Image.open(value.file)
>> value.file.seek(0)
>> text = pytesseract.image_to_string(image).lower()
>> if not text or not all(token in text for token in
>> self.check_tokens):
>> raise ValidationError(self.translator(self.error_message))
>> return value
>> except:
>> raise ValidationError(self.translator(self.error_message))
>>
>> class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT):
>>
>> def __init__(self, error_message="The uploaded file is not a receipt
>> or invoice!"):
>> self.error_message = error_message
>> self.check_tokens = ("receipt", "invoice")
>>
>>
>> A terça-feira, 14 de março de 2023 à(s) 20:55:11 UTC, mostwanted escreveu:
>>
>>> I have a little problem with my validator code: it's saving 0-byte images
>>> and I just don't know why. The value variable actually contains an image
>>> (I have tested it several times to see what it contains), but for some
>>> reason when it comes to saving it in the database it saves an empty image.
>>> Please assist me where I could be going wrong. Regards
>>>
>>> from gluon import *
>>> import pytesseract
>>> from PIL import Image
>>>
>>> class IS_RECEIPT_OR_INVOICE(object):
>>> def __init__(self, error_message="The uploaded file is not a receipt
>>> or invoice!"):
>>> self.error_message = error_message
>>>
>>> def __call__(self, value):
>>> error = None
>>> try:
>>> image = Image.open(value.file)
>>> text = pytesseract.image_to_string(image)
>>> if "receipt" not in text.lower() and "invoice" not in
>>> text.lower() and "RECEIPT" not in text and "INVOICE" not in text:
>>> error = self.error_message
>>> except:
>>> error = self.error_message
>>> return (value, error)
>>
>>
--
Resources:
- http://web2py.com
- http://web2py.com/book (Documentation)
- http://github.com/web2py/web2py (Source code)
- https://code.google.com/p/web2py/issues/list (Report Issues)
---
You received this message because you are subscribed to the Google Groups
"web2py-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/web2py/eb4345c4-46c1-40b6-ae9c-86a033444b24n%40googlegroups.com.