[web2py] Re: Validator not saving images
Like I said, I didn't test it :) Apparently you need to seek to zero after tesseract and not after image.open, as tesseract is the one reading it. Fixed version (which I actually tested): # -*- coding: utf-8 -*- import pytesseract from PIL import Image from pydal.validators import Validator, ValidationError class IMG_HAS_TEXT(Validator): def __init__(self, check_tokens=None, error_message="Image doesn't have the required text"): self.error_message = error_message self.check_tokens = check_tokens or [] def validate(self, value, record_id=None): try: image = Image.open(value.file) text = pytesseract.image_to_string(image).lower() value.file.seek(0) if not text or not all(token in text for token in self.check_tokens): raise ValidationError(self.translator(self.error_message)) return value except Exception as e: raise ValidationError(self.translator(self.error_message)) class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): def __init__(self, error_message="The uploaded file is not a receipt or invoice!"): self.error_message = error_message self.check_tokens = ("receipt", "invoice") A sábado, 18 de março de 2023 à(s) 07:20:04 UTC, mostwanted escreveu: > Hey Leonel, thanks for your reply but the above code is still saving > empty (0byte) images > > On Friday, March 17, 2023 at 5:22:07 PM UTC+2 Leonel Câmara wrote: > >> What's happening here is that your validator is reading the file and it's >> not "rewinding" it afterwards so when the next step gets it the file >> descriptor is at the end of the file hence the 0 bytes size. >> >> I would recommend rewriting it to something like this (the important part >> being the value.file.seek(0)) note that I haven't tested this code at all. 
>> >> import pytesseract >> from PIL import Image >> from pydal.validators import Validator, ValidationError >> >> class IMG_HAS_TEXT(Validator): >> >> def __init__(self, check_tokens=None, error_message="Image doesn't >> have the required text"): >> self.error_message = error_message >> self.check_tokens = check_tokens or [] >> >> def validate(self, value, record_id=None): >> try: >> image = Image.open(value.file) >> value.file.seek(0) >> text = pytesseract.image_to_string(image).lower() >> if not text or not all(token in text for token in >> self.check_tokens): >> raise ValidationError(self.translator(self.error_message)) >> return value >> except: >> raise ValidationError(self.translator(self.error_message)) >> >> class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): >> >> def __init__(self, error_message="The uploaded file is not a receipt >> or invoice!"): >> self.error_message = error_message >> self.check_tokens = ("receipt", "invoice") >> >> >> A terça-feira, 14 de março de 2023 à(s) 20:55:11 UTC, mostwanted escreveu: >> >>> I have a little problem with my validator code, its saving 0 byte images >>> and I just don't know why, the value variable is an image that actually >>> contains an image, I have tested it several times to see what it contains >>> and its an image but for some reason when it comes to saving it in the >>> database it saves an empty image, please assist me where I could be going >>> wrong. 
Regards >>> >>> from gluon import * >>> import pytesseract >>> from PIL import Image >>> >>> class IS_RECEIPT_OR_INVOICE(object): >>> def __init__(self, error_message="The uploaded file is not a receipt >>> or invoice!"): >>> self.error_message = error_message >>> >>> def __call__(self, value): >>> error = None >>> try: >>> image = Image.open(value.file) >>> text = pytesseract.image_to_string(image) >>> if "receipt" not in text.lower() and "invoice" not in >>> text.lower() and "RECEIPT" not in text and "INVOICE" not in text: >>> error = self.error_message >>> except: >>> error = self.error_message >>> return (value, error) >> >> -- Resources: - http://web2py.com - http://web2py.com/book (Documentation) - http://github.com/web2py/web2py (Source code) - https://code.google.com/p/web2py/issues/list (Report Issues) --- You received this message because you are subscribed to the Google Groups "web2py-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to web2py+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/web2py/eb4345c4-46c1-40b6-ae9c-86a033444b24n%40googlegroups.com.
[web2py] Re: Validator not saving images
Hey Leonel, thanks for your reply, but the above code is still saving empty (0-byte) images. On Friday, March 17, 2023 at 5:22:07 PM UTC+2 Leonel Câmara wrote: > What's happening here is that your validator is reading the file and it's > not "rewinding" it afterwards so when the next step gets it the file > descriptor is at the end of the file hence the 0 bytes size. > > I would recommend rewriting it to something like this (the important part > being the value.file.seek(0)) note that I haven't tested this code at all. > > import pytesseract > from PIL import Image > from pydal.validators import Validator, ValidationError > > class IMG_HAS_TEXT(Validator): > > def __init__(self, check_tokens=None, error_message="Image doesn't > have the required text"): > self.error_message = error_message > self.check_tokens = check_tokens or [] > > def validate(self, value, record_id=None): > try: > image = Image.open(value.file) > value.file.seek(0) > text = pytesseract.image_to_string(image).lower() > if not text or not all(token in text for token in > self.check_tokens): > raise ValidationError(self.translator(self.error_message)) > return value > except: > raise ValidationError(self.translator(self.error_message)) > > class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): > > def __init__(self, error_message="The uploaded file is not a receipt > or invoice!"): > self.error_message = error_message > self.check_tokens = ("receipt", "invoice") > > > A terça-feira, 14 de março de 2023 à(s) 20:55:11 UTC, mostwanted escreveu: > >> I have a little problem with my validator code, its saving 0 byte images >> and I just don't know why, the value variable is an image that actually >> contains an image, I have tested it several times to see what it contains >> and its an image but for some reason when it comes to saving it in the >> database it saves an empty image, please assist me where I could be going >> wrong. 
Regards >> >> from gluon import * >> import pytesseract >> from PIL import Image >> >> class IS_RECEIPT_OR_INVOICE(object): >> def __init__(self, error_message="The uploaded file is not a receipt >> or invoice!"): >> self.error_message = error_message >> >> def __call__(self, value): >> error = None >> try: >> image = Image.open(value.file) >> text = pytesseract.image_to_string(image) >> if "receipt" not in text.lower() and "invoice" not in >> text.lower() and "RECEIPT" not in text and "INVOICE" not in text: >> error = self.error_message >> except: >> error = self.error_message >> return (value, error) > > -- Resources: - http://web2py.com - http://web2py.com/book (Documentation) - http://github.com/web2py/web2py (Source code) - https://code.google.com/p/web2py/issues/list (Report Issues) --- You received this message because you are subscribed to the Google Groups "web2py-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to web2py+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/web2py/d9fab1d2-83df-4f28-9179-61dadbbba347n%40googlegroups.com.
[web2py] Re: Validator not saving images
What's happening here is that your validator is reading the file and it's not "rewinding" it afterwards, so when the next step gets it the file descriptor is at the end of the file, hence the 0 bytes size. I would recommend rewriting it to something like this (the important part being the value.file.seek(0)); note that I haven't tested this code at all. import pytesseract from PIL import Image from pydal.validators import Validator, ValidationError class IMG_HAS_TEXT(Validator): def __init__(self, check_tokens=None, error_message="Image doesn't have the required text"): self.error_message = error_message self.check_tokens = check_tokens or [] def validate(self, value, record_id=None): try: image = Image.open(value.file) value.file.seek(0) text = pytesseract.image_to_string(image).lower() if not text or not all(token in text for token in self.check_tokens): raise ValidationError(self.translator(self.error_message)) return value except: raise ValidationError(self.translator(self.error_message)) class IS_RECEIPT_OR_INVOICE(IMG_HAS_TEXT): def __init__(self, error_message="The uploaded file is not a receipt or invoice!"): self.error_message = error_message self.check_tokens = ("receipt", "invoice") A terça-feira, 14 de março de 2023 à(s) 20:55:11 UTC, mostwanted escreveu: > I have a little problem with my validator code, its saving 0 byte images > and I just don't know why, the value variable is an image that actually > contains an image, I have tested it several times to see what it contains > and its an image but for some reason when it comes to saving it in the > database it saves an empty image, please assist me where I could be going > wrong. 
Regards > > from gluon import * > import pytesseract > from PIL import Image > > class IS_RECEIPT_OR_INVOICE(object): > def __init__(self, error_message="The uploaded file is not a receipt > or invoice!"): > self.error_message = error_message > > def __call__(self, value): > error = None > try: > image = Image.open(value.file) > text = pytesseract.image_to_string(image) > if "receipt" not in text.lower() and "invoice" not in > text.lower() and "RECEIPT" not in text and "INVOICE" not in text: > error = self.error_message > except: > error = self.error_message > return (value, error) -- Resources: - http://web2py.com - http://web2py.com/book (Documentation) - http://github.com/web2py/web2py (Source code) - https://code.google.com/p/web2py/issues/list (Report Issues) --- You received this message because you are subscribed to the Google Groups "web2py-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to web2py+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/web2py/b073dbe7-8b70-4fea-8b54-da5de54d216cn%40googlegroups.com.