-
-
Save christianroman/8485394 to your computer and use it in GitHub Desktop.
| import tornado.ioloop | |
| import tornado.web | |
| import urllib2 as urllib | |
| from PIL import Image | |
| from cStringIO import StringIO | |
| import numpy as np | |
| import tesserwrap | |
| import cv2 | |
class MainHandler(tornado.web.RequestHandler):
    """Download the CURP captcha, strip its noise and reply with the OCR text.

    NOTE(review): the download uses the blocking ``urllib2`` client, so the
    Tornado I/O loop cannot serve other requests while one is in progress.
    """

    # URL the captcha image is downloaded from.
    CAPTCHA_URL = "http://consultas.curp.gob.mx/CurpSP/imagenCatcha"
    # Seconds to wait for the captcha server before giving up.
    FETCH_TIMEOUT = 10
    # Only these characters may appear in the recognised text.
    CHAR_WHITELIST = "0123456789abcdefghijklmnopqrstuvwxyz"

    def get(self):
        """Handle GET: write the recognised captcha text to the response."""
        original = self._fetch_captcha()
        cleaned = self._remove_noise(original)
        self.write(self._read_text(cleaned))

    def _fetch_captcha(self):
        """Download the captcha and return it as an RGB PIL image."""
        response = urllib.urlopen(self.CAPTCHA_URL, timeout=self.FETCH_TIMEOUT)
        try:
            payload = response.read()
        finally:
            # Release the socket even if reading fails.
            response.close()
        # Force three channels so the array handling below never sees a
        # palette, greyscale or RGBA layout.
        return Image.open(StringIO(payload)).convert("RGB")

    @staticmethod
    def _remove_noise(original):
        """Erase the distractor lines and return a binarised PIL image."""
        # PIL stores pixels as RGB while OpenCV expects BGR: reverse the
        # channel axis. The copy makes the array contiguous and writable so
        # cv2.line can draw on it in place.
        cv_img = np.asarray(original)[:, :, ::-1].copy()

        # Grayscale + Canny edges feed the line detector.
        gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 60, 200, apertureSize=3)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 1,
                                minLineLength=0, maxLineGap=0)

        # HoughLinesP returns None when nothing is detected. Its result is
        # shaped (1, N, 4) in OpenCV 2.4 and (N, 1, 4) in OpenCV 3+, so
        # flatten to (N, 4) to visit every segment on either version.
        if lines is not None:
            for x1, y1, x2, y2 in lines.reshape(-1, 4):
                # Paint each detected segment white to blend it into the
                # background; plain ints keep newer OpenCV bindings happy.
                cv2.line(cv_img, (int(x1), int(y1)), (int(x2), int(y2)),
                         (255, 255, 255), 2)

        # Re-derive the grayscale image now that the lines are painted over.
        processed = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(processed, (3, 3), 0)
        threshold = cv2.threshold(blur, 128, 255, cv2.THRESH_BINARY)[1]

        # Morphological opening with a 6x6 elliptical kernel.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
        morph = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, kernel)

        # Back to PIL, which is what tesserwrap consumes.
        return Image.fromarray(morph)

    def _read_text(self, pil_img):
        """Run Tesseract over *pil_img* and return the stripped text."""
        tesseract = tesserwrap.tesseract()
        tesseract.set_variable("tessedit_char_whitelist", self.CHAR_WHITELIST)
        # Page segmentation mode 8: treat the image as a single word.
        tesseract.set_page_seg_mode(8)
        return tesseract.ocr_image(pil_img).strip()
# Routing table: requests for the site root are served by MainHandler.
application = tornado.web.Application([(r"/", MainHandler)])


if __name__ == "__main__":
    # Listen on port 8888, then hand control to the Tornado I/O loop.
    application.listen(8888)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()
Que galán te viste!
Hola, ¿me podrías ayudar? Estoy tratando de instalar tesserwrap, pero no me deja:
λ python server.py
Traceback (most recent call last):
File "server.py", line 10, in <module>
import tesserwrap
ImportError: No module named tesserwrap
λ pip install tesserwrap
Collecting tesserwrap
Using cached https://files.pythonhosted.org/packages/04/92/4c2134fc465d576c05d4426bc2f1ba7871652d78d3d913bec0bffe0afe8b/tesserwrap-0.1.6.tar.gz
Complete output from command python setup.py egg_info:
"ld" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.
"ld" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "c:\users\usuario\appdata\local\temp\pip-install-xcu3ya\tesserwrap\setup.py", line 45, in <module>
extra_lib_paths)
File "c:\users\usuario\appdata\local\temp\pip-install-xcu3ya\tesserwrap\setup.py", line 30, in find_closest_libname
"Cannot find Tesseract via ldconfig, confirm it is installed.")
Exception: Cannot find Tesseract via ldconfig, confirm it is installed.
----------------------------------------
Command "python setup.py egg_info" failed with error code 1 in c:\users\usuario\appdata\local\temp\pip-install-xcu3ya\tesserwrap\
Thanks, this helped me a lot! :-)