telugu-boy/noisy_ocr.py

## noisy_ocr.py
# April 12, 2024

from typing import Dict, Optional, Tuple
from pytesseract import Output, pytesseract
import io
import cv2
import numpy as np
import imutils
from imutils.perspective import four_point_transform
import urllib.request

def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Image array; the first two entries of ``image.shape`` are
            read as (height, width).
        width: Target width in pixels. When given it takes precedence and
            ``height`` is ignored.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself (not a copy) when neither ``width`` nor ``height``
        is given, otherwise the resized image.
    """
    (h, w) = image.shape[:2]

    # no target size requested: hand the original back untouched
    if width is None and height is None:
        return image

    if width is None:
        # only the height was requested: scale the width by the same ratio
        r = height / float(h)
        # clamp to 1px so a very narrow image cannot truncate to a zero
        # width, which cv2.resize rejects
        dim = (max(1, int(w * r)), height)
    else:
        # a width was requested (height, if also passed, is ignored):
        # scale the height by the same ratio, again never below 1px
        r = width / float(w)
        dim = (width, max(1, int(h * r)))

    return cv2.resize(image, dim, interpolation = inter)

def preprocess_image(image: cv2.typing.MatLike) -> cv2.typing.MatLike:
    """Preprocess the image for OCR.

    Steps, in order: per-channel background flattening, 2x cubic upscaling,
    grayscale conversion, fixed threshold at 150, CLAHE, and inversion.

    Args:
        image: Image to clean up. ``ocr_image`` passes the result of
            ``cv2.imdecode(..., cv2.IMREAD_COLOR)``; a single-channel image
            is also accepted.

    Returns:
        A single-channel image twice as wide and twice as tall as the input.
    """
    normalized_planes = []
    for plane in cv2.split(image):
        # Dilation followed by a large median blur is used as an estimate of
        # the plane's background; subtracting it flattens uneven lighting.
        dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
        bg_img = cv2.medianBlur(dilated_img, 21)
        diff_img = 255 - cv2.absdiff(plane, bg_img)
        # stretch each plane to the full 0..255 range
        norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
        normalized_planes.append(norm_img)

    normalized = cv2.merge(normalized_planes)

    # Upscale 2x before thresholding
    rescaled_image = cv2.resize(normalized, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    # Convert to grayscale; an image that is already single-channel (2-D)
    # is used as-is because BGR2GRAY would reject it
    if rescaled_image.ndim == 3:
        gray_image = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
    else:
        gray_image = rescaled_image

    # Apply a fixed global threshold
    _, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY)

    # NOTE(review): a 2x2 morphological opening was tried at this point and
    # is currently disabled, so the thresholded image is used directly.

    # NOTE(review): CLAHE runs on the already-thresholded image; confirm this
    # ordering is intended.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_adjusted_image = clahe.apply(binary_image)

    # invert the result before handing it to the OCR step
    return cv2.bitwise_not(contrast_adjusted_image)

def ocr_image(image_stream: io.BytesIO, return_img = None) -> Tuple[Dict[str, list], Optional[cv2.typing.MatLike]]:
    """Run Tesseract OCR on encoded image bytes.

    Args:
        image_stream: Stream holding an encoded image. It is rewound and
            read from the start regardless of its current position.
        return_img: When truthy, also return the preprocessed image scaled
            to a height of 500 pixels.

    Returns:
        A ``(data, preview)`` tuple. ``data`` is the dictionary produced by
        ``pytesseract.image_to_data`` with ``Output.DICT`` (for example
        ``data['text']`` is a list of strings). ``preview`` is the resized
        preprocessed image, or None when ``return_img`` is falsy.
        OCR runs on the output of ``preprocess_image``, not on the original
        pixels, so anything positional in ``data`` refers to that image.

    Raises:
        cv2.error: If the bytes cannot be decoded as an image.
    """
    image_stream.seek(0)
    img_np = np.frombuffer(image_stream.read(), dtype=np.uint8)
    img = cv2.imdecode(img_np, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals undecodable data by returning None; fail here with
        # a clear message instead of deep inside the preprocessing calls.
        # cv2.error is used so callers handling OpenCV failures still match.
        raise cv2.error("ocr_image: could not decode image data")

    processed_img = preprocess_image(img)
    data = pytesseract.image_to_data(processed_img, output_type=Output.DICT, config='--psm 4')

    preview = image_resize(processed_img, height=500) if return_img else None
    return data, preview

if __name__ == "__main__":
    # Manual smoke test: download an image, OCR it, print the recognised
    # text and show the preprocessed image in a window.
    import sys

    headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}
    # NOTE(review): the default attachment URL carries what look like
    # expiry/signature parameters (ex/is/hm) and may stop working; pass a
    # different image URL as the first command-line argument to override it.
    assim_url = "https://cdn.discordapp.com/attachments/1005603724066029610/1228232883349749800/image.png?ex=662b4c02&is=6618d702&hm=19529ffce15930f392286f13a1f2921d253904af4de019c6bbf0c2c9d938de3d&"
    if len(sys.argv) > 1:
        assim_url = sys.argv[1]

    request = urllib.request.Request(assim_url, headers=headers)
    # a timeout keeps the script from hanging forever on a stalled server
    with urllib.request.urlopen(request, timeout=30) as f:
        # Read the response data
        image_data = f.read()

    data, img = ocr_image(io.BytesIO(image_data), True)
    print(' '.join(data['text']))
    cv2.imshow("img", img)
    cv2.waitKey(0)
    # close the preview window once a key has been pressed
    cv2.destroyAllWindows()
	# April 12, 2024

	from typing import Dict, Optional, Tuple
	from pytesseract import Output, pytesseract
	import io
	import cv2
	import numpy as np
	import imutils
	from imutils.perspective import four_point_transform
	import urllib.request

	def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
	    """Resize ``image`` to a target width or height, keeping its aspect ratio.

	    Args:
	        image: Image array; the first two entries of ``image.shape`` are
	            read as (height, width).
	        width: Target width in pixels. When given it takes precedence and
	            ``height`` is ignored.
	        height: Target height in pixels; only used when ``width`` is None.
	        inter: Interpolation flag forwarded to ``cv2.resize``.

	    Returns:
	        ``image`` itself (not a copy) when neither ``width`` nor ``height``
	        is given, otherwise the resized image.
	    """
	    (h, w) = image.shape[:2]

	    # no target size requested: hand the original back untouched
	    if width is None and height is None:
	        return image

	    if width is None:
	        # only the height was requested: scale the width by the same ratio
	        r = height / float(h)
	        # clamp to 1px so a very narrow image cannot truncate to a zero
	        # width, which cv2.resize rejects
	        dim = (max(1, int(w * r)), height)
	    else:
	        # a width was requested (height, if also passed, is ignored):
	        # scale the height by the same ratio, again never below 1px
	        r = width / float(w)
	        dim = (width, max(1, int(h * r)))

	    return cv2.resize(image, dim, interpolation = inter)

	def preprocess_image(image: cv2.typing.MatLike) -> cv2.typing.MatLike:
	    """Preprocess the image for OCR.

	    Steps, in order: per-channel background flattening, 2x cubic upscaling,
	    grayscale conversion, fixed threshold at 150, CLAHE, and inversion.

	    Args:
	        image: Image to clean up. ``ocr_image`` passes the result of
	            ``cv2.imdecode(..., cv2.IMREAD_COLOR)``; a single-channel image
	            is also accepted.

	    Returns:
	        A single-channel image twice as wide and twice as tall as the input.
	    """
	    normalized_planes = []
	    for plane in cv2.split(image):
	        # Dilation followed by a large median blur is used as an estimate of
	        # the plane's background; subtracting it flattens uneven lighting.
	        dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
	        bg_img = cv2.medianBlur(dilated_img, 21)
	        diff_img = 255 - cv2.absdiff(plane, bg_img)
	        # stretch each plane to the full 0..255 range
	        norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
	        normalized_planes.append(norm_img)

	    normalized = cv2.merge(normalized_planes)

	    # Upscale 2x before thresholding
	    rescaled_image = cv2.resize(normalized, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

	    # Convert to grayscale; an image that is already single-channel (2-D)
	    # is used as-is because BGR2GRAY would reject it
	    if rescaled_image.ndim == 3:
	        gray_image = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
	    else:
	        gray_image = rescaled_image

	    # Apply a fixed global threshold
	    _, binary_image = cv2.threshold(gray_image, 150, 255, cv2.THRESH_BINARY)

	    # NOTE(review): a 2x2 morphological opening was tried at this point and
	    # is currently disabled, so the thresholded image is used directly.

	    # NOTE(review): CLAHE runs on the already-thresholded image; confirm this
	    # ordering is intended.
	    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    contrast_adjusted_image = clahe.apply(binary_image)

	    # invert the result before handing it to the OCR step
	    return cv2.bitwise_not(contrast_adjusted_image)

	def ocr_image(image_stream: io.BytesIO, return_img = None) -> Tuple[Dict[str, list], Optional[cv2.typing.MatLike]]:
	    """Run Tesseract OCR on encoded image bytes.

	    Args:
	        image_stream: Stream holding an encoded image. It is rewound and
	            read from the start regardless of its current position.
	        return_img: When truthy, also return the preprocessed image scaled
	            to a height of 500 pixels.

	    Returns:
	        A ``(data, preview)`` tuple. ``data`` is the dictionary produced by
	        ``pytesseract.image_to_data`` with ``Output.DICT`` (for example
	        ``data['text']`` is a list of strings). ``preview`` is the resized
	        preprocessed image, or None when ``return_img`` is falsy.
	        OCR runs on the output of ``preprocess_image``, not on the original
	        pixels, so anything positional in ``data`` refers to that image.

	    Raises:
	        cv2.error: If the bytes cannot be decoded as an image.
	    """
	    image_stream.seek(0)
	    img_np = np.frombuffer(image_stream.read(), dtype=np.uint8)
	    img = cv2.imdecode(img_np, cv2.IMREAD_COLOR)
	    if img is None:
	        # imdecode signals undecodable data by returning None; fail here with
	        # a clear message instead of deep inside the preprocessing calls.
	        # cv2.error is used so callers handling OpenCV failures still match.
	        raise cv2.error("ocr_image: could not decode image data")

	    processed_img = preprocess_image(img)
	    data = pytesseract.image_to_data(processed_img, output_type=Output.DICT, config='--psm 4')

	    preview = image_resize(processed_img, height=500) if return_img else None
	    return data, preview

	if __name__ == "__main__":
	    # Manual smoke test: download an image, OCR it, print the recognised
	    # text and show the preprocessed image in a window.
	    import sys

	    headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'}
	    # NOTE(review): the default attachment URL carries what look like
	    # expiry/signature parameters (ex/is/hm) and may stop working; pass a
	    # different image URL as the first command-line argument to override it.
	    assim_url = "https://cdn.discordapp.com/attachments/1005603724066029610/1228232883349749800/image.png?ex=662b4c02&is=6618d702&hm=19529ffce15930f392286f13a1f2921d253904af4de019c6bbf0c2c9d938de3d&"
	    if len(sys.argv) > 1:
	        assim_url = sys.argv[1]

	    request = urllib.request.Request(assim_url, headers=headers)
	    # a timeout keeps the script from hanging forever on a stalled server
	    with urllib.request.urlopen(request, timeout=30) as f:
	        # Read the response data
	        image_data = f.read()

	    data, img = ocr_image(io.BytesIO(image_data), True)
	    print(' '.join(data['text']))
	    cv2.imshow("img", img)
	    cv2.waitKey(0)
	    # close the preview window once a key has been pressed
	    cv2.destroyAllWindows()
No results found