RozeFound/screen_ocr.py

## screen_ocr.py
from jeepney import DBusAddress, new_method_call
from jeepney.bus_messages import message_bus, MatchRule
from jeepney.io.blocking import open_dbus_connection, Proxy

from urllib.parse import unquote
from pathlib import Path
from tempfile import mkstemp

import pyocr, io, subprocess, numpy, cv2, screeninfo, time
from PIL import Image

def take_screenshot() -> bytes:
    """Take an interactive screenshot via the XDG desktop portal.

    Calls ``Screenshot`` on ``org.freedesktop.portal.Screenshot`` with
    ``interactive`` set, waits for the ``org.freedesktop.portal.Request``
    signal emitted on the request handle, then reads and deletes the file
    named by the ``uri`` entry of the results.

    Returns:
        The raw bytes of the screenshot file.

    Raises:
        RuntimeError: If the portal response code is non-zero.
    """
    portal = DBusAddress('/org/freedesktop/portal/desktop',
            bus_name='org.freedesktop.portal.Desktop',
            interface='org.freedesktop.portal.Screenshot')

    token = "python_screen_ocr"

    # The connection is used as a context manager so the socket is closed
    # on every exit path (the original never closed it).
    with open_dbus_connection() as connection:
        # The request handle path is built from our unique bus name
        # (leading ':' dropped, '.' replaced by '_') plus the token.
        sender_name = connection.unique_name[1:].replace('.', '_')
        handle = f"/org/freedesktop/portal/desktop/request/{sender_name}/{token}"

        # Subscribe to signals on that handle *before* issuing the call so
        # the response cannot be missed.
        response_rule = MatchRule(type='signal', interface='org.freedesktop.portal.Request', path=handle)
        Proxy(message_bus, connection).AddMatch(response_rule)

        with connection.filter(response_rule) as responses:
            body = ('', {'handle_token': ('s', token), 'interactive': ('b', True)})
            request = new_method_call(portal, 'Screenshot', 'sa{sv}', body)
            connection.send_and_get_reply(request)
            response_message = connection.recv_until_filtered(responses)

    response, results = response_message.body
    if response != 0:
        raise RuntimeError(f"screenshot portal request failed (response code {response})")

    # results['uri'] is a (signature, value) pair; strip the URI scheme and
    # undo percent-encoding to obtain a filesystem path.
    result_uri = results['uri'][1].split('file://', 1)[-1]
    file = Path(unquote(result_uri))

    image = file.read_bytes()
    file.unlink()

    return image

def gnome_screenshot() -> bytes:
    """Capture a user-selected screen area through GNOME Shell.

    Calls ``SelectArea`` on ``org.gnome.Shell.Screenshot`` to obtain a
    rectangle, then ``ScreenshotArea`` to write that rectangle to a
    temporary file, which is read and deleted.

    Returns:
        The raw bytes of the file written by ``ScreenshotArea``.

    Raises:
        RuntimeError: If the ``SelectArea`` reply starts with
            ``"Operation was cancelled"``.
    """
    import os  # function-scope: only needed to close the mkstemp descriptor

    portal = DBusAddress('/org/gnome/Shell/Screenshot',
            bus_name='org.gnome.Shell.Screenshot',
            interface='org.gnome.Shell.Screenshot')

    # Context manager closes the D-Bus socket on every exit path.
    with open_dbus_connection() as connection:

        request = new_method_call(portal, 'SelectArea')
        result = connection.send_and_get_reply(request)

        if result.body[0] == "Operation was cancelled":
            raise RuntimeError("area selection was cancelled")
        x, y, width, height = result.body

        # mkstemp returns an *open* descriptor; only the path is needed,
        # so close it right away instead of leaking it.
        descriptor, filename = mkstemp()
        os.close(descriptor)
        file = Path(filename)

        try:
            # NOTE(review): the boolean is presumably the "flash" flag of
            # ScreenshotArea -- confirm against the GNOME Shell interface.
            body = (x, y, width, height, True, filename)
            request = new_method_call(portal, 'ScreenshotArea', 'iiiibs', body)
            connection.send_and_get_reply(request)

            image = file.read_bytes()
        finally:
            # Remove the temporary file even when the call or read fails.
            file.unlink()

    return image

def get_monitor_info():
    """Return the geometry of the first monitor listed by ``screeninfo``.

    Returns:
        A ``(x, y, width, height)`` tuple read from the first element of
        ``screeninfo.get_monitors()``.
    """
    first_monitor = screeninfo.get_monitors()[0]
    return (
        first_monitor.x,
        first_monitor.y,
        first_monitor.width,
        first_monitor.height,
    )

def gnome_freeze_screenshot() -> bytes:
    """Capture a selected area from a still image of the first monitor.

    Writes a screenshot of the first monitor to a temporary file, shows it
    fullscreen in ``eog`` while GNOME Shell's ``SelectArea`` is running
    (presumably so the selection happens on a "frozen" picture), then crops
    the saved screenshot to the selected rectangle.

    Returns:
        The cropped region encoded as PNG bytes.

    Raises:
        RuntimeError: If the ``SelectArea`` reply starts with
            ``"Operation was cancelled"``, or the screenshot cannot be
            decoded or re-encoded.
    """
    import os  # function-scope: only needed to close the mkstemp descriptor

    portal = DBusAddress('/org/gnome/Shell/Screenshot',
            bus_name='org.gnome.Shell.Screenshot',
            interface='org.gnome.Shell.Screenshot')

    # Query the monitor geometry once and reuse it for both the capture
    # request and the crop offset.
    m_x, m_y, m_width, m_height = get_monitor_info()

    # mkstemp returns an open descriptor; only the path is needed.
    descriptor, filename = mkstemp()
    os.close(descriptor)
    file = Path(filename)

    try:
        # Context manager closes the D-Bus socket on every exit path.
        with open_dbus_connection() as connection:

            body = (m_x, m_y, m_width, m_height, False, filename)
            request = new_method_call(portal, 'ScreenshotArea', 'iiiibs', body)
            connection.send_and_get_reply(request)

            eog = subprocess.Popen(["eog", filename, "--fullscreen"])
            try:
                # Presumably gives the viewer time to open its window
                # before the selection UI starts.
                time.sleep(0.5)

                request = new_method_call(portal, 'SelectArea')
                result = connection.send_and_get_reply(request)
            finally:
                # Always stop the viewer, and reap it so no zombie remains.
                eog.kill()
                eog.wait()

        if result.body[0] == "Operation was cancelled":
            raise RuntimeError("area selection was cancelled")

        np_array = numpy.frombuffer(file.read_bytes(), numpy.uint8)
        image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
        if image is None:
            raise RuntimeError("captured screenshot could not be decoded")

        # Translate the selection into coordinates relative to the
        # monitor's origin, which is where the saved image starts.
        s_x, s_y, width, height = result.body
        d_x, d_y = s_x - m_x, s_y - m_y

        encoded_ok, new_image = cv2.imencode('.png', image[d_y:d_y+height, d_x:d_x+width])
        if not encoded_ok:
            raise RuntimeError("selected area could not be encoded as PNG")

        # imencode yields a numpy array; convert it to match ``-> bytes``.
        return new_image.tobytes()
    finally:
        # The temporary screenshot is no longer needed on any path.
        file.unlink()

def image_to_text(image: "Image.Image", lang: str = "jpn_best+eng_best") -> str:
    """Run OCR on *image* with the first tool pyocr reports as available.

    Args:
        image: The picture to recognise; passed unchanged to the tool's
            ``image_to_string``.
        lang: Language specification forwarded to the OCR tool. Defaults
            to the value this script has always used.

    Returns:
        The text returned by the tool's ``image_to_string``.

    Raises:
        RuntimeError: If pyocr reports no available OCR tool.
    """
    tools = pyocr.get_available_tools()
    if not tools:
        # Raise the error type the script's entry point handles instead of
        # an IndexError from indexing an empty list.
        raise RuntimeError("no OCR tool is available to pyocr")

    tool = tools[0]
    print(f"{tool.get_name()} will be used for text recognition.")

    return tool.image_to_string(image, lang)

def copy_to_clipboard(text: str) -> None:
    """Put *text*, stripped of surrounding whitespace, on the clipboard.

    Runs ``wl-copy`` with the text as a single argument. The command is
    given as an argument list, so quotes and shell metacharacters in the
    text are passed through literally rather than interpreted by a shell.

    Args:
        text: The text to copy.

    Raises:
        subprocess.CalledProcessError: If ``wl-copy`` exits with a non-zero
            status or cannot be found.
    """
    # "--" ends option parsing so text starting with "-" is not treated as
    # a wl-copy option.
    argv = ["wl-copy", "--", text.strip()]
    try:
        subprocess.check_call(argv)
    except FileNotFoundError as err:
        # With a shell, a missing binary surfaced as exit status 127;
        # keep reporting it through the same exception type.
        raise subprocess.CalledProcessError(127, argv) from err

def copy_from_clipboard() -> bytes:
    """Return the clipboard contents if they decode as an image.

    Runs ``wl-paste`` and checks that its output can be decoded by
    ``cv2.imdecode``; the decoded image is used only for validation.

    Returns:
        The unmodified bytes printed by ``wl-paste``.

    Raises:
        RuntimeError: If ``wl-paste`` cannot be run or exits non-zero, or
            if its output is empty or is not a decodable image.
    """
    try:
        result = subprocess.run(["wl-paste"], capture_output=True)
    except OSError as err:
        # Without a shell a missing binary raises instead of producing
        # exit status 127; report it the same way as any other failure.
        raise RuntimeError("wl-paste could not be executed") from err

    if result.returncode != 0:
        raise RuntimeError(f"wl-paste failed with exit status {result.returncode}")

    buffer = result.stdout
    if not buffer:
        raise RuntimeError("clipboard is empty")

    np_array = numpy.frombuffer(buffer, numpy.uint8)
    image = cv2.imdecode(np_array, cv2.IMREAD_GRAYSCALE)

    # imdecode returns None (not an empty array) when the data is not an
    # image, so test for that before looking at .size.
    if image is None or image.size == 0:
        raise RuntimeError("clipboard does not contain a decodable image")

    return buffer

def main() -> int:
    """Run OCR on the image in the clipboard and copy the text back.

    Reads image bytes with ``copy_from_clipboard``, opens them with PIL,
    recognises the text with ``image_to_text`` and passes the result to
    ``copy_to_clipboard``.

    Returns:
        Always ``0``.
    """
    clipboard_bytes = copy_from_clipboard()
    picture = Image.open(io.BytesIO(clipboard_bytes))
    recognised = image_to_text(picture)
    copy_to_clipboard(recognised)
    return 0


if __name__ == "__main__":
    # Script entry point. ``raise SystemExit`` replaces the ``exit`` helper,
    # which is provided by the ``site`` module for interactive sessions and
    # is not guaranteed to exist when a program runs.
    try:
        # Use main()'s return value as the process exit status.
        raise SystemExit(main())
    except KeyboardInterrupt:
        print("Operation aborted by user.")
        raise SystemExit(-1)
    except RuntimeError:
        print("Something goes wrong.")
        raise SystemExit(-1)
	from jeepney import DBusAddress, new_method_call
	from jeepney.bus_messages import message_bus, MatchRule
	from jeepney.io.blocking import open_dbus_connection, Proxy

	from urllib.parse import unquote
	from pathlib import Path
	from tempfile import mkstemp

	import pyocr, io, subprocess, numpy, cv2, screeninfo, time
	from PIL import Image

	def take_screenshot() -> bytes:
	    """Take an interactive screenshot via the XDG desktop portal.

	    Calls ``Screenshot`` on ``org.freedesktop.portal.Screenshot`` with
	    ``interactive`` set, waits for the ``org.freedesktop.portal.Request``
	    signal emitted on the request handle, then reads and deletes the file
	    named by the ``uri`` entry of the results.

	    Returns:
	        The raw bytes of the screenshot file.

	    Raises:
	        RuntimeError: If the portal response code is non-zero.
	    """
	    portal = DBusAddress('/org/freedesktop/portal/desktop',
	            bus_name='org.freedesktop.portal.Desktop',
	            interface='org.freedesktop.portal.Screenshot')

	    token = "python_screen_ocr"

	    # The connection is used as a context manager so the socket is closed
	    # on every exit path (the original never closed it).
	    with open_dbus_connection() as connection:
	        # The request handle path is built from our unique bus name
	        # (leading ':' dropped, '.' replaced by '_') plus the token.
	        sender_name = connection.unique_name[1:].replace('.', '_')
	        handle = f"/org/freedesktop/portal/desktop/request/{sender_name}/{token}"

	        # Subscribe to signals on that handle *before* issuing the call so
	        # the response cannot be missed.
	        response_rule = MatchRule(type='signal', interface='org.freedesktop.portal.Request', path=handle)
	        Proxy(message_bus, connection).AddMatch(response_rule)

	        with connection.filter(response_rule) as responses:
	            body = ('', {'handle_token': ('s', token), 'interactive': ('b', True)})
	            request = new_method_call(portal, 'Screenshot', 'sa{sv}', body)
	            connection.send_and_get_reply(request)
	            response_message = connection.recv_until_filtered(responses)

	    response, results = response_message.body
	    if response != 0:
	        raise RuntimeError(f"screenshot portal request failed (response code {response})")

	    # results['uri'] is a (signature, value) pair; strip the URI scheme and
	    # undo percent-encoding to obtain a filesystem path.
	    result_uri = results['uri'][1].split('file://', 1)[-1]
	    file = Path(unquote(result_uri))

	    image = file.read_bytes()
	    file.unlink()

	    return image

	def gnome_screenshot() -> bytes:
	    """Capture a user-selected screen area through GNOME Shell.

	    Calls ``SelectArea`` on ``org.gnome.Shell.Screenshot`` to obtain a
	    rectangle, then ``ScreenshotArea`` to write that rectangle to a
	    temporary file, which is read and deleted.

	    Returns:
	        The raw bytes of the file written by ``ScreenshotArea``.

	    Raises:
	        RuntimeError: If the ``SelectArea`` reply starts with
	            ``"Operation was cancelled"``.
	    """
	    import os  # function-scope: only needed to close the mkstemp descriptor

	    portal = DBusAddress('/org/gnome/Shell/Screenshot',
	            bus_name='org.gnome.Shell.Screenshot',
	            interface='org.gnome.Shell.Screenshot')

	    # Context manager closes the D-Bus socket on every exit path.
	    with open_dbus_connection() as connection:

	        request = new_method_call(portal, 'SelectArea')
	        result = connection.send_and_get_reply(request)

	        if result.body[0] == "Operation was cancelled":
	            raise RuntimeError("area selection was cancelled")
	        x, y, width, height = result.body

	        # mkstemp returns an *open* descriptor; only the path is needed,
	        # so close it right away instead of leaking it.
	        descriptor, filename = mkstemp()
	        os.close(descriptor)
	        file = Path(filename)

	        try:
	            # NOTE(review): the boolean is presumably the "flash" flag of
	            # ScreenshotArea -- confirm against the GNOME Shell interface.
	            body = (x, y, width, height, True, filename)
	            request = new_method_call(portal, 'ScreenshotArea', 'iiiibs', body)
	            connection.send_and_get_reply(request)

	            image = file.read_bytes()
	        finally:
	            # Remove the temporary file even when the call or read fails.
	            file.unlink()

	    return image

	def get_monitor_info():
	    """Return the geometry of the first monitor listed by ``screeninfo``.

	    Returns:
	        A ``(x, y, width, height)`` tuple read from the first element of
	        ``screeninfo.get_monitors()``.
	    """
	    first_monitor = screeninfo.get_monitors()[0]
	    return (
	        first_monitor.x,
	        first_monitor.y,
	        first_monitor.width,
	        first_monitor.height,
	    )

	def gnome_freeze_screenshot() -> bytes:
	    """Capture a selected area from a still image of the first monitor.

	    Writes a screenshot of the first monitor to a temporary file, shows it
	    fullscreen in ``eog`` while GNOME Shell's ``SelectArea`` is running
	    (presumably so the selection happens on a "frozen" picture), then crops
	    the saved screenshot to the selected rectangle.

	    Returns:
	        The cropped region encoded as PNG bytes.

	    Raises:
	        RuntimeError: If the ``SelectArea`` reply starts with
	            ``"Operation was cancelled"``, or the screenshot cannot be
	            decoded or re-encoded.
	    """
	    import os  # function-scope: only needed to close the mkstemp descriptor

	    portal = DBusAddress('/org/gnome/Shell/Screenshot',
	            bus_name='org.gnome.Shell.Screenshot',
	            interface='org.gnome.Shell.Screenshot')

	    # Query the monitor geometry once and reuse it for both the capture
	    # request and the crop offset.
	    m_x, m_y, m_width, m_height = get_monitor_info()

	    # mkstemp returns an open descriptor; only the path is needed.
	    descriptor, filename = mkstemp()
	    os.close(descriptor)
	    file = Path(filename)

	    try:
	        # Context manager closes the D-Bus socket on every exit path.
	        with open_dbus_connection() as connection:

	            body = (m_x, m_y, m_width, m_height, False, filename)
	            request = new_method_call(portal, 'ScreenshotArea', 'iiiibs', body)
	            connection.send_and_get_reply(request)

	            eog = subprocess.Popen(["eog", filename, "--fullscreen"])
	            try:
	                # Presumably gives the viewer time to open its window
	                # before the selection UI starts.
	                time.sleep(0.5)

	                request = new_method_call(portal, 'SelectArea')
	                result = connection.send_and_get_reply(request)
	            finally:
	                # Always stop the viewer, and reap it so no zombie remains.
	                eog.kill()
	                eog.wait()

	        if result.body[0] == "Operation was cancelled":
	            raise RuntimeError("area selection was cancelled")

	        np_array = numpy.frombuffer(file.read_bytes(), numpy.uint8)
	        image = cv2.imdecode(np_array, cv2.IMREAD_COLOR)
	        if image is None:
	            raise RuntimeError("captured screenshot could not be decoded")

	        # Translate the selection into coordinates relative to the
	        # monitor's origin, which is where the saved image starts.
	        s_x, s_y, width, height = result.body
	        d_x, d_y = s_x - m_x, s_y - m_y

	        encoded_ok, new_image = cv2.imencode('.png', image[d_y:d_y+height, d_x:d_x+width])
	        if not encoded_ok:
	            raise RuntimeError("selected area could not be encoded as PNG")

	        # imencode yields a numpy array; convert it to match ``-> bytes``.
	        return new_image.tobytes()
	    finally:
	        # The temporary screenshot is no longer needed on any path.
	        file.unlink()

	def image_to_text(image: "Image.Image", lang: str = "jpn_best+eng_best") -> str:
	    """Run OCR on *image* with the first tool pyocr reports as available.

	    Args:
	        image: The picture to recognise; passed unchanged to the tool's
	            ``image_to_string``.
	        lang: Language specification forwarded to the OCR tool. Defaults
	            to the value this script has always used.

	    Returns:
	        The text returned by the tool's ``image_to_string``.

	    Raises:
	        RuntimeError: If pyocr reports no available OCR tool.
	    """
	    tools = pyocr.get_available_tools()
	    if not tools:
	        # Raise the error type the script's entry point handles instead of
	        # an IndexError from indexing an empty list.
	        raise RuntimeError("no OCR tool is available to pyocr")

	    tool = tools[0]
	    print(f"{tool.get_name()} will be used for text recognition.")

	    return tool.image_to_string(image, lang)

	def copy_to_clipboard(text: str) -> None:
	    """Put *text*, stripped of surrounding whitespace, on the clipboard.

	    Runs ``wl-copy`` with the text as a single argument. The command is
	    given as an argument list, so quotes and shell metacharacters in the
	    text are passed through literally rather than interpreted by a shell.

	    Args:
	        text: The text to copy.

	    Raises:
	        subprocess.CalledProcessError: If ``wl-copy`` exits with a non-zero
	            status or cannot be found.
	    """
	    # "--" ends option parsing so text starting with "-" is not treated as
	    # a wl-copy option.
	    argv = ["wl-copy", "--", text.strip()]
	    try:
	        subprocess.check_call(argv)
	    except FileNotFoundError as err:
	        # With a shell, a missing binary surfaced as exit status 127;
	        # keep reporting it through the same exception type.
	        raise subprocess.CalledProcessError(127, argv) from err

	def copy_from_clipboard() -> bytes:
	    """Return the clipboard contents if they decode as an image.

	    Runs ``wl-paste`` and checks that its output can be decoded by
	    ``cv2.imdecode``; the decoded image is used only for validation.

	    Returns:
	        The unmodified bytes printed by ``wl-paste``.

	    Raises:
	        RuntimeError: If ``wl-paste`` cannot be run or exits non-zero, or
	            if its output is empty or is not a decodable image.
	    """
	    try:
	        result = subprocess.run(["wl-paste"], capture_output=True)
	    except OSError as err:
	        # Without a shell a missing binary raises instead of producing
	        # exit status 127; report it the same way as any other failure.
	        raise RuntimeError("wl-paste could not be executed") from err

	    if result.returncode != 0:
	        raise RuntimeError(f"wl-paste failed with exit status {result.returncode}")

	    buffer = result.stdout
	    if not buffer:
	        raise RuntimeError("clipboard is empty")

	    np_array = numpy.frombuffer(buffer, numpy.uint8)
	    image = cv2.imdecode(np_array, cv2.IMREAD_GRAYSCALE)

	    # imdecode returns None (not an empty array) when the data is not an
	    # image, so test for that before looking at .size.
	    if image is None or image.size == 0:
	        raise RuntimeError("clipboard does not contain a decodable image")

	    return buffer

	def main() -> int:
	    """Run OCR on the image in the clipboard and copy the text back.

	    Reads image bytes with ``copy_from_clipboard``, opens them with PIL,
	    recognises the text with ``image_to_text`` and passes the result to
	    ``copy_to_clipboard``.

	    Returns:
	        Always ``0``.
	    """
	    clipboard_bytes = copy_from_clipboard()
	    picture = Image.open(io.BytesIO(clipboard_bytes))
	    recognised = image_to_text(picture)
	    copy_to_clipboard(recognised)
	    return 0


	if __name__ == "__main__":
	    # Script entry point. ``raise SystemExit`` replaces the ``exit`` helper,
	    # which is provided by the ``site`` module for interactive sessions and
	    # is not guaranteed to exist when a program runs.
	    try:
	        # Use main()'s return value as the process exit status.
	        raise SystemExit(main())
	    except KeyboardInterrupt:
	        print("Operation aborted by user.")
	        raise SystemExit(-1)
	    except RuntimeError:
	        print("Something goes wrong.")
	        raise SystemExit(-1)
No results found