Somberor/server.py

## server.py
"""
GPU OCR Server - FastAPI + EasyOCR

Run on a GPU server (RTX 3090, 4090, etc.) to provide fast OCR processing.

Usage:
    pip install fastapi uvicorn easyocr pillow python-multipart
    python server.py

The server will listen on port 8765 by default.
"""

import io
import time
import asyncio
from typing import List, Optional
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor

import easyocr
import numpy as np
from PIL import Image
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
import uvicorn

# FastAPI application object; the route handlers below are registered on it.
app = FastAPI(title="GPU OCR Server", version="1.0.0")

# Global OCR reader (loaded once at startup)
# Stays None until load_model() has run; the OCR endpoints answer 503 while
# it is still None.
ocr_reader: Optional[easyocr.Reader] = None

# Thread pool for running blocking OCR calls
# High default - GPU will naturally throttle if saturated
# Only reduce if you see OOM errors (unlikely with 20GB VRAM)
# Can be overridden with --workers when the module is run as a script.
OCR_WORKERS = 50
# Created in load_model() with OCR_WORKERS threads; None before startup.
ocr_executor: Optional[ThreadPoolExecutor] = None


@app.on_event("startup")
async def load_model():
    """Create the shared EasyOCR reader and the OCR thread pool at startup.

    Populates the module-level ``ocr_reader`` and ``ocr_executor`` globals
    and prints how long the model took to load.
    """
    global ocr_reader, ocr_executor

    print("[OCR Server] Loading EasyOCR model with GPU...")
    began = time.time()

    # English-only reader with GPU enabled; built once, shared by all requests.
    languages = ['en']
    ocr_reader = easyocr.Reader(languages, gpu=True, verbose=False)

    # Pool that the request handlers hand their blocking readtext() calls to.
    ocr_executor = ThreadPoolExecutor(max_workers=OCR_WORKERS)

    elapsed = time.time() - began
    print(f"[OCR Server] Model loaded in {elapsed:.1f}s (workers={OCR_WORKERS})")


@app.get("/health")
async def health_check():
    """Report server status, whether the OCR reader exists, and the worker count."""
    model_ready = ocr_reader is not None
    return dict(status="ok", gpu_loaded=model_ready, workers=OCR_WORKERS)


def _run_ocr_sync(img_np: np.ndarray):
    """Run the blocking ``readtext`` call on the shared reader.

    Kept as a plain function so it can be submitted to the thread pool.
    """
    detections = ocr_reader.readtext(img_np)
    return detections


@app.post("/ocr")
async def process_ocr(file: UploadFile = File(...)):
    """
    Process an image and return OCR results.

    Args:
        file: Uploaded image file.

    Returns:
        Dict with:
        - results: list of detected text regions, each with
            - text: The detected text
            - confidence: Detection confidence (0-1)
            - bbox: Bounding box corner points [[x, y], ...] as integers
            - center: Midpoint [x, y] of the first and third bbox corners
        - timing: read_ms / ocr_ms / total_ms, integer milliseconds
        - count: number of detected regions

    Raises:
        HTTPException: 503 if the OCR model is not loaded, 400 if the upload
            cannot be decoded as an image.
    """
    if ocr_reader is None:
        raise HTTPException(status_code=503, detail="OCR model not loaded")

    # perf_counter is monotonic, so the reported durations cannot go negative
    # when the system clock is adjusted mid-request.
    start_time = time.perf_counter()

    # Read image
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))
        # Palette, bilevel, CMYK, 16-bit, ... images do not become plain
        # greyscale/RGB(A) pixel arrays (mode "P", for instance, yields palette
        # indices), so normalise those to RGB.  L/RGB/RGBA are passed through
        # untouched to keep the existing behaviour for those inputs.
        if image.mode not in ("L", "RGB", "RGBA"):
            image = image.convert("RGB")
        img_np = np.array(image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

    read_time = time.perf_counter()

    # Run OCR in thread pool to avoid blocking the event loop
    # This allows multiple OCR requests to run concurrently
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(ocr_executor, _run_ocr_sync, img_np)

    ocr_time = time.perf_counter()

    # Format results
    formatted_results = []
    for bbox, text, confidence in results:
        # Corners 0 and 2 are used as opposite corners to derive the centre.
        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
        x2, y2 = int(bbox[2][0]), int(bbox[2][1])

        formatted_results.append({
            "text": text,
            "confidence": float(confidence),
            "bbox": [[int(p[0]), int(p[1])] for p in bbox],
            "center": [(x1 + x2) // 2, (y1 + y2) // 2]
        })

    total_time = time.perf_counter() - start_time

    return {
        "results": formatted_results,
        "timing": {
            "read_ms": int((read_time - start_time) * 1000),
            "ocr_ms": int((ocr_time - read_time) * 1000),
            "total_ms": int(total_time * 1000)
        },
        "count": len(formatted_results)
    }


@app.post("/ocr/find")
async def find_text(
    file: UploadFile = File(...),
    search: str = "",
    exact: bool = False
):
    """
    Find specific text in an image.

    Matching is case-insensitive and ignores leading/trailing whitespace on
    both the search term and each detected text.  The first matching region,
    in the order the OCR reader returns them, is reported.

    Args:
        file: Image file
        search: Text to search for
        exact: If True, match exactly; if False, partial match

    Returns:
        Dict with "found" (the matching region with text, confidence, center
        and bbox, or null if not found), the echoed "search" and "exact"
        values, and "timing_ms".

    Raises:
        HTTPException: 503 if the OCR model is not loaded, 400 if the search
            term is empty or blank, or the upload cannot be decoded as an image.
    """
    if ocr_reader is None:
        raise HTTPException(status_code=503, detail="OCR model not loaded")

    # Normalise before validating: a whitespace-only term would otherwise
    # strip down to "" and, in partial mode, match every detected region.
    search_lower = search.lower().strip()
    if not search_lower:
        raise HTTPException(status_code=400, detail="search parameter required")

    # perf_counter is monotonic, so timing_ms cannot go negative when the
    # system clock is adjusted mid-request.
    start_time = time.perf_counter()

    # Read image
    try:
        contents = await file.read()
        image = Image.open(io.BytesIO(contents))
        # Palette, bilevel, CMYK, 16-bit, ... images do not become plain
        # greyscale/RGB(A) pixel arrays (mode "P", for instance, yields palette
        # indices), so normalise those to RGB.  L/RGB/RGBA are passed through
        # untouched to keep the existing behaviour for those inputs.
        if image.mode not in ("L", "RGB", "RGBA"):
            image = image.convert("RGB")
        img_np = np.array(image)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

    # Run OCR in thread pool to avoid blocking the event loop
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(ocr_executor, _run_ocr_sync, img_np)

    # Search for text
    found = None
    for bbox, text, confidence in results:
        text_lower = text.lower().strip()
        match = (text_lower == search_lower) if exact else (search_lower in text_lower)
        if not match:
            continue

        # Corners 0 and 2 are used as opposite corners to derive the centre.
        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
        x2, y2 = int(bbox[2][0]), int(bbox[2][1])
        found = {
            "text": text,
            "confidence": float(confidence),
            "center": [(x1 + x2) // 2, (y1 + y2) // 2],
            "bbox": [[int(p[0]), int(p[1])] for p in bbox]
        }
        break

    return {
        "found": found,
        "search": search,
        "exact": exact,
        "timing_ms": int((time.perf_counter() - start_time) * 1000)
    }


if __name__ == "__main__":
    # Script entry point: parse CLI options, then serve `app` with uvicorn.
    import argparse

    parser = argparse.ArgumentParser(description="GPU OCR Server")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
    parser.add_argument("--port", type=int, default=8765, help="Port to bind to")
    parser.add_argument(
        "--workers",
        type=int,
        # Take the default from the module constant so the two cannot drift.
        default=OCR_WORKERS,
        help=f"Number of concurrent OCR workers (default: {OCR_WORKERS})",
    )
    args = parser.parse_args()

    # ThreadPoolExecutor rejects max_workers <= 0; fail here with a usage
    # message instead of a traceback from the startup hook.
    if args.workers < 1:
        parser.error("--workers must be at least 1")

    # Update worker count from command line (module-level assignment);
    # load_model() reads OCR_WORKERS when the server starts.
    OCR_WORKERS = args.workers

    print(f"[OCR Server] Starting on {args.host}:{args.port} with {OCR_WORKERS} workers")
    uvicorn.run(app, host=args.host, port=args.port)
	"""
	GPU OCR Server - FastAPI + EasyOCR

	Run on a GPU server (RTX 3090, 4090, etc.) to provide fast OCR processing.

	Usage:
	pip install fastapi uvicorn easyocr pillow python-multipart
	python server.py

	The server will listen on port 8765 by default.
	"""

	import io
	import time
	import asyncio
	from typing import List, Optional
	from dataclasses import dataclass
	from concurrent.futures import ThreadPoolExecutor

	import easyocr
	import numpy as np
	from PIL import Image
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.responses import JSONResponse
	import uvicorn

	# FastAPI application object; the route handlers below are registered on it.
	app = FastAPI(title="GPU OCR Server", version="1.0.0")

	# Global OCR reader (loaded once at startup)
	# Stays None until load_model() has run; the OCR endpoints answer 503 while
	# it is still None.
	ocr_reader: Optional[easyocr.Reader] = None

	# Thread pool for running blocking OCR calls
	# High default - GPU will naturally throttle if saturated
	# Only reduce if you see OOM errors (unlikely with 20GB VRAM)
	# Can be overridden with --workers when the module is run as a script.
	OCR_WORKERS = 50
	# Created in load_model() with OCR_WORKERS threads; None before startup.
	ocr_executor: Optional[ThreadPoolExecutor] = None


	@app.on_event("startup")
	async def load_model():
	    """Create the shared EasyOCR reader and the OCR thread pool at startup.

	    Populates the module-level ``ocr_reader`` and ``ocr_executor`` globals
	    and prints how long the model took to load.
	    """
	    global ocr_reader, ocr_executor

	    print("[OCR Server] Loading EasyOCR model with GPU...")
	    began = time.time()

	    # English-only reader with GPU enabled; built once, shared by all requests.
	    languages = ['en']
	    ocr_reader = easyocr.Reader(languages, gpu=True, verbose=False)

	    # Pool that the request handlers hand their blocking readtext() calls to.
	    ocr_executor = ThreadPoolExecutor(max_workers=OCR_WORKERS)

	    elapsed = time.time() - began
	    print(f"[OCR Server] Model loaded in {elapsed:.1f}s (workers={OCR_WORKERS})")


	@app.get("/health")
	async def health_check():
	    """Report server status, whether the OCR reader exists, and the worker count."""
	    model_ready = ocr_reader is not None
	    return dict(status="ok", gpu_loaded=model_ready, workers=OCR_WORKERS)


	def _run_ocr_sync(img_np: np.ndarray):
	    """Run the blocking ``readtext`` call on the shared reader.

	    Kept as a plain function so it can be submitted to the thread pool.
	    """
	    detections = ocr_reader.readtext(img_np)
	    return detections


	@app.post("/ocr")
	async def process_ocr(file: UploadFile = File(...)):
	    """
	    Process an image and return OCR results.

	    Args:
	        file: Uploaded image file.

	    Returns:
	        Dict with:
	        - results: list of detected text regions, each with
	            - text: The detected text
	            - confidence: Detection confidence (0-1)
	            - bbox: Bounding box corner points [[x, y], ...] as integers
	            - center: Midpoint [x, y] of the first and third bbox corners
	        - timing: read_ms / ocr_ms / total_ms, integer milliseconds
	        - count: number of detected regions

	    Raises:
	        HTTPException: 503 if the OCR model is not loaded, 400 if the upload
	            cannot be decoded as an image.
	    """
	    if ocr_reader is None:
	        raise HTTPException(status_code=503, detail="OCR model not loaded")

	    # perf_counter is monotonic, so the reported durations cannot go negative
	    # when the system clock is adjusted mid-request.
	    start_time = time.perf_counter()

	    # Read image
	    try:
	        contents = await file.read()
	        image = Image.open(io.BytesIO(contents))
	        # Palette, bilevel, CMYK, 16-bit, ... images do not become plain
	        # greyscale/RGB(A) pixel arrays (mode "P", for instance, yields palette
	        # indices), so normalise those to RGB.  L/RGB/RGBA are passed through
	        # untouched to keep the existing behaviour for those inputs.
	        if image.mode not in ("L", "RGB", "RGBA"):
	            image = image.convert("RGB")
	        img_np = np.array(image)
	    except Exception as e:
	        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

	    read_time = time.perf_counter()

	    # Run OCR in thread pool to avoid blocking the event loop
	    # This allows multiple OCR requests to run concurrently
	    loop = asyncio.get_running_loop()
	    results = await loop.run_in_executor(ocr_executor, _run_ocr_sync, img_np)

	    ocr_time = time.perf_counter()

	    # Format results
	    formatted_results = []
	    for bbox, text, confidence in results:
	        # Corners 0 and 2 are used as opposite corners to derive the centre.
	        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
	        x2, y2 = int(bbox[2][0]), int(bbox[2][1])

	        formatted_results.append({
	            "text": text,
	            "confidence": float(confidence),
	            "bbox": [[int(p[0]), int(p[1])] for p in bbox],
	            "center": [(x1 + x2) // 2, (y1 + y2) // 2]
	        })

	    total_time = time.perf_counter() - start_time

	    return {
	        "results": formatted_results,
	        "timing": {
	            "read_ms": int((read_time - start_time) * 1000),
	            "ocr_ms": int((ocr_time - read_time) * 1000),
	            "total_ms": int(total_time * 1000)
	        },
	        "count": len(formatted_results)
	    }


	@app.post("/ocr/find")
	async def find_text(
	    file: UploadFile = File(...),
	    search: str = "",
	    exact: bool = False
	):
	    """
	    Find specific text in an image.

	    Matching is case-insensitive and ignores leading/trailing whitespace on
	    both the search term and each detected text.  The first matching region,
	    in the order the OCR reader returns them, is reported.

	    Args:
	        file: Image file
	        search: Text to search for
	        exact: If True, match exactly; if False, partial match

	    Returns:
	        Dict with "found" (the matching region with text, confidence, center
	        and bbox, or null if not found), the echoed "search" and "exact"
	        values, and "timing_ms".

	    Raises:
	        HTTPException: 503 if the OCR model is not loaded, 400 if the search
	            term is empty or blank, or the upload cannot be decoded as an image.
	    """
	    if ocr_reader is None:
	        raise HTTPException(status_code=503, detail="OCR model not loaded")

	    # Normalise before validating: a whitespace-only term would otherwise
	    # strip down to "" and, in partial mode, match every detected region.
	    search_lower = search.lower().strip()
	    if not search_lower:
	        raise HTTPException(status_code=400, detail="search parameter required")

	    # perf_counter is monotonic, so timing_ms cannot go negative when the
	    # system clock is adjusted mid-request.
	    start_time = time.perf_counter()

	    # Read image
	    try:
	        contents = await file.read()
	        image = Image.open(io.BytesIO(contents))
	        # Palette, bilevel, CMYK, 16-bit, ... images do not become plain
	        # greyscale/RGB(A) pixel arrays (mode "P", for instance, yields palette
	        # indices), so normalise those to RGB.  L/RGB/RGBA are passed through
	        # untouched to keep the existing behaviour for those inputs.
	        if image.mode not in ("L", "RGB", "RGBA"):
	            image = image.convert("RGB")
	        img_np = np.array(image)
	    except Exception as e:
	        raise HTTPException(status_code=400, detail=f"Invalid image: {e}") from e

	    # Run OCR in thread pool to avoid blocking the event loop
	    loop = asyncio.get_running_loop()
	    results = await loop.run_in_executor(ocr_executor, _run_ocr_sync, img_np)

	    # Search for text
	    found = None
	    for bbox, text, confidence in results:
	        text_lower = text.lower().strip()
	        match = (text_lower == search_lower) if exact else (search_lower in text_lower)
	        if not match:
	            continue

	        # Corners 0 and 2 are used as opposite corners to derive the centre.
	        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
	        x2, y2 = int(bbox[2][0]), int(bbox[2][1])
	        found = {
	            "text": text,
	            "confidence": float(confidence),
	            "center": [(x1 + x2) // 2, (y1 + y2) // 2],
	            "bbox": [[int(p[0]), int(p[1])] for p in bbox]
	        }
	        break

	    return {
	        "found": found,
	        "search": search,
	        "exact": exact,
	        "timing_ms": int((time.perf_counter() - start_time) * 1000)
	    }


	if __name__ == "__main__":
	    # Script entry point: parse CLI options, then serve `app` with uvicorn.
	    import argparse

	    parser = argparse.ArgumentParser(description="GPU OCR Server")
	    parser.add_argument("--host", default="0.0.0.0", help="Host to bind to")
	    parser.add_argument("--port", type=int, default=8765, help="Port to bind to")
	    parser.add_argument(
	        "--workers",
	        type=int,
	        # Take the default from the module constant so the two cannot drift.
	        default=OCR_WORKERS,
	        help=f"Number of concurrent OCR workers (default: {OCR_WORKERS})",
	    )
	    args = parser.parse_args()

	    # ThreadPoolExecutor rejects max_workers <= 0; fail here with a usage
	    # message instead of a traceback from the startup hook.
	    if args.workers < 1:
	        parser.error("--workers must be at least 1")

	    # Update worker count from command line (module-level assignment);
	    # load_model() reads OCR_WORKERS when the server starts.
	    OCR_WORKERS = args.workers

	    print(f"[OCR Server] Starting on {args.host}:{args.port} with {OCR_WORKERS} workers")
	    uvicorn.run(app, host=args.host, port=args.port)
No results found