hikaMaeng/Tess.kt

## imageProcess.kt
package kore.ocr

import java.awt.image.BufferedImage
import java.awt.image.ConvolveOp
import java.awt.image.Kernel
import kotlin.math.pow

/**
 * Prepares an image for OCR: grayscale conversion, gamma correction, sharpening and binarization.
 *
 * Steps, in order:
 * 1. [originalImage] is drawn onto a new `TYPE_BYTE_GRAY` image of the same size.
 * 2. Every gray sample `v` is replaced by `255 * (v / 255)^(1 / 1.2)`, truncated to an integer
 *    and clamped to `0..255` (the exponent is below 1, so mid-tones get brighter).
 * 3. A 3x3 sharpening kernel (centre 5, the four direct neighbours -1) is applied;
 *    `EDGE_NO_OP` copies the border pixels through unchanged.
 * 4. The sharpened image is binarized by [otsuThreshold].
 *
 * The intermediate images are flushed before returning; [originalImage] is only read.
 *
 * @param originalImage image to preprocess
 * @return the new image returned by [otsuThreshold]
 */
internal fun preprocessImage(originalImage:BufferedImage):BufferedImage{
    val grayscale = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
    val g = grayscale.createGraphics()
    try {
        g.drawImage(originalImage, 0, 0, null)
    } finally {
        // Release the graphics context even if drawing throws.
        g.dispose()
    }

    val gamma = 1.2
    // A gray sample can only take 256 values, so the correction is computed once per value
    // instead of calling pow() for every pixel.
    val gammaTable = IntArray(256) { v ->
        (255.0 * (v / 255.0).pow(1.0 / gamma)).toInt().coerceIn(0, 255)
    }
    val raster = grayscale.raster
    for (y in 0 until grayscale.height) {
        for (x in 0 until grayscale.width) {
            raster.setSample(x, y, 0, gammaTable[raster.getSample(x, y, 0)])
        }
    }

    val sharpenKernel = floatArrayOf(
        0f,  -1f,  0f,
        -1f,  5f, -1f,
        0f,  -1f,  0f
    )
    val convolveOp = ConvolveOp(Kernel(3, 3, sharpenKernel), ConvolveOp.EDGE_NO_OP, null)
    // Passing null as destination lets ConvolveOp allocate the output image.
    val sharpened = convolveOp.filter(grayscale, null)
    val binaryImage = otsuThreshold(sharpened)
    grayscale.flush()
    sharpened.flush()
    return binaryImage
}
/**
 * Binarizes [image] with a global threshold chosen by Otsu's method.
 *
 * A 256-bin histogram of band 0 is built, then every candidate threshold `t` is scored by the
 * between-class variance `wB * wF * (meanB - meanF)^2`, where the "background" class holds the
 * samples `<= t` and the "foreground" class the rest. The first `t` with the highest score wins
 * (0 if no candidate scores above zero, e.g. for a uniform image). Samples strictly greater
 * than the threshold become white, all others black.
 *
 * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`
 * @return a new `TYPE_BYTE_BINARY` image of the same size; [image] is not modified
 * @throws IllegalArgumentException if [image] has any other type
 */
internal fun otsuThreshold(image:BufferedImage): BufferedImage {
    require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
        "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
    }
    val width = image.width
    val height = image.height
    val raster = image.raster

    val histogram = IntArray(256)
    for (y in 0 until height) {
        for (x in 0 until width) {
            histogram[raster.getSample(x, y, 0)]++
        }
    }

    val totalPixels = width * height
    // Weighted sums are accumulated in Double: level * count as Int * Int overflows as soon as
    // one bright level holds more than ~8.4 million pixels (255 * count > Int.MAX_VALUE).
    var sumAll = 0.0
    for (level in 0..255) {
        sumAll += level.toDouble() * histogram[level]
    }

    var sumBackground = 0.0
    var wBackground = 0
    var maxVariance = 0.0
    var threshold = 0
    for (t in 0..255) {
        wBackground += histogram[t]
        if (wBackground == 0) continue          // no samples at or below t yet
        val wForeground = totalPixels - wBackground
        if (wForeground == 0) break             // every sample is already in the background class

        sumBackground += t.toDouble() * histogram[t]
        val meanBackground = sumBackground / wBackground
        val meanForeground = (sumAll - sumBackground) / wForeground
        val meanDiff = meanBackground - meanForeground
        val betweenVariance = wBackground.toDouble() * wForeground.toDouble() * meanDiff * meanDiff
        if (betweenVariance > maxVariance) {
            maxVariance = betweenVariance
            threshold = t
        }
    }

    val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
    val binaryRaster = binary.raster
    for (y in 0 until height) {
        for (x in 0 until width) {
            // The default TYPE_BYTE_BINARY palette is index 0 = black, index 1 = white.
            val bit = if (raster.getSample(x, y, 0) > threshold) 1 else 0
            binaryRaster.setSample(x, y, 0, bit)
        }
    }
    return binary
}

## Tess.kt
package kore.ocr

import kore.vo.VO
import kore.vo.field.value.float
import kore.vo.field.value.int
import kore.vo.field.value.string
import kore.vo.field.voList
import net.sourceforge.tess4j.Tesseract
import java.awt.image.BufferedImage
import java.io.File

/**
 * OCR facade over a tess4j [Tesseract] engine configured with a tessdata directory and a language string.
 *
 * Instances are obtained through the companion `invoke` operator (`Tess()` or `Tess("eng")`), which first
 * makes sure the required `.traineddata` files exist on disk.
 *
 * @param path directory handed to [Tesseract.setDatapath]
 * @param lang language string handed to [Tesseract.setLanguage], e.g. `"eng+kor"`
 */
class Tess internal constructor(path:String, lang:String) {
    /**
     * Result container returned by [extractCharacters].
     *
     * [width] and [height] are declared with `int(0)` and are not assigned by [extractCharacters];
     * [list] receives one [OCRWord] per recognized element.
     */
    class OCRList:VO(){
        var width by int(0)
        var height by int(0)
        val list by voList(::OCRWord){
            default{arrayListOf()}
        }
    }
    /**
     * One recognized element: its position in the result ([index]), the trimmed text ([char]),
     * the bounding box ([x], [y], [width], [height]) and the engine confidence ([accuracy]).
     * [data] is not assigned by [extractCharacters].
     */
    class OCRWord:VO(){
        var index by int
        var char by string
        var x by int
        var y by int
        var width by int
        var height by int
        var data by string("")
        var accuracy by float(0f)
    }
    /**
     * Granularity of the elements returned by [extractCharacters].
     *
     * The constant's `ordinal` is passed to tess4j as the page iterator level, so the declaration
     * order must not change (presumably it mirrors Tesseract's RIL_* levels — verify against tess4j).
     */
    enum class OCRLevel{
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }
    companion object{
        /** Directory the bundled traineddata files are unpacked into: `<user.home>/koreTessData`. */
        private val dataDir:File by lazy { File(System.getProperty("user.home") + "/koreTessData") }

        /**
         * Makes sure a `<code>.traineddata` file exists in [dataDir] for every `+`-separated code in [lang],
         * copying missing files from the classpath root.
         *
         * The check runs on every call: the directory is shared, but each call may ask for languages
         * that an earlier call did not unpack.
         *
         * @return absolute path of the tessdata directory
         * @throws IllegalArgumentException if a required traineddata resource is not on the classpath
         */
        private fun prepareData(lang:String):String {
            val dir = dataDir
            if(!dir.exists()) dir.mkdirs()
            lang.split("+").forEach { code ->
                val fileName = "$code.traineddata".trim()
                val targetFile = File(dir, fileName)
                if(!targetFile.exists()) {
                    Tess::class.java.getResourceAsStream("/$fileName")?.use { input ->
                        targetFile.outputStream().use { output ->
                            input.copyTo(output)
                        }
                    } ?: throw IllegalArgumentException("Tessdata file not found for language: $fileName")
                }
            }
            return dir.absolutePath
        }

        /**
         * Creates a [Tess] for [lang] after unpacking any missing traineddata files.
         *
         * @param lang `+`-separated language codes, `"eng+kor"` by default
         * @throws IllegalArgumentException if a traineddata resource for one of the codes is missing
         */
        operator fun invoke(lang:String = "eng+kor"):Tess = Tess(prepareData(lang), lang)
    }
    private val tess = Tesseract().also{
        it.setDatapath(path)
        it.setLanguage(lang)
    }
    /**
     * Runs OCR on [image] and returns the recognized text.
     *
     * @param image image to recognize
     * @param isPreprocess when `true`, OCR runs on the result of [preprocessImage] instead of [image]
     * @return the text returned by [Tesseract.doOCR]
     */
    fun extractText(image:BufferedImage, isPreprocess:Boolean = true):String {
        val img = if(isPreprocess) preprocessImage(image) else image
        return try {
            tess.doOCR(img)
        } finally {
            // Flush the image that was fed to the engine, also when OCR fails.
            img.flush()
        }
    }
    /**
     * Runs OCR on [image] and returns every recognized element with its bounding box and confidence.
     *
     * Elements whose text is empty after trimming are skipped; the remaining ones are numbered
     * consecutively from 0 in the order the engine returned them.
     *
     * @param image image to recognize
     * @param level granularity of the returned elements, [OCRLevel.SYMBOL] by default
     * @param isPreprocess when `true`, OCR runs on the result of [preprocessImage] instead of [image]
     * @return an [OCRList] whose `list` holds one [OCRWord] per non-blank element
     */
    fun extractCharacters(image:BufferedImage, level:OCRLevel = OCRLevel.SYMBOL, isPreprocess:Boolean = true):OCRList{
        val img = if(isPreprocess) preprocessImage(image) else image
        try {
            val result = OCRList()
            var nextIndex = 0
            tess.getWords(img, level.ordinal).forEach { word ->
                val text = word.text.trim()
                // Test the trimmed text so whitespace-only results are not stored as empty entries.
                if(text.isNotEmpty()) {
                    val box = word.boundingBox
                    result.list.add(OCRWord().apply {
                        index = nextIndex++
                        char = text
                        x = box.x
                        y = box.y
                        width = box.width
                        height = box.height
                        accuracy = word.confidence
                    })
                }
            }
            return result
        } finally {
            // Flush the image that was fed to the engine, also when OCR fails.
            img.flush()
        }
    }
}
	package kore.ocr

	import java.awt.image.BufferedImage
	import java.awt.image.ConvolveOp
	import java.awt.image.Kernel
	import kotlin.math.pow

	/**
	 * Prepares an image for OCR: grayscale conversion, gamma correction, sharpening and binarization.
	 *
	 * Steps, in order:
	 * 1. [originalImage] is drawn onto a new `TYPE_BYTE_GRAY` image of the same size.
	 * 2. Every gray sample `v` is replaced by `255 * (v / 255)^(1 / 1.2)`, truncated to an integer
	 *    and clamped to `0..255` (the exponent is below 1, so mid-tones get brighter).
	 * 3. A 3x3 sharpening kernel (centre 5, the four direct neighbours -1) is applied;
	 *    `EDGE_NO_OP` copies the border pixels through unchanged.
	 * 4. The sharpened image is binarized by [otsuThreshold].
	 *
	 * The intermediate images are flushed before returning; [originalImage] is only read.
	 *
	 * @param originalImage image to preprocess
	 * @return the new image returned by [otsuThreshold]
	 */
	internal fun preprocessImage(originalImage:BufferedImage):BufferedImage{
	    val grayscale = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
	    val g = grayscale.createGraphics()
	    try {
	        g.drawImage(originalImage, 0, 0, null)
	    } finally {
	        // Release the graphics context even if drawing throws.
	        g.dispose()
	    }
	    val gamma = 1.2
	    // A gray sample can only take 256 values, so the correction is computed once per value
	    // instead of calling pow() for every pixel.
	    val gammaTable = IntArray(256) { v ->
	        (255.0 * (v / 255.0).pow(1.0 / gamma)).toInt().coerceIn(0, 255)
	    }
	    val raster = grayscale.raster
	    for (y in 0 until grayscale.height) {
	        for (x in 0 until grayscale.width) {
	            raster.setSample(x, y, 0, gammaTable[raster.getSample(x, y, 0)])
	        }
	    }
	    val sharpenKernel = floatArrayOf(
	        0f,  -1f,  0f,
	        -1f,  5f, -1f,
	        0f,  -1f,  0f
	    )
	    val convolveOp = ConvolveOp(Kernel(3, 3, sharpenKernel), ConvolveOp.EDGE_NO_OP, null)
	    // Passing null as destination lets ConvolveOp allocate the output image.
	    val sharpened = convolveOp.filter(grayscale, null)
	    val binaryImage = otsuThreshold(sharpened)
	    grayscale.flush()
	    sharpened.flush()
	    return binaryImage
	}
	/**
	 * Binarizes [image] with a global threshold chosen by Otsu's method.
	 *
	 * A 256-bin histogram of band 0 is built, then every candidate threshold `t` is scored by the
	 * between-class variance `wB * wF * (meanB - meanF)^2`, where the "background" class holds the
	 * samples `<= t` and the "foreground" class the rest. The first `t` with the highest score wins
	 * (0 if no candidate scores above zero, e.g. for a uniform image). Samples strictly greater
	 * than the threshold become white, all others black.
	 *
	 * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`
	 * @return a new `TYPE_BYTE_BINARY` image of the same size; [image] is not modified
	 * @throws IllegalArgumentException if [image] has any other type
	 */
	internal fun otsuThreshold(image:BufferedImage): BufferedImage {
	    require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
	        "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
	    }
	    val width = image.width
	    val height = image.height
	    val raster = image.raster
	    val histogram = IntArray(256)
	    for (y in 0 until height) {
	        for (x in 0 until width) {
	            histogram[raster.getSample(x, y, 0)]++
	        }
	    }
	    val totalPixels = width * height
	    // Weighted sums are accumulated in Double: level * count as Int * Int overflows as soon as
	    // one bright level holds more than ~8.4 million pixels (255 * count > Int.MAX_VALUE).
	    var sumAll = 0.0
	    for (level in 0..255) {
	        sumAll += level.toDouble() * histogram[level]
	    }
	    var sumBackground = 0.0
	    var wBackground = 0
	    var maxVariance = 0.0
	    var threshold = 0
	    for (t in 0..255) {
	        wBackground += histogram[t]
	        if (wBackground == 0) continue          // no samples at or below t yet
	        val wForeground = totalPixels - wBackground
	        if (wForeground == 0) break             // every sample is already in the background class
	        sumBackground += t.toDouble() * histogram[t]
	        val meanBackground = sumBackground / wBackground
	        val meanForeground = (sumAll - sumBackground) / wForeground
	        val meanDiff = meanBackground - meanForeground
	        val betweenVariance = wBackground.toDouble() * wForeground.toDouble() * meanDiff * meanDiff
	        if (betweenVariance > maxVariance) {
	            maxVariance = betweenVariance
	            threshold = t
	        }
	    }
	    val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
	    val binaryRaster = binary.raster
	    for (y in 0 until height) {
	        for (x in 0 until width) {
	            // The default TYPE_BYTE_BINARY palette is index 0 = black, index 1 = white.
	            val bit = if (raster.getSample(x, y, 0) > threshold) 1 else 0
	            binaryRaster.setSample(x, y, 0, bit)
	        }
	    }
	    return binary
	}
	package kore.ocr

	import kore.vo.VO
	import kore.vo.field.value.float
	import kore.vo.field.value.int
	import kore.vo.field.value.string
	import kore.vo.field.voList
	import net.sourceforge.tess4j.Tesseract
	import java.awt.image.BufferedImage
	import java.io.File

	/**
	 * OCR facade over a tess4j [Tesseract] engine configured with a tessdata directory and a language string.
	 *
	 * Instances are obtained through the companion `invoke` operator (`Tess()` or `Tess("eng")`), which first
	 * makes sure the required `.traineddata` files exist on disk.
	 *
	 * @param path directory handed to [Tesseract.setDatapath]
	 * @param lang language string handed to [Tesseract.setLanguage], e.g. `"eng+kor"`
	 */
	class Tess internal constructor(path:String, lang:String) {
	    /**
	     * Result container returned by [extractCharacters].
	     *
	     * [width] and [height] are declared with `int(0)` and are not assigned by [extractCharacters];
	     * [list] receives one [OCRWord] per recognized element.
	     */
	    class OCRList:VO(){
	        var width by int(0)
	        var height by int(0)
	        val list by voList(::OCRWord){
	            default{arrayListOf()}
	        }
	    }
	    /**
	     * One recognized element: its position in the result ([index]), the trimmed text ([char]),
	     * the bounding box ([x], [y], [width], [height]) and the engine confidence ([accuracy]).
	     * [data] is not assigned by [extractCharacters].
	     */
	    class OCRWord:VO(){
	        var index by int
	        var char by string
	        var x by int
	        var y by int
	        var width by int
	        var height by int
	        var data by string("")
	        var accuracy by float(0f)
	    }
	    /**
	     * Granularity of the elements returned by [extractCharacters].
	     *
	     * The constant's `ordinal` is passed to tess4j as the page iterator level, so the declaration
	     * order must not change (presumably it mirrors Tesseract's RIL_* levels — verify against tess4j).
	     */
	    enum class OCRLevel{
	        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
	    }
	    companion object{
	        /** Directory the bundled traineddata files are unpacked into: `<user.home>/koreTessData`. */
	        private val dataDir:File by lazy { File(System.getProperty("user.home") + "/koreTessData") }
	        /**
	         * Makes sure a `<code>.traineddata` file exists in [dataDir] for every `+`-separated code in [lang],
	         * copying missing files from the classpath root.
	         *
	         * The check runs on every call: the directory is shared, but each call may ask for languages
	         * that an earlier call did not unpack.
	         *
	         * @return absolute path of the tessdata directory
	         * @throws IllegalArgumentException if a required traineddata resource is not on the classpath
	         */
	        private fun prepareData(lang:String):String {
	            val dir = dataDir
	            if(!dir.exists()) dir.mkdirs()
	            lang.split("+").forEach { code ->
	                val fileName = "$code.traineddata".trim()
	                val targetFile = File(dir, fileName)
	                if(!targetFile.exists()) {
	                    Tess::class.java.getResourceAsStream("/$fileName")?.use { input ->
	                        targetFile.outputStream().use { output ->
	                            input.copyTo(output)
	                        }
	                    } ?: throw IllegalArgumentException("Tessdata file not found for language: $fileName")
	                }
	            }
	            return dir.absolutePath
	        }
	        /**
	         * Creates a [Tess] for [lang] after unpacking any missing traineddata files.
	         *
	         * @param lang `+`-separated language codes, `"eng+kor"` by default
	         * @throws IllegalArgumentException if a traineddata resource for one of the codes is missing
	         */
	        operator fun invoke(lang:String = "eng+kor"):Tess = Tess(prepareData(lang), lang)
	    }
	    private val tess = Tesseract().also{
	        it.setDatapath(path)
	        it.setLanguage(lang)
	    }
	    /**
	     * Runs OCR on [image] and returns the recognized text.
	     *
	     * @param image image to recognize
	     * @param isPreprocess when `true`, OCR runs on the result of [preprocessImage] instead of [image]
	     * @return the text returned by [Tesseract.doOCR]
	     */
	    fun extractText(image:BufferedImage, isPreprocess:Boolean = true):String {
	        val img = if(isPreprocess) preprocessImage(image) else image
	        return try {
	            tess.doOCR(img)
	        } finally {
	            // Flush the image that was fed to the engine, also when OCR fails.
	            img.flush()
	        }
	    }
	    /**
	     * Runs OCR on [image] and returns every recognized element with its bounding box and confidence.
	     *
	     * Elements whose text is empty after trimming are skipped; the remaining ones are numbered
	     * consecutively from 0 in the order the engine returned them.
	     *
	     * @param image image to recognize
	     * @param level granularity of the returned elements, [OCRLevel.SYMBOL] by default
	     * @param isPreprocess when `true`, OCR runs on the result of [preprocessImage] instead of [image]
	     * @return an [OCRList] whose `list` holds one [OCRWord] per non-blank element
	     */
	    fun extractCharacters(image:BufferedImage, level:OCRLevel = OCRLevel.SYMBOL, isPreprocess:Boolean = true):OCRList{
	        val img = if(isPreprocess) preprocessImage(image) else image
	        try {
	            val result = OCRList()
	            var nextIndex = 0
	            tess.getWords(img, level.ordinal).forEach { word ->
	                val text = word.text.trim()
	                // Test the trimmed text so whitespace-only results are not stored as empty entries.
	                if(text.isNotEmpty()) {
	                    val box = word.boundingBox
	                    result.list.add(OCRWord().apply {
	                        index = nextIndex++
	                        char = text
	                        x = box.x
	                        y = box.y
	                        width = box.width
	                        height = box.height
	                        accuracy = word.confidence
	                    })
	                }
	            }
	            return result
	        } finally {
	            // Flush the image that was fed to the engine, also when OCR fails.
	            img.flush()
	        }
	    }
	}