Skip to content

Instantly share code, notes, and snippets.

@hikaMaeng
Created October 16, 2025 14:54
Show Gist options
  • Select an option

  • Save hikaMaeng/94867c9a854afc4106f01cde4271292c to your computer and use it in GitHub Desktop.

Select an option

Save hikaMaeng/94867c9a854afc4106f01cde4271292c to your computer and use it in GitHub Desktop.
package kore.ocr
import java.awt.image.BufferedImage
import java.awt.image.ConvolveOp
import java.awt.image.Kernel
import kotlin.math.pow
/**
 * Prepares an image for OCR.
 *
 * Steps, in order:
 * 1. Draw the input onto a new `TYPE_BYTE_GRAY` image (the input itself is only read).
 * 2. Gamma-correct every pixel with exponent `1 / 1.2`.
 * 3. Sharpen with a 3x3 kernel (centre 5, four direct neighbours -1).
 * 4. Binarise the sharpened image with [otsuThreshold].
 *
 * @param originalImage image to preprocess
 * @return the binarised image returned by [otsuThreshold]
 */
internal fun preprocessImage(originalImage:BufferedImage):BufferedImage{
    val width = originalImage.width
    val height = originalImage.height

    val grayscale = BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY)
    val g = grayscale.createGraphics()
    try {
        g.drawImage(originalImage, 0, 0, null)
    } finally {
        // Release the graphics context even if drawing fails.
        g.dispose()
    }

    // The gamma curve depends only on the 8-bit sample value, so compute it once
    // for the 256 possible levels instead of calling pow() for every pixel.
    val gamma = 1.2
    val gammaTable = IntArray(256) { level ->
        (255.0 * (level / 255.0).pow(1.0 / gamma)).toInt().coerceIn(0, 255)
    }
    val raster = grayscale.raster
    val row = IntArray(width)
    for (y in 0 until height) {
        raster.getSamples(0, y, width, 1, 0, row)
        for (x in 0 until width) row[x] = gammaTable[row[x]]
        raster.setSamples(0, y, width, 1, 0, row)
    }

    val sharpenKernel = floatArrayOf(
        0f, -1f, 0f,
        -1f, 5f, -1f,
        0f, -1f, 0f
    )
    val convolveOp = ConvolveOp(Kernel(3, 3, sharpenKernel), ConvolveOp.EDGE_NO_OP, null)
    val sharpened = convolveOp.filter(grayscale, null)
    try {
        return otsuThreshold(sharpened)
    } finally {
        // The intermediate images are no longer needed once the binary result exists
        // (or once binarisation has failed).
        grayscale.flush()
        sharpened.flush()
    }
}
/**
 * Binarises an image with Otsu's method.
 *
 * A 256-bin histogram of band 0 is built, the threshold that maximises the
 * between-class variance is selected (the first such threshold on ties), and every
 * pixel strictly greater than that threshold becomes white; all others become black.
 *
 * Intensity sums are accumulated as `Long`: with `Int` arithmetic, `level * count`
 * overflows once a bright bin holds more than roughly 8.4 million pixels, which
 * corrupts the class means and can select the wrong threshold on large scans.
 *
 * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`
 * @return a new `TYPE_BYTE_BINARY` image of the same size
 * @throws IllegalArgumentException if [image] has any other type
 */
internal fun otsuThreshold(image:BufferedImage): BufferedImage {
    require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
        "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
    }
    val width = image.width
    val height = image.height
    val raster = image.raster
    // Reused scratch buffer: samples are read and written one row at a time.
    val row = IntArray(width)

    val histogram = IntArray(256)
    for (y in 0 until height) {
        raster.getSamples(0, y, width, 1, 0, row)
        for (sample in row) histogram[sample]++
    }

    val totalPixels = width.toLong() * height
    var sumAll = 0L
    for (level in histogram.indices) {
        sumAll += level.toLong() * histogram[level]
    }

    var sumBackground = 0L
    var wBackground = 0L
    var maxVariance = 0.0
    var threshold = 0
    for (t in histogram.indices) {
        wBackground += histogram[t]
        // No pixels at or below t yet: the background class is empty.
        if (wBackground == 0L) continue
        val wForeground = totalPixels - wBackground
        // Every pixel is already in the background: higher thresholds cannot split further.
        if (wForeground == 0L) break
        sumBackground += t.toLong() * histogram[t]
        val meanBackground = sumBackground.toDouble() / wBackground
        val meanForeground = (sumAll - sumBackground).toDouble() / wForeground
        val meanDiff = meanBackground - meanForeground
        val betweenVariance = wBackground.toDouble() * wForeground.toDouble() * meanDiff * meanDiff
        if (betweenVariance > maxVariance) {
            maxVariance = betweenVariance
            threshold = t
        }
    }

    val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
    val binaryRaster = binary.raster
    for (y in 0 until height) {
        raster.getSamples(0, y, width, 1, 0, row)
        // The destination holds 1 bit per pixel, so white is stored as sample value 1.
        for (x in 0 until width) row[x] = if (row[x] > threshold) 1 else 0
        binaryRaster.setSamples(0, y, width, 1, 0, row)
    }
    return binary
}
package kore.ocr
import kore.vo.VO
import kore.vo.field.value.float
import kore.vo.field.value.int
import kore.vo.field.value.string
import kore.vo.field.voList
import net.sourceforge.tess4j.Tesseract
import java.awt.image.BufferedImage
import java.io.File
/**
 * Thin wrapper around a Tess4J [Tesseract] engine.
 *
 * Instances are created through the companion `invoke` operator (`Tess()` / `Tess("eng")`),
 * which first makes sure the traineddata files for the requested languages are available
 * on disk.
 *
 * @param path directory handed to [Tesseract.setDatapath]
 * @param lang language specification handed to [Tesseract.setLanguage], e.g. `"eng+kor"`
 */
class Tess internal constructor(path:String, lang:String) {
    /** Result container for [extractCharacters]. */
    // NOTE(review): width/height are never assigned anywhere in this class — confirm whether
    // callers are expected to fill them in.
    class OCRList:VO(){
        var width by int(0)
        var height by int(0)
        val list by voList(::OCRWord){
            default{arrayListOf()}
        }
    }

    /** One recognised element: its text, bounding box and confidence. */
    class OCRWord:VO(){
        var index by int
        var char by string
        var x by int
        var y by int
        var width by int
        var height by int
        var data by string("")
        var accuracy by float(0f)
    }

    /**
     * Granularity of the elements returned by [extractCharacters].
     *
     * The declaration order matters: [extractCharacters] passes `ordinal` straight to
     * Tess4J as the page-iterator level, so entries must not be reordered.
     */
    enum class OCRLevel{
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }

    private val tess = Tesseract().also{
        it.setDatapath(path)
        it.setLanguage(lang)
    }

    /**
     * Runs OCR on [image] and returns the recognised text.
     *
     * @param image image to recognise
     * @param isPreprocess when `true` the image is first passed through [preprocessImage];
     *   when `false` it is used as is
     * @return the text returned by [Tesseract.doOCR]
     */
    fun extractText(image:BufferedImage, isPreprocess:Boolean = true):String {
        val img = if(isPreprocess) preprocessImage(image) else image
        return try {
            tess.doOCR(img)
        } finally {
            // When isPreprocess is false, img is the caller's image, so this single call
            // covers both cases; flushing also happens if OCR throws.
            img.flush()
        }
    }

    /**
     * Runs OCR on [image] and returns every recognised element with its bounding box.
     *
     * Elements whose text is empty after trimming are skipped. `index` numbers the
     * remaining elements consecutively from 0.
     *
     * @param image image to recognise
     * @param level granularity of the returned elements
     * @param isPreprocess when `true` the image is first passed through [preprocessImage]
     * @return the recognised elements collected in an [OCRList]
     */
    fun extractCharacters(image:BufferedImage, level:OCRLevel = OCRLevel.SYMBOL, isPreprocess:Boolean = true):OCRList{
        val img = if(isPreprocess) preprocessImage(image) else image
        try {
            val result = OCRList()
            var nextIndex = 0
            for (word in tess.getWords(img, level.ordinal)) {
                val text = word.text.trim()
                // Test the trimmed text: whitespace-only results would otherwise be
                // stored with an empty `char`.
                if (text.isEmpty()) continue
                val boundingBox = word.boundingBox
                // NOTE(review): debug output on stdout; consider routing through a logger.
                println("OCR: $text, $boundingBox")
                result.list.add(OCRWord().apply {
                    index = nextIndex++
                    char = text
                    x = boundingBox.x
                    y = boundingBox.y
                    width = boundingBox.width
                    height = boundingBox.height
                    accuracy = word.confidence
                })
            }
            return result
        } finally {
            img.flush()
        }
    }

    companion object{
        /** Absolute path of the tessdata directory once it has been prepared. */
        private var dataPath:String? = null

        /**
         * Ensures the traineddata file of every language in [lang] exists in
         * `<user.home>/koreTessData`, copying missing ones from classpath resources.
         *
         * The check runs on every call, so a language requested for the first time after
         * the directory was already prepared is still extracted.
         *
         * @param lang `+`-separated language codes, e.g. `"eng+kor"`
         * @return absolute path of the tessdata directory
         * @throws IllegalArgumentException if a required traineddata resource is missing
         */
        private fun prepareData(lang:String):String {
            val dir = File(dataPath ?: (System.getProperty("user.home") + "/koreTessData"))
            if(!dir.exists()) dir.mkdirs()
            lang.split("+").forEach {code->
                // Trim the language code itself, not the assembled file name.
                val fileName = "${code.trim()}.traineddata"
                val targetFile = File(dir, fileName)
                if(!targetFile.exists()) {
                    val resource = Tess::class.java.getResourceAsStream("/$fileName")
                        ?: throw IllegalArgumentException("Tessdata file not found for language: $fileName")
                    resource.use {input->
                        try {
                            targetFile.outputStream().use {output->
                                input.copyTo(output)
                            }
                        } catch (e:Exception) {
                            // Do not leave a truncated file behind: it would pass the
                            // exists() check on the next call and never be repaired.
                            targetFile.delete()
                            throw e
                        }
                    }
                }
            }
            return dir.absolutePath.also{dataPath = it}
        }

        /**
         * Creates a [Tess] for [lang], preparing the traineddata files first.
         *
         * @param lang `+`-separated language codes
         */
        operator fun invoke(lang:String = "eng+kor"):Tess = Tess(prepareData(lang), lang)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment