Created
October 16, 2025 14:54
-
-
Save hikaMaeng/94867c9a854afc4106f01cde4271292c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package kore.ocr | |
| import java.awt.image.BufferedImage | |
| import java.awt.image.ConvolveOp | |
| import java.awt.image.Kernel | |
| import kotlin.math.pow | |
/**
 * Prepares an image for OCR and returns a binarized copy.
 *
 * Pipeline, in order:
 * 1. Draw [originalImage] onto a new `TYPE_BYTE_GRAY` image of the same size.
 * 2. Apply gamma correction (gamma = 1.2) to every gray sample, in place.
 * 3. Sharpen with a 3x3 kernel (center 5, direct neighbours -1), leaving edge pixels untouched.
 * 4. Binarize the sharpened image with [otsuThreshold].
 *
 * [originalImage] itself is not modified; the intermediate images are flushed before returning.
 *
 * @param originalImage the image to preprocess.
 * @return the image produced by [otsuThreshold].
 */
internal fun preprocessImage(originalImage: BufferedImage): BufferedImage {
    // Step 1: grayscale copy.
    val gray = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
    gray.createGraphics().let { graphics ->
        graphics.drawImage(originalImage, 0, 0, null)
        graphics.dispose()
    }

    // Step 2: gamma correction. An 8-bit gray sample has only 256 possible values,
    // so the curve is computed once per level instead of once per pixel.
    val exponent = 1.0 / 1.2
    val gammaTable = IntArray(256) { level ->
        (255.0 * (level / 255.0).pow(exponent)).toInt().coerceIn(0, 255)
    }
    val grayRaster = gray.raster
    for (row in 0 until gray.height) {
        for (col in 0 until gray.width) {
            grayRaster.setSample(col, row, 0, gammaTable[grayRaster.getSample(col, row, 0)])
        }
    }

    // Step 3: sharpen. EDGE_NO_OP copies border pixels through unchanged.
    val sharpenOp = ConvolveOp(
        Kernel(
            3, 3,
            floatArrayOf(
                0f, -1f, 0f,
                -1f, 5f, -1f,
                0f, -1f, 0f,
            ),
        ),
        ConvolveOp.EDGE_NO_OP,
        null,
    )
    val sharpened = sharpenOp.filter(gray, null)

    // Step 4: binarize, then release the intermediates.
    return otsuThreshold(sharpened).also {
        gray.flush()
        sharpened.flush()
    }
}
/**
 * Binarizes [image] using Otsu's method.
 *
 * A 256-bin histogram of band 0 is built, the threshold `t` that maximizes the
 * between-class variance `wB * wF * (meanB - meanF)^2` is selected, and every pixel
 * whose sample is strictly greater than `t` becomes white while all others become black.
 * If no split has positive variance (e.g. a uniform image) the threshold stays at 0.
 *
 * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`. It is only read.
 * @return a new `TYPE_BYTE_BINARY` image with the same dimensions as [image].
 * @throws IllegalArgumentException if [image] has any other type.
 */
internal fun otsuThreshold(image: BufferedImage): BufferedImage {
    require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
        "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
    }
    val width = image.width
    val height = image.height
    val raster = image.raster

    // Histogram of band-0 samples.
    val histogram = IntArray(256)
    for (y in 0 until height) {
        for (x in 0 until width) {
            histogram[raster.getSample(x, y, 0)]++
        }
    }

    val totalPixels = width * height
    // Multiply in Double: `level * count` as Int wraps once a single level holds more
    // than ~8.4 million pixels (255 * count > Int.MAX_VALUE), which corrupts the means.
    var sumAll = 0.0
    for (level in 0..255) {
        sumAll += level.toDouble() * histogram[level]
    }

    var sumBackground = 0.0
    var weightBackground = 0
    var maxVariance = 0.0
    var threshold = 0
    for (t in 0..255) {
        weightBackground += histogram[t]
        if (weightBackground == 0) continue // nothing at or below t yet
        val weightForeground = totalPixels - weightBackground
        if (weightForeground == 0) break // everything is at or below t; no further split possible
        sumBackground += t.toDouble() * histogram[t]
        val meanDiff = sumBackground / weightBackground - (sumAll - sumBackground) / weightForeground
        val betweenVariance = weightBackground.toDouble() * weightForeground.toDouble() * meanDiff * meanDiff
        // Strict comparison keeps the lowest threshold among equal maxima.
        if (betweenVariance > maxVariance) {
            maxVariance = betweenVariance
            threshold = t
        }
    }

    val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
    val binaryRaster = binary.raster
    for (y in 0 until height) {
        for (x in 0 until width) {
            // TYPE_BYTE_BINARY stores 1-bit samples: 1 is white, 0 is black.
            binaryRaster.setSample(x, y, 0, if (raster.getSample(x, y, 0) > threshold) 1 else 0)
        }
    }
    return binary
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package kore.ocr | |
| import kore.vo.VO | |
| import kore.vo.field.value.float | |
| import kore.vo.field.value.int | |
| import kore.vo.field.value.string | |
| import kore.vo.field.voList | |
| import net.sourceforge.tess4j.Tesseract | |
| import java.awt.image.BufferedImage | |
| import java.io.File | |
/**
 * Wrapper around a Tess4J [Tesseract] engine configured with a data directory and language string.
 *
 * Instances are created through the companion `invoke` operator (`Tess()` / `Tess("eng")`),
 * which first copies the required `.traineddata` files from the classpath into
 * `<user.home>/koreTessData`.
 *
 * @param path directory handed to `Tesseract.setDatapath`.
 * @param lang language string handed to `Tesseract.setLanguage` (e.g. `"eng+kor"`).
 */
class Tess internal constructor(path: String, lang: String) {
    /**
     * Result of [extractCharacters]: the recognized items in [list].
     *
     * [width] and [height] default to 0; nothing in this class assigns them.
     * NOTE(review): presumably meant to carry the source image size — confirm with callers.
     */
    class OCRList : VO() {
        var width by int(0)
        var height by int(0)
        val list by voList(::OCRWord) {
            default { arrayListOf() }
        }
    }

    /** One recognized item: its text ([char]), bounding box, and the engine's confidence ([accuracy]). */
    class OCRWord : VO() {
        var index by int
        var char by string
        var x by int
        var y by int
        var width by int
        var height by int
        var data by string("")
        var accuracy by float(0f)
    }

    /**
     * Granularity of the items returned by [extractCharacters].
     *
     * The `ordinal` is passed directly to `Tesseract.getWords` as the page-iterator level,
     * so the declaration order is significant.
     * NOTE(review): presumably aligned with Tess4J's `TessPageIteratorLevel`
     * (`RIL_BLOCK` = 0 … `RIL_SYMBOL` = 4) — verify before reordering.
     */
    enum class OCRLevel {
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }

    companion object {
        /**
         * Ensures a `<name>.traineddata` file exists in `<user.home>/koreTessData` for every
         * `+`-separated entry of [lang], copying missing files from the classpath root.
         *
         * The check runs on every call, so a language requested after the first instance
         * was created is still extracted.
         *
         * @return absolute path of the data directory.
         * @throws IllegalArgumentException if a required file is missing both on disk and on the classpath.
         */
        private fun prepareData(lang: String): String {
            val dir = File(System.getProperty("user.home") + "/koreTessData")
            if (!dir.exists()) dir.mkdirs()
            lang.split("+").forEach { name ->
                // Trim the language name itself so "eng + kor" resolves to "eng.traineddata".
                val fileName = "${name.trim()}.traineddata"
                val targetFile = File(dir, fileName)
                if (!targetFile.exists()) {
                    // Open the resource first so no empty file is created when it is missing.
                    Tess::class.java.getResourceAsStream("/$fileName")?.use { input ->
                        targetFile.outputStream().use { output ->
                            input.copyTo(output)
                        }
                    } ?: throw IllegalArgumentException("Tessdata file not found for language: $fileName")
                }
            }
            return dir.absolutePath
        }

        /** Creates a [Tess] for [lang] after making sure its trained data is available on disk. */
        operator fun invoke(lang: String = "eng+kor"): Tess = Tess(prepareData(lang), lang)
    }

    private val tess = Tesseract().also {
        it.setDatapath(path)
        it.setLanguage(lang)
    }

    /**
     * Runs OCR on [image] and returns the recognized text.
     *
     * @param image the image to recognize.
     * @param isPreprocess when `true`, [image] is first passed through `preprocessImage`
     *   and the preprocessed copy is recognized instead.
     * @return the string returned by `Tesseract.doOCR`.
     */
    fun extractText(image: BufferedImage, isPreprocess: Boolean = true): String {
        val img = if (isPreprocess) preprocessImage(image) else image
        return try {
            tess.doOCR(img)
        } finally {
            // When isPreprocess is false, img is the caller's image, so this single flush covers both cases.
            img.flush()
        }
    }

    /**
     * Runs OCR on [image] and returns each recognized item with its bounding box and confidence.
     *
     * Items whose text is empty after trimming are skipped and do not consume an index.
     *
     * @param image the image to recognize.
     * @param level granularity of the returned items.
     * @param isPreprocess when `true`, [image] is first passed through `preprocessImage`;
     *   bounding boxes then refer to the preprocessed copy.
     * @return an [OCRList] whose `list` holds one [OCRWord] per non-blank item, indexed from 0.
     */
    fun extractCharacters(
        image: BufferedImage,
        level: OCRLevel = OCRLevel.SYMBOL,
        isPreprocess: Boolean = true,
    ): OCRList {
        val img = if (isPreprocess) preprocessImage(image) else image
        try {
            val result = OCRList()
            var nextIndex = 0
            for (word in tess.getWords(img, level.ordinal)) {
                val text = word.text.trim()
                // Check the trimmed text: whitespace-only results carry no content.
                if (text.isEmpty()) continue
                val boundingBox = word.boundingBox
                println("OCR: $text, $boundingBox")
                result.list.add(OCRWord().apply {
                    index = nextIndex++
                    char = text
                    x = boundingBox.x
                    y = boundingBox.y
                    width = boundingBox.width
                    height = boundingBox.height
                    accuracy = word.confidence
                })
            }
            return result
        } finally {
            // When isPreprocess is false, img is the caller's image, so this single flush covers both cases.
            img.flush()
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment