Skip to content

Instantly share code, notes, and snippets.

@reekystive
Last active August 26, 2024 10:18
Show Gist options
  • Select an option

  • Save reekystive/a9a2bdc42bfd9e1356ad181e14e70c44 to your computer and use it in GitHub Desktop.

Select an option

Save reekystive/a9a2bdc42bfd9e1356ad181e14e70c44 to your computer and use it in GitHub Desktop.
Convert a UTF-16 code unit range to a user-perceived character (grapheme cluster) range (Kotlin, Swift & TypeScript)
import android.icu.text.BreakIterator
/**
 * Converts a range of UTF-16 code-unit offsets in [str] into the matching range of
 * character indices, where a "character" is whatever
 * [BreakIterator.getCharacterInstance] reports as one unit.
 *
 * `unicodeRange.first` must be the offset at which the first selected character starts and
 * `unicodeRange.last + 1` the offset just past the last selected character. Both offsets
 * have to coincide with a boundary reported by the break iterator.
 *
 * @param str the text the offsets refer to.
 * @param unicodeRange inclusive range of UTF-16 code-unit offsets into [str].
 * @return the inclusive range of character indices covered by [unicodeRange] (an empty
 *   range when [unicodeRange] is empty and sits on a boundary), or `null` when either end
 *   is not on a character boundary or lies outside [str].
 */
fun convertUnicodeRangeToCharRange(str: String, unicodeRange: IntRange): IntRange? {
    val startOffset = unicodeRange.first
    val endOffsetExclusive = unicodeRange.last + 1

    val boundaries = BreakIterator.getCharacterInstance()
    boundaries.setText(str)

    var charStart = -1
    var charIndex = 0
    // Visit every boundary once, from offset 0 up to and including str.length.
    // charIndex is the number of characters that end at or before the current boundary.
    var boundary = boundaries.first()
    while (boundary != BreakIterator.DONE) {
        if (boundary == startOffset) {
            charStart = charIndex
        }
        if (boundary == endOffsetExclusive) {
            // The end is aligned; the result is only valid if the start was aligned too.
            return if (charStart == -1) null else IntRange(charStart, charIndex - 1)
        }
        boundary = boundaries.next()
        charIndex++
    }
    // The end offset never matched a boundary: it is inside a character or past the text.
    return null
}
/**
 * Demo: converts the UTF-16 offsets covering the first three characters of a sample string
 * into character indices and prints the result.
 */
fun main() {
    // U+1F642 U+200D U+2195 U+FE0F is one emoji ZWJ sequence spanning 5 UTF-16 code units.
    // It is written with escapes so the literal survives any re-encoding of this file.
    val str = "\uD83D\uDE42\u200D\u2195\uFE0F hello world"
    val unicodeRange = 0..6 // emoji (5 units) + space + 'h'
    val charRange = convertUnicodeRangeToCharRange(str, unicodeRange)
    println("Converted character range: $charRange")
    // Expected: 0..2, provided the break iterator treats the ZWJ sequence as one character.
    if (charRange != null) {
        // charRange counts characters, not UTF-16 units, so it cannot index into the String;
        // slice with the original UTF-16 offsets instead.
        println("Substring: ${str.substring(unicodeRange)}")
    }
}
import Foundation
/// Converts a UTF-16 based `NSRange` into a `Range<String.Index>` of `str`.
///
/// - Parameters:
///   - str: The string the range refers to.
///   - unicodeRange: Location and length measured in UTF-16 code units.
/// - Returns: The matching range of string indices, or `nil` when the range has a negative
///   location or length, extends past the end of `str`, or either end cannot be converted
///   to a `String.Index` within `str`.
func convertUnicodeRangeToCharRange(_ str: String, unicodeRange: NSRange) -> Range<String.Index>? {
    // `limitedBy:` only bounds movement towards the limit. A negative offset moves away from
    // `endIndex`, so it would go unchecked and could trap or yield an inverted range.
    guard unicodeRange.location >= 0, unicodeRange.length >= 0 else {
        return nil
    }

    let utf16 = str.utf16
    guard
        let start16 = utf16.index(
            utf16.startIndex, offsetBy: unicodeRange.location, limitedBy: utf16.endIndex),
        let end16 = utf16.index(
            start16, offsetBy: unicodeRange.length, limitedBy: utf16.endIndex),
        // Fails when the UTF-16 position has no corresponding index in `str`
        // (for example, the middle of a surrogate pair).
        let start = String.Index(start16, within: str),
        let end = String.Index(end16, within: str)
    else {
        return nil
    }
    return start..<end
}
// Demo. U+1F642 U+200D U+2195 U+FE0F is one emoji ZWJ sequence spanning 5 UTF-16 code
// units; it is written with escapes so the literal survives any re-encoding of this file.
let str = "\u{1F642}\u{200D}\u{2195}\u{FE0F} hello world"
let unicodeRange = NSRange(location: 0, length: 6) // emoji (5 units) + trailing space
if let charRange = convertUnicodeRangeToCharRange(str, unicodeRange: unicodeRange) {
    // Print the selected text; interpolating the range itself would only show opaque indices.
    print("Character range: \"\(str[charRange])\"")
    // Output: Character range: "<emoji> " (the emoji followed by one space)
} else {
    print("Invalid range")
}
/**
 * Converts a half-open range of UTF-16 code-unit offsets in `str` into the matching
 * half-open range of segment indices, using the segments produced by `Intl.Segmenter`.
 *
 * @param str - Text the offsets refer to.
 * @param unicodeRange - `start` (inclusive) and `end` (exclusive) UTF-16 offsets.
 * @returns Segment indices `{ start, end }`. A `start` that is not the offset at which
 *   some segment begins is reported as `0`; an `end` that is neither such an offset nor
 *   the total length of `str` is reported as `0`.
 */
function convertUnicodeRangeToCharRange(str: string, unicodeRange: { start: number; end: number }) {
  const segmenter = new Intl.Segmenter();
  const segments = Array.from(segmenter.segment(str));

  // -1 means "boundary not seen yet". 0 is a legitimate segment index, so it must not
  // double as the sentinel (otherwise `end: 0` is mistaken for "not found").
  let charStart = -1;
  let charEnd = -1;
  let currentUnicodeIndex = 0;

  for (const [index, segment] of segments.entries()) {
    if (currentUnicodeIndex === unicodeRange.start) {
      charStart = index;
    }
    if (currentUnicodeIndex === unicodeRange.end) {
      charEnd = index;
      break;
    }
    currentUnicodeIndex += segment.segment.length;
  }

  if (charEnd === -1) {
    // The loop only inspects offsets where a segment begins, so an end offset equal to
    // the string length is matched here; anything else keeps the 0 fallback.
    charEnd = currentUnicodeIndex === unicodeRange.end ? segments.length : 0;
  }

  return { start: Math.max(charStart, 0), end: charEnd };
}
// Demo. U+1F642 U+200D U+2195 U+FE0F is one emoji ZWJ sequence spanning 5 UTF-16 code
// units; it is written with escapes so the literal survives any re-encoding of this file.
const str1 = '\u{1F642}\u200D\u2195\uFE0F hello world';
const unicodeRange = { start: 0, end: 6 }; // emoji (5 units) + trailing space
const charRange = convertUnicodeRangeToCharRange(str1, unicodeRange);
console.log(charRange);
// Output: { start: 0, end: 2 }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment