annibal/bidi-tests.js

## bidi-tests.js
// Example usage: print the report for one sample string, then the combined
// report for the built-in sample set. The block scope keeps the temporaries
// out of the file's top-level namespace.
{
  const sampleReport = testWidthReductionTechniques("Hello World");
  console.log(sampleReport);

  const suiteReport = runWidthTests();
  console.log(suiteReport);
}

/**
 * Applies a set of experimental "width reduction" transforms to a string and
 * returns a human-readable report.
 *
 * Each transform interleaves the input with Unicode bidirectional controls,
 * joiners, or combining marks. The input is processed one code point at a
 * time, so characters outside the BMP (e.g. emoji) are never split into lone
 * surrogates by an inserted control character.
 *
 * @param {string} inputText - Text to transform.
 * @returns {string} Report listing, for every technique, the transformed
 *   string, its length in UTF-16 code units, and the hex code points of the
 *   original and transformed strings.
 */
function testWidthReductionTechniques(inputText) {
  // Directional control characters used by the transforms below
  const RLM = "\u200F"; // Right-to-Left Mark
  const RLO = "\u202E"; // Right-to-Left Override
  const PDF = "\u202C"; // Pop Directional Formatting
  const RLI = "\u2067"; // Right-to-Left Isolate
  const LRI = "\u2066"; // Left-to-Right Isolate
  const PDI = "\u2069"; // Pop Directional Isolate

  // Joiners and combining marks used by the transforms below
  const CGJ = "\u034F"; // Combining Grapheme Joiner
  const ZWJ = "\u200D"; // Zero Width Joiner
  const COMB_BREVE = "\u0306"; // Combining Breve
  const COMB_DBL_BREVE = "\u035C"; // Combining Double Breve Below
  const COMB_MACRON = "\u0304"; // Combining Macron

  // Split into code points; indexing with text[i] would cut surrogate pairs.
  const toChars = (text) => Array.from(text);

  // Hex dump of a string's code points, zero-padded to at least 4 digits.
  const toCodePoints = (text) =>
    toChars(text)
      .map((ch) => ch.codePointAt(0).toString(16).padStart(4, "0"))
      .join(" ");

  const techniques = [
    {
      name: "RTL Mark + Combining Joiner",
      transform: (text) =>
        toChars(text)
          .map((ch) => ch + RLM + CGJ)
          .join(""),
    },
    {
      name: "RTL Override Pairs",
      transform: (text) => {
        const chars = toChars(text);
        let result = "";
        for (let i = 0; i < chars.length; i += 2) {
          // Pair characters with an RTL override; a trailing odd one is kept as-is.
          result += i + 1 < chars.length ? chars[i] + RLO + chars[i + 1] + PDF : chars[i];
        }
        return result;
      },
    },
    {
      name: "Combining Marks Compression",
      transform: (text) =>
        toChars(text)
          .map((ch) => ch + COMB_BREVE)
          .join(""),
    },
    {
      name: "RTL Isolate with Combining",
      transform: (text) => {
        const chars = toChars(text);
        let result = "";
        for (let i = 0; i < chars.length; i += 3) {
          const group = chars.slice(i, i + 3);
          if (group.length === 3) {
            // Full group of three: isolate it and append a macron.
            result += RLI + group.join("") + PDI + COMB_MACRON;
          } else if (group.length === 2) {
            result += RLI + group.join("") + PDI;
          } else {
            result += group[0];
          }
        }
        return result;
      },
    },
    {
      name: "ZWJ Character Fusion",
      // A ZWJ goes between characters only, never before the first one.
      transform: (text) => toChars(text).join(ZWJ),
    },
    {
      name: "Bidirectional Layered Approach",
      transform: (text) => {
        // Even positions get an LTR isolate; odd positions get an RTL isolate
        // followed by a combining double breve. Everything sits in an outer RLI.
        const layered = toChars(text)
          .map((ch, i) => (i % 2 === 0 ? LRI + ch + PDI : RLI + ch + PDI + COMB_DBL_BREVE))
          .join("");
        return RLI + layered + PDI;
      },
    },
  ];

  const originalCodePoints = toCodePoints(inputText);

  let output = "=== Width Reduction Techniques Test ===\n\n";
  output += `Original text: "${inputText}" (${inputText.length} chars)\n\n`;

  for (const { name, transform } of techniques) {
    const transformed = transform(inputText);
    output += `Technique: ${name}\n`;
    output += `Transformed: "${transformed}" (${transformed.length} chars)\n`;
    output += `Original code points: ${originalCodePoints}\n`;
    output += `Transformed code points: ${toCodePoints(transformed)}\n\n`;
  }

  return output;
}

// Test with various strings
/**
 * Runs testWidthReductionTechniques over a fixed set of sample strings.
 *
 * @returns {string} The individual reports concatenated, each followed by a
 *   dashed separator line.
 */
function runWidthTests() {
  const separator = "\n----------------------------\n\n";
  const samples = ["Hello", "MMMMMM", "iiiiii", "Mixed width chars", "👨‍👩‍👧‍👦 Family emoji"];

  return samples.reduce(
    (report, sample) => report + testWidthReductionTechniques(sample) + separator,
    "",
  );
}

// Function to measure visual width in browser (if available)
/**
 * Measures the rendered width of a string with a canvas 2D context.
 *
 * @param {string} text - Text to measure.
 * @param {string} [fontStyle="16px Arial"] - Value assigned to the context's `font`.
 * @returns {object} When a global `document` exists: `{ text, fontStyle, width,
 *   pixelsPerChar }`, where `pixelsPerChar` is `width / text.length`.
 *   Otherwise `{ text, fontStyle, note }` explaining that no measurement was made.
 */
function measureVisualWidth(text, fontStyle = "16px Arial") {
  const hasDocument = typeof document !== "undefined";
  if (!hasDocument) {
    return { text, fontStyle, note: "Cannot measure width (not in browser environment)" };
  }

  // A throwaway canvas is enough; only its 2D context's text metrics are used.
  const ctx = document.createElement("canvas").getContext("2d");
  ctx.font = fontStyle;

  const { width } = ctx.measureText(text);
  const pixelsPerChar = width / text.length;

  return { text, fontStyle, width, pixelsPerChar };
}

## intToUnicodeHexEscape.js
/**
 * Converts a numeric Unicode code point to the corresponding character.
 *
 * Despite the name, the return value is the literal character, not a
 * `\uXXXX` escape sequence.
 *
 * @param {number} charNum - Code point (0 to 0x10FFFF).
 * @returns {string} The character for `charNum`.
 * @throws {RangeError} If `charNum` is not a valid code point.
 */
function intToUnicodeHexEscape(charNum) {
  // The earlier version appended to an undeclared `result` variable here,
  // which threw a ReferenceError on every call.
  return String.fromCodePoint(charNum);
}

## printAllUnicodeChars.js
// depends on unicodeCombinationCharactersRange.json

/**
 * Builds a text table showing every code point listed in
 * `unicodeCombinationCharactersRange.uCRanges` applied to two base characters.
 *
 * The table is produced three times: plain, with a Zero Width Joiner appended
 * to each listed character, and with a Zero Width Non-Joiner appended.
 *
 * @param {string} [baseChar=" "] - First base character to attach each mark to.
 * @param {string} [secChar=" "] - Second base character to attach each mark to.
 * @returns {string} The formatted table.
 */
function printAllCombiningCharacters(baseChar = " ", secChar = " ") {
  // Keep start/end numeric: they are used as loop bounds and for hex labels.
  const ranges = unicodeCombinationCharactersRange.uCRanges;

  // Format characters that are invisible on their own: shown inside brackets.
  const bracketed = new Set([0x200b, 0x2060, 0x2061, 0x2062, 0x2063, 0x2064, 0x034f]);
  // Joiners and double-width marks: shown between the two base characters.
  const sandwiched = new Set([0x200d, 0x200c, 0x035c, 0x0361]);

  const hex = (n) => n.toString(16).toUpperCase();

  // NOTE(review): `add` used to hold the text "0x200D"/"0x200C" and was never
  // applied, so all three sections were identical. Appending the joiner to the
  // listed character is the reading suggested by the section titles - confirm.
  const variants = [
    { title: "Regular", add: "" },
    { title: "With ZWJ", add: "\u200D" },
    { title: "With ZWNJ", add: "\u200C" },
  ];

  let result = "";

  for (const { title, add } of variants) {
    result += "█".repeat(24) + "\n████ All Combinable Chars, " + title + "\n";

    for (const range of ranges) {
      result += `\n\n=== ${range.name} (U+${hex(range.start)}-U+${hex(range.end)}) ===\n\n`;

      for (let idx = range.start; idx <= range.end; idx++) {
        const char = String.fromCodePoint(idx) + add;
        result += " ";

        if (bracketed.has(idx)) {
          result += `|[${baseChar}${char}]|[${secChar}${char}]`;
        } else if (sandwiched.has(idx)) {
          result += `| ${baseChar}${char}${secChar}| ${secChar}${char}${baseChar}`;
        } else {
          result += `| ${baseChar}${char} | ${secChar}${char} `;
        }

        // End the row after every 8 characters, labelled with the last code point.
        if ((idx - range.start + 1) % 8 === 0) {
          result += `| U+${hex(idx).padStart(4, "0")}\n`;
        }
      }
    }
  }

  return result;
}

// Example usage:
// console.log(printAllCombiningCharacters());

// Function to print specific ranges of combining characters
/**
 * Lists every code point in [start, end] attached to three base letters.
 *
 * @param {number} start - First code point (inclusive).
 * @param {number} end - Last code point (inclusive).
 * @param {string} [baseLetter="a"] - Base letter shown first; "o" and "i" are
 *   always shown as additional examples.
 * @returns {string} A header line followed by the entries, three per line.
 */
function printCombiningCharactersRange(start, end, baseLetter = "a") {
  const hex = (value) => value.toString(16).toUpperCase();

  const pieces = [`Combining characters from U+${hex(start)} to U+${hex(end)}:\n\n`];

  for (let codePoint = start; codePoint <= end; codePoint++) {
    const mark = String.fromCodePoint(codePoint);
    const label = `U+${hex(codePoint).padStart(4, "0")}`;

    pieces.push(`${label}: ${baseLetter}${mark} `, `| o${mark} | i${mark} `);

    // Wrap after every third entry.
    const position = codePoint - start + 1;
    if (position % 3 === 0) {
      pieces.push("\n");
    }
  }

  return pieces.join("");
}

// Function to test if a combining character might reduce width
/**
 * Produces a report that appends each candidate joiner or combining mark to
 * every character of a fixed set of sample strings.
 *
 * @returns {string} One section per candidate mark, each listing every sample
 *   string next to its modified form.
 */
function testWidthReducingCharacters() {
  // [code point, display name] for marks that might affect width in some fonts.
  const candidates = [
    [0x200d, "Zero Width Joiner (ZWJ)"],
    [0x200c, "Zero Width Non-Joiner (ZWNJ)"],
    [0x034f, "Combining Grapheme Joiner (CGJ)"],
    [0x035c, "Combining Double Breve Below"],
    [0x0361, "Combining Double Inverted Breve"],
    [0x0311, "Combining Inverted Breve"],
    [0x0306, "Combining Breve"],
    [0x0310, "Combining Candrabindu"],
    [0x0344, "Combining Greek Dialytika Tonos"],
    [0x0323, "Combining Dot Below"],
    [0x033e, "Combining Vertical Tilde"],
    [0x0355, "Combining Right Arrowhead Below"],
    [0x035d, "Combining Double Breve"],
    [0x0360, "Combining Double Tilde"],
  ];

  // Sample strings made of narrow and wide glyphs.
  const samples = ["m", "i", "w", "l", "nnnn", "iiii", "wwww", "mmmm", "....."];

  const sections = candidates.map(([codePoint, label]) => {
    const mark = String.fromCodePoint(codePoint);
    const heading = `U+${codePoint.toString(16).toUpperCase().padStart(4, "0")} ${label}:\n`;

    const rows = samples.map((sample) => {
      // Attach the mark after every character of the sample.
      const decorated = sample
        .split("")
        .map((unit) => unit + mark)
        .join("");
      return `  Base: "${sample}" → Modified: "${decorated}"\n`;
    });

    return heading + rows.join("") + "\n";
  });

  return "=== Testing Potentially Width-Reducing Characters ===\n\n" + sections.join("");
}

// Examples:
// console.log(printCombiningCharactersRange(0x0300, 0x0310));
// console.log(testWidthReducingCharacters());

## uCombine.js
// dependencies:
// - unicodeCombinationCharactersRange.json
// - combineRanges and the RegExp (defined below, used by uCombine)

// Intended behavior of uCombine. These examples are comments rather than
// executable statements: running them here would call uCombine before
// isCombineCharRegExp is initialized and throw a ReferenceError.
//
// unicode combine: for each addition, combine it with the base character.
//   uCombine("a", ["`"]) == "à"
//   uCombine("o", ["^"]) == "ô"
// NOTE(review): "`" and "^" are plain ASCII and fall outside every range in
// uCRanges, so the regex below will not treat them as combining marks -
// confirm whether a mapping from spacing accents was intended.
//
// apply all combination characters once to every character in base.
//   uCombine("b", ["A̪","B̫","C̬"]) == "b̪̫̬"
//
// extract the combining characters from each addition, ignore the base character.
//   uCombine("bob", ["A̪","B̫","C̬"]) == "b̪̫̬o̪̫̬b̪̫̬"
//
// otherwise lots of garbage characters like spaces would fall through.
//   uCombine("e", [" ̄", " ̄̄", " ̳̳", "x̳̳"]) == "ē̳̳̳̄̄"
//
// additions[i] must be an array of chars. so for compatibility, get the first valid
// combining character, reading from the end, and ignore the rest.
// also trim whitespace in case the source of the addition string was copied quickly.
//   uCombine("x", ["tá 10 g̊  "]) == "x̊"

// One "\u{start}-\u{end}" character-class fragment per configured range.
// Escapes are used instead of raw characters so the pattern stays readable
// and invisible format characters are not embedded in it.
const combineRanges = unicodeCombinationCharactersRange.uCRanges.map((r) => {
  const s = `\\u{${r.start.toString(16)}}`;
  const e = `\\u{${r.end.toString(16)}}`;
  return s + "-" + e;
});

// The fragments must sit inside [...] to form a character class; the `u` flag
// is required for the \u{...} escape syntax.
const isCombineCharRegExp = new RegExp("[" + combineRanges.join("") + "]", "u");

/**
 * Attaches combining marks taken from `additions` to every character of `base`.
 *
 * From each addition, only the last character that matches
 * `isCombineCharRegExp` is used; the addition's own base characters and
 * surrounding whitespace are ignored. The collected marks are appended, in
 * order, after each code point of `base`.
 *
 * @param {string} base - Text whose characters receive the marks.
 * @param {string[]} additions - Strings that each carry a combining mark.
 * @returns {string} `base` with the marks appended to every character; `""`
 *   when `base` is empty or missing; `base` unchanged when `additions` is
 *   missing, not an array, empty, or contains no combining mark.
 */
function uCombine(base, additions) {
  if (!base || base.length === 0) return "";

  if (!additions || !Array.isArray(additions) || additions.length === 0) return base;

  // The marks do not depend on the base character, so collect them once.
  let marks = "";
  for (const raw of additions) {
    // Non-string entries cannot carry a mark; skip them instead of throwing.
    if (typeof raw !== "string") continue;

    const chars = Array.from(raw.trim());

    // Scan from the end and take the first combining character found.
    for (let k = chars.length - 1; k >= 0; k--) {
      if (isCombineCharRegExp.test(chars[k])) {
        marks += chars[k];
        break;
      }
    }
  }

  if (marks === "") return base;

  // Iterate by code point so surrogate pairs in `base` stay intact.
  return Array.from(base)
    .map((ch) => ch + marks)
    .join("");
}

## unicodeCombinationCharactersRange.json
{
  "meta": {
    "filename": "unicodeCombinationCharactersRange.json",
    "usage": "const char = intToUnicodeHexEscape(uCRanges[14].start)",
    "-":"--",
    "all unicode latest characters data": "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt",
    "top most ranges": "Default chars like à and ô",
     "from Hebrew to Arabic": "Additional combining character blocks",
     "bottom most ranges": "Special format characters that affect text rendering",
     "o": "k"
  },
  "uCRanges": [
    { "start": 768,   "end": 879,   "name": "Comb. Diacrt. Marks"               },
    { "start": 6832,  "end": 6911,  "name": "Comb. Diacrt. Marks Extended"      },
    { "start": 7616,  "end": 7679,  "name": "Comb. Diacrt. Marks Supplement"    },
    { "start": 8400,  "end": 8447,  "name": "Comb. Diacrt. Marks for Symbols"   },
    { "start": 65056, "end": 65071, "name": "Comb. Half Marks"                  },

    { "start": 1425,  "end": 1469,  "name": "Hebrew Combining Marks"            },
    { "start": 1471,  "end": 1471,  "name": "Hebrew Point Rafe"                 },
    { "start": 1473,  "end": 1474,  "name": "Hebrew Points"                     },
    { "start": 1476,  "end": 1477,  "name": "Hebrew Mark"                       },
    { "start": 1479,  "end": 1479,  "name": "Hebrew Point Qamats Qatan"         },
    { "start": 1552,  "end": 1562,  "name": "Arabic Combining Marks"            },
    { "start": 1611,  "end": 1631,  "name": "Arabic Fathatan...Sukun"           },
    { "start": 1648,  "end": 1648,  "name": "Arabic Letter Superscript Alef"    },
    { "start": 1750,  "end": 1756,  "name": "Arabic Small Fatha...Small Waw"    },
    { "start": 1759,  "end": 1764,  "name": "Arabic Small High..."              },
    { "start": 1767,  "end": 1768,  "name": "Arabic Small High Yeh/Noon"        },
    { "start": 1770,  "end": 1773,  "name": "Arabic Empty Centre..."            },
    { "start": 1809,  "end": 1809,  "name": "Syriac Abbreviation Mark"          },
    { "start": 1840,  "end": 1866,  "name": "Syriac Pthaha...Barrekh"           },
    { "start": 1958,  "end": 1968,  "name": "Thaana Combining Marks"            },
    { "start": 2027,  "end": 2035,  "name": "NKo Combining Marks"               },
    { "start": 2070,  "end": 2073,  "name": "Samaritan Marks"                   },
    { "start": 2075,  "end": 2083,  "name": "Samaritan Marks"                   },
    { "start": 2085,  "end": 2087,  "name": "Samaritan Marks"                   },
    { "start": 2089,  "end": 2093,  "name": "Samaritan Marks"                   },
    { "start": 2137,  "end": 2139,  "name": "Mandaic Affrication/Gemination"    },
    { "start": 2260,  "end": 2273,  "name": "Arabic Tone/Extended Marks"        },
    { "start": 2275,  "end": 2303,  "name": "Arabic Extended Marks"             },

    { "start": 8203,  "end": 8203,  "name": "Zero Width Space (ZWSP)"           },
    { "start": 8204,  "end": 8204,  "name": "Zero Width Non-Joiner (ZWNJ)"      },
    { "start": 8205,  "end": 8205,  "name": "Zero Width Joiner (ZWJ)"           },
    { "start": 8288,  "end": 8288,  "name": "Word Joiner (WJ)"                  },
    { "start": 8289,  "end": 8292,  "name": "Function Appl...Invisible Plus"    },
    { "start": 8298,  "end": 8303,  "name": "Inhibit Symmetric...Nominal Digit" },
    { "start": 847,   "end": 847,   "name": "Combining Grapheme Joiner (CGJ)"   }
  ]
}
	// Example usage: print the report for one sample string, then the combined
	// report for the built-in sample set. The block scope keeps the temporaries
	// out of the file's top-level namespace.
	{
	  const sampleReport = testWidthReductionTechniques("Hello World");
	  console.log(sampleReport);

	  const suiteReport = runWidthTests();
	  console.log(suiteReport);
	}

	/**
	 * Applies a set of experimental "width reduction" transforms to a string and
	 * returns a human-readable report.
	 *
	 * Each transform interleaves the input with Unicode bidirectional controls,
	 * joiners, or combining marks. The input is processed one code point at a
	 * time, so characters outside the BMP (e.g. emoji) are never split into lone
	 * surrogates by an inserted control character.
	 *
	 * @param {string} inputText - Text to transform.
	 * @returns {string} Report listing, for every technique, the transformed
	 *   string, its length in UTF-16 code units, and the hex code points of the
	 *   original and transformed strings.
	 */
	function testWidthReductionTechniques(inputText) {
	  // Directional control characters used by the transforms below
	  const RLM = "\u200F"; // Right-to-Left Mark
	  const RLO = "\u202E"; // Right-to-Left Override
	  const PDF = "\u202C"; // Pop Directional Formatting
	  const RLI = "\u2067"; // Right-to-Left Isolate
	  const LRI = "\u2066"; // Left-to-Right Isolate
	  const PDI = "\u2069"; // Pop Directional Isolate

	  // Joiners and combining marks used by the transforms below
	  const CGJ = "\u034F"; // Combining Grapheme Joiner
	  const ZWJ = "\u200D"; // Zero Width Joiner
	  const COMB_BREVE = "\u0306"; // Combining Breve
	  const COMB_DBL_BREVE = "\u035C"; // Combining Double Breve Below
	  const COMB_MACRON = "\u0304"; // Combining Macron

	  // Split into code points; indexing with text[i] would cut surrogate pairs.
	  const toChars = (text) => Array.from(text);

	  // Hex dump of a string's code points, zero-padded to at least 4 digits.
	  const toCodePoints = (text) =>
	    toChars(text)
	      .map((ch) => ch.codePointAt(0).toString(16).padStart(4, "0"))
	      .join(" ");

	  const techniques = [
	    {
	      name: "RTL Mark + Combining Joiner",
	      transform: (text) =>
	        toChars(text)
	          .map((ch) => ch + RLM + CGJ)
	          .join(""),
	    },
	    {
	      name: "RTL Override Pairs",
	      transform: (text) => {
	        const chars = toChars(text);
	        let result = "";
	        for (let i = 0; i < chars.length; i += 2) {
	          // Pair characters with an RTL override; a trailing odd one is kept as-is.
	          result += i + 1 < chars.length ? chars[i] + RLO + chars[i + 1] + PDF : chars[i];
	        }
	        return result;
	      },
	    },
	    {
	      name: "Combining Marks Compression",
	      transform: (text) =>
	        toChars(text)
	          .map((ch) => ch + COMB_BREVE)
	          .join(""),
	    },
	    {
	      name: "RTL Isolate with Combining",
	      transform: (text) => {
	        const chars = toChars(text);
	        let result = "";
	        for (let i = 0; i < chars.length; i += 3) {
	          const group = chars.slice(i, i + 3);
	          if (group.length === 3) {
	            // Full group of three: isolate it and append a macron.
	            result += RLI + group.join("") + PDI + COMB_MACRON;
	          } else if (group.length === 2) {
	            result += RLI + group.join("") + PDI;
	          } else {
	            result += group[0];
	          }
	        }
	        return result;
	      },
	    },
	    {
	      name: "ZWJ Character Fusion",
	      // A ZWJ goes between characters only, never before the first one.
	      transform: (text) => toChars(text).join(ZWJ),
	    },
	    {
	      name: "Bidirectional Layered Approach",
	      transform: (text) => {
	        // Even positions get an LTR isolate; odd positions get an RTL isolate
	        // followed by a combining double breve. Everything sits in an outer RLI.
	        const layered = toChars(text)
	          .map((ch, i) => (i % 2 === 0 ? LRI + ch + PDI : RLI + ch + PDI + COMB_DBL_BREVE))
	          .join("");
	        return RLI + layered + PDI;
	      },
	    },
	  ];

	  const originalCodePoints = toCodePoints(inputText);

	  let output = "=== Width Reduction Techniques Test ===\n\n";
	  output += `Original text: "${inputText}" (${inputText.length} chars)\n\n`;

	  for (const { name, transform } of techniques) {
	    const transformed = transform(inputText);
	    output += `Technique: ${name}\n`;
	    output += `Transformed: "${transformed}" (${transformed.length} chars)\n`;
	    output += `Original code points: ${originalCodePoints}\n`;
	    output += `Transformed code points: ${toCodePoints(transformed)}\n\n`;
	  }

	  return output;
	}

	// Test with various strings
	/**
	 * Runs testWidthReductionTechniques over a fixed set of sample strings.
	 *
	 * @returns {string} The individual reports concatenated, each followed by a
	 *   dashed separator line.
	 */
	function runWidthTests() {
	  const separator = "\n----------------------------\n\n";
	  const samples = ["Hello", "MMMMMM", "iiiiii", "Mixed width chars", "👨‍👩‍👧‍👦 Family emoji"];

	  return samples.reduce(
	    (report, sample) => report + testWidthReductionTechniques(sample) + separator,
	    "",
	  );
	}

	// Function to measure visual width in browser (if available)
	/**
	 * Measures the rendered width of a string with a canvas 2D context.
	 *
	 * @param {string} text - Text to measure.
	 * @param {string} [fontStyle="16px Arial"] - Value assigned to the context's `font`.
	 * @returns {object} When a global `document` exists: `{ text, fontStyle, width,
	 *   pixelsPerChar }`, where `pixelsPerChar` is `width / text.length`.
	 *   Otherwise `{ text, fontStyle, note }` explaining that no measurement was made.
	 */
	function measureVisualWidth(text, fontStyle = "16px Arial") {
	  const hasDocument = typeof document !== "undefined";
	  if (!hasDocument) {
	    return { text, fontStyle, note: "Cannot measure width (not in browser environment)" };
	  }

	  // A throwaway canvas is enough; only its 2D context's text metrics are used.
	  const ctx = document.createElement("canvas").getContext("2d");
	  ctx.font = fontStyle;

	  const { width } = ctx.measureText(text);
	  const pixelsPerChar = width / text.length;

	  return { text, fontStyle, width, pixelsPerChar };
	}
	/**
	 * Converts a numeric Unicode code point to the corresponding character.
	 *
	 * Despite the name, the return value is the literal character, not a
	 * `\uXXXX` escape sequence.
	 *
	 * @param {number} charNum - Code point (0 to 0x10FFFF).
	 * @returns {string} The character for `charNum`.
	 * @throws {RangeError} If `charNum` is not a valid code point.
	 */
	function intToUnicodeHexEscape(charNum) {
	  // The earlier version appended to an undeclared `result` variable here,
	  // which threw a ReferenceError on every call.
	  return String.fromCodePoint(charNum);
	}
	// depends on unicodeCombinationCharactersRange.json

	/**
	 * Builds a text table showing every code point listed in
	 * `unicodeCombinationCharactersRange.uCRanges` applied to two base characters.
	 *
	 * The table is produced three times: plain, with a Zero Width Joiner appended
	 * to each listed character, and with a Zero Width Non-Joiner appended.
	 *
	 * @param {string} [baseChar=" "] - First base character to attach each mark to.
	 * @param {string} [secChar=" "] - Second base character to attach each mark to.
	 * @returns {string} The formatted table.
	 */
	function printAllCombiningCharacters(baseChar = " ", secChar = " ") {
	  // Keep start/end numeric: they are used as loop bounds and for hex labels.
	  const ranges = unicodeCombinationCharactersRange.uCRanges;

	  // Format characters that are invisible on their own: shown inside brackets.
	  const bracketed = new Set([0x200b, 0x2060, 0x2061, 0x2062, 0x2063, 0x2064, 0x034f]);
	  // Joiners and double-width marks: shown between the two base characters.
	  const sandwiched = new Set([0x200d, 0x200c, 0x035c, 0x0361]);

	  const hex = (n) => n.toString(16).toUpperCase();

	  // NOTE(review): `add` used to hold the text "0x200D"/"0x200C" and was never
	  // applied, so all three sections were identical. Appending the joiner to the
	  // listed character is the reading suggested by the section titles - confirm.
	  const variants = [
	    { title: "Regular", add: "" },
	    { title: "With ZWJ", add: "\u200D" },
	    { title: "With ZWNJ", add: "\u200C" },
	  ];

	  let result = "";

	  for (const { title, add } of variants) {
	    result += "█".repeat(24) + "\n████ All Combinable Chars, " + title + "\n";

	    for (const range of ranges) {
	      result += `\n\n=== ${range.name} (U+${hex(range.start)}-U+${hex(range.end)}) ===\n\n`;

	      for (let idx = range.start; idx <= range.end; idx++) {
	        const char = String.fromCodePoint(idx) + add;
	        result += " ";

	        if (bracketed.has(idx)) {
	          result += `|[${baseChar}${char}]|[${secChar}${char}]`;
	        } else if (sandwiched.has(idx)) {
	          result += `| ${baseChar}${char}${secChar}| ${secChar}${char}${baseChar}`;
	        } else {
	          result += `| ${baseChar}${char} | ${secChar}${char} `;
	        }

	        // End the row after every 8 characters, labelled with the last code point.
	        if ((idx - range.start + 1) % 8 === 0) {
	          result += `| U+${hex(idx).padStart(4, "0")}\n`;
	        }
	      }
	    }
	  }

	  return result;
	}

	// Example usage:
	// console.log(printAllCombiningCharacters());

	// Function to print specific ranges of combining characters
	/**
	 * Lists every code point in [start, end] attached to three base letters.
	 *
	 * @param {number} start - First code point (inclusive).
	 * @param {number} end - Last code point (inclusive).
	 * @param {string} [baseLetter="a"] - Base letter shown first; "o" and "i" are
	 *   always shown as additional examples.
	 * @returns {string} A header line followed by the entries, three per line.
	 */
	function printCombiningCharactersRange(start, end, baseLetter = "a") {
	  const hex = (value) => value.toString(16).toUpperCase();

	  const pieces = [`Combining characters from U+${hex(start)} to U+${hex(end)}:\n\n`];

	  for (let codePoint = start; codePoint <= end; codePoint++) {
	    const mark = String.fromCodePoint(codePoint);
	    const label = `U+${hex(codePoint).padStart(4, "0")}`;

	    pieces.push(`${label}: ${baseLetter}${mark} `, `| o${mark} | i${mark} `);

	    // Wrap after every third entry.
	    const position = codePoint - start + 1;
	    if (position % 3 === 0) {
	      pieces.push("\n");
	    }
	  }

	  return pieces.join("");
	}

	// Function to test if a combining character might reduce width
	/**
	 * Produces a report that appends each candidate joiner or combining mark to
	 * every character of a fixed set of sample strings.
	 *
	 * @returns {string} One section per candidate mark, each listing every sample
	 *   string next to its modified form.
	 */
	function testWidthReducingCharacters() {
	  // [code point, display name] for marks that might affect width in some fonts.
	  const candidates = [
	    [0x200d, "Zero Width Joiner (ZWJ)"],
	    [0x200c, "Zero Width Non-Joiner (ZWNJ)"],
	    [0x034f, "Combining Grapheme Joiner (CGJ)"],
	    [0x035c, "Combining Double Breve Below"],
	    [0x0361, "Combining Double Inverted Breve"],
	    [0x0311, "Combining Inverted Breve"],
	    [0x0306, "Combining Breve"],
	    [0x0310, "Combining Candrabindu"],
	    [0x0344, "Combining Greek Dialytika Tonos"],
	    [0x0323, "Combining Dot Below"],
	    [0x033e, "Combining Vertical Tilde"],
	    [0x0355, "Combining Right Arrowhead Below"],
	    [0x035d, "Combining Double Breve"],
	    [0x0360, "Combining Double Tilde"],
	  ];

	  // Sample strings made of narrow and wide glyphs.
	  const samples = ["m", "i", "w", "l", "nnnn", "iiii", "wwww", "mmmm", "....."];

	  const sections = candidates.map(([codePoint, label]) => {
	    const mark = String.fromCodePoint(codePoint);
	    const heading = `U+${codePoint.toString(16).toUpperCase().padStart(4, "0")} ${label}:\n`;

	    const rows = samples.map((sample) => {
	      // Attach the mark after every character of the sample.
	      const decorated = sample
	        .split("")
	        .map((unit) => unit + mark)
	        .join("");
	      return ` Base: "${sample}" → Modified: "${decorated}"\n`;
	    });

	    return heading + rows.join("") + "\n";
	  });

	  return "=== Testing Potentially Width-Reducing Characters ===\n\n" + sections.join("");
	}

	// Examples:
	// console.log(printCombiningCharactersRange(0x0300, 0x0310));
	// console.log(testWidthReducingCharacters());
	// dependencies:
	// - unicodeCombinationCharactersRange.json
	// - combineRanges and the RegExp (defined below, used by uCombine)

	// Intended behavior of uCombine. These examples are comments rather than
	// executable statements: running them here would call uCombine before
	// isCombineCharRegExp is initialized and throw a ReferenceError.
	//
	// unicode combine: for each addition, combine it with the base character.
	//   uCombine("a", ["`"]) == "à"
	//   uCombine("o", ["^"]) == "ô"
	// NOTE(review): "`" and "^" are plain ASCII and fall outside every range in
	// uCRanges, so the regex below will not treat them as combining marks -
	// confirm whether a mapping from spacing accents was intended.
	//
	// apply all combination characters once to every character in base.
	//   uCombine("b", ["A̪","B̫","C̬"]) == "b̪̫̬"
	//
	// extract the combining characters from each addition, ignore the base character.
	//   uCombine("bob", ["A̪","B̫","C̬"]) == "b̪̫̬o̪̫̬b̪̫̬"
	//
	// otherwise lots of garbage characters like spaces would fall through.
	//   uCombine("e", [" ̄", " ̄̄", " ̳̳", "x̳̳"]) == "ē̳̳̳̄̄"
	//
	// additions[i] must be an array of chars. so for compatibility, get the first valid
	// combining character, reading from the end, and ignore the rest.
	// also trim whitespace in case the source of the addition string was copied quickly.
	//   uCombine("x", ["tá 10 g̊ "]) == "x̊"

	// One "\u{start}-\u{end}" character-class fragment per configured range.
	// Escapes are used instead of raw characters so the pattern stays readable
	// and invisible format characters are not embedded in it.
	const combineRanges = unicodeCombinationCharactersRange.uCRanges.map((r) => {
	  const s = `\\u{${r.start.toString(16)}}`;
	  const e = `\\u{${r.end.toString(16)}}`;
	  return s + "-" + e;
	});

	// The fragments must sit inside [...] to form a character class; the `u` flag
	// is required for the \u{...} escape syntax.
	const isCombineCharRegExp = new RegExp("[" + combineRanges.join("") + "]", "u");

	/**
	 * Attaches combining marks taken from `additions` to every character of `base`.
	 *
	 * From each addition, only the last character that matches
	 * `isCombineCharRegExp` is used; the addition's own base characters and
	 * surrounding whitespace are ignored. The collected marks are appended, in
	 * order, after each code point of `base`.
	 *
	 * @param {string} base - Text whose characters receive the marks.
	 * @param {string[]} additions - Strings that each carry a combining mark.
	 * @returns {string} `base` with the marks appended to every character; `""`
	 *   when `base` is empty or missing; `base` unchanged when `additions` is
	 *   missing, not an array, empty, or contains no combining mark.
	 */
	function uCombine(base, additions) {
	  // The earlier text had the logical-OR operators written as "\|\|", which
	  // does not parse as JavaScript.
	  if (!base || base.length === 0) return "";

	  if (!additions || !Array.isArray(additions) || additions.length === 0) return base;

	  // The marks do not depend on the base character, so collect them once.
	  let marks = "";
	  for (const raw of additions) {
	    // Non-string entries cannot carry a mark; skip them instead of throwing.
	    if (typeof raw !== "string") continue;

	    const chars = Array.from(raw.trim());

	    // Scan from the end and take the first combining character found.
	    for (let k = chars.length - 1; k >= 0; k--) {
	      if (isCombineCharRegExp.test(chars[k])) {
	        marks += chars[k];
	        break;
	      }
	    }
	  }

	  if (marks === "") return base;

	  // Iterate by code point so surrogate pairs in `base` stay intact.
	  return Array.from(base)
	    .map((ch) => ch + marks)
	    .join("");
	}
	{
	"meta": {
	"filename": "unicodeCombinationCharactersRange.json",
	"usage": "const char = intToUnicodeHexEscape(uCRanges[14].start)",
	"-":"--",
	"all unicode latest characters data": "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt",
	"top most ranges": "Default chars like à and ô",
	"from Hebrew to Arabic": "Additional combining character blocks",
	"bottom most ranges": "Special format characters that affect text rendering",
	"o": "k"
	},
	"uCRanges": [
	{ "start": 768, "end": 879, "name": "Comb. Diacrt. Marks" },
	{ "start": 6832, "end": 6911, "name": "Comb. Diacrt. Marks Extended" },
	{ "start": 7616, "end": 7679, "name": "Comb. Diacrt. Marks Supplement" },
	{ "start": 8400, "end": 8447, "name": "Comb. Diacrt. Marks for Symbols" },
	{ "start": 65056, "end": 65071, "name": "Comb. Half Marks" },

	{ "start": 1425, "end": 1469, "name": "Hebrew Combining Marks" },
	{ "start": 1471, "end": 1471, "name": "Hebrew Point Rafe" },
	{ "start": 1473, "end": 1474, "name": "Hebrew Points" },
	{ "start": 1476, "end": 1477, "name": "Hebrew Mark" },
	{ "start": 1479, "end": 1479, "name": "Hebrew Point Qamats Qatan" },
	{ "start": 1552, "end": 1562, "name": "Arabic Combining Marks" },
	{ "start": 1611, "end": 1631, "name": "Arabic Fathatan...Sukun" },
	{ "start": 1648, "end": 1648, "name": "Arabic Letter Superscript Alef" },
	{ "start": 1750, "end": 1756, "name": "Arabic Small Fatha...Small Waw" },
	{ "start": 1759, "end": 1764, "name": "Arabic Small High..." },
	{ "start": 1767, "end": 1768, "name": "Arabic Small High Yeh/Noon" },
	{ "start": 1770, "end": 1773, "name": "Arabic Empty Centre..." },
	{ "start": 1809, "end": 1809, "name": "Syriac Abbreviation Mark" },
	{ "start": 1840, "end": 1866, "name": "Syriac Pthaha...Barrekh" },
	{ "start": 1958, "end": 1968, "name": "Thaana Combining Marks" },
	{ "start": 2027, "end": 2035, "name": "NKo Combining Marks" },
	{ "start": 2070, "end": 2073, "name": "Samaritan Marks" },
	{ "start": 2075, "end": 2083, "name": "Samaritan Marks" },
	{ "start": 2085, "end": 2087, "name": "Samaritan Marks" },
	{ "start": 2089, "end": 2093, "name": "Samaritan Marks" },
	{ "start": 2137, "end": 2139, "name": "Mandaic Affrication/Gemination" },
	{ "start": 2260, "end": 2273, "name": "Arabic Tone/Extended Marks" },
	{ "start": 2275, "end": 2303, "name": "Arabic Extended Marks" },

	{ "start": 8203, "end": 8203, "name": "Zero Width Space (ZWSP)" },
	{ "start": 8204, "end": 8204, "name": "Zero Width Non-Joiner (ZWNJ)" },
	{ "start": 8205, "end": 8205, "name": "Zero Width Joiner (ZWJ)" },
	{ "start": 8288, "end": 8288, "name": "Word Joiner (WJ)" },
	{ "start": 8289, "end": 8292, "name": "Function Appl...Invisible Plus" },
	{ "start": 8298, "end": 8303, "name": "Inhibit Symmetric...Nominal Digit" },
	{ "start": 847, "end": 847, "name": "Combining Grapheme Joiner (CGJ)" }
	]
	}