using System;
using System.Collections.Generic;
using System.Text;
using Topten.RichTextKit.Utils;
namespace Topten.RichTextKit
{
static class GraphemeClusterAlgorithm
{
///
/// Given a sequence of code points, return its grapheme cluster boundaries
///
/// The code points
/// An enumerable of grapheme cluster boundaries
public static IEnumerable GetBoundaries(Slice codePoints)
{
for (int i = 0; i <= codePoints.Length; i++)
{
if (IsBoundary(codePoints, i))
yield return i;
}
}
///
/// Check if a position in a code point buffer is a grapheme cluster boundary
///
/// The code points
/// The position to check
///
public static bool IsBoundary(Slice codePoints, int position)
{
if (codePoints.Length == 0)
return false;
// Get the grapheme cluster class of the character on each side
var a = position <= 0 ? GraphemeClusterClass.SOT : UnicodeClasses.GraphemeClusterClass(codePoints[position - 1]);
var b = position < codePoints.Length ? UnicodeClasses.GraphemeClusterClass(codePoints[position]) : GraphemeClusterClass.EOT;
// Rule 11 - Special handling for ZWJ in extended pictograph
if (a == GraphemeClusterClass.ZWJ)
{
var i = position - 2;
while (i >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[i]) == GraphemeClusterClass.Extend)
{
i--;
}
if (i >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[i]) == GraphemeClusterClass.ExtPict)
{
a = GraphemeClusterClass.ExtPictZwg;
}
}
// Special handling for regional indicator
// Rule 12 and 13
if (a == GraphemeClusterClass.Regional_Indicator)
{
// Count how many
int count = 0;
for (int i = position - 1; i > 0; i--)
{
if (UnicodeClasses.GraphemeClusterClass(codePoints[i - 1]) != GraphemeClusterClass.Regional_Indicator)
break;
count++;
}
// If odd, switch from RI to Any
if ((count % 2) != 0)
{
a = GraphemeClusterClass.Any;
}
}
return pairTable[(int)b][(int)a] != 0;
}
static byte[][] pairTable= new byte[][]
{
// Any CR LF Control Extend Regional_Indicator Prepend SpacingMark L V T LV LVT ExtPict ZWJ SOT EOT ExtPictZwg
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Any
new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // CR
new byte[] { 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LF
new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Control
new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // Extend
new byte[] { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Regional_Indicator
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Prepend
new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // SpacingMark
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // L
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, }, // V
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, }, // T
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LV
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LVT
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, }, // ExtPict
new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // ZWJ
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // SOT
new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // EOT
new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // ExtPictZwg
};
}
}