// RichTextKit
// Copyright © 2019-2020 Topten Software. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this product except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
// Ported from: https://github.com/foliojs/linebreak
using System.Collections.Generic;
using Topten.RichTextKit.Utils;
namespace Topten.RichTextKit
{
/// <summary>
/// Implementation of the Unicode Line Break Algorithm
/// </summary>
internal class LineBreaker
{
/// <summary>
/// Static constructor. The body is empty; no static state is initialized here.
/// </summary>
static LineBreaker()
{
}
/// <summary>
/// Restart line breaking over a new string.
/// </summary>
/// <param name="str">The string to be broken</param>
public void Reset(string str)
{
    // Convert to UTF-32 so that every element is one whole code point,
    // then hand over to the code point overload which resets all state.
    var utf32 = Utf32Utils.ToUtf32(str);
    var codePoints = new Slice(utf32);
    Reset(codePoints);
}
/// <summary>
/// Restart line breaking over a new run of code points.
/// </summary>
/// <param name="codePoints">The code points of the string to be broken</param>
public void Reset(Slice codePoints)
{
    // Clear the state carried between pairs for rules LB8a, LB21a and LB30a
    _LB30a = 0;
    _LB21a = false;
    _LB8a = false;

    // Rewind the scan to the start of the text
    _lastPos = 0;
    _pos = 0;
    _first = true;

    // _curClass and _nextClass are not cleared here; NextBreak assigns both
    // when it consumes the first code point.
    _codePoints = codePoints;
}
// The code points being analysed (assigned by Reset)
private Slice _codePoints;

// True until NextBreak has consumed the first code point
private bool _first = true;

// Index of the next code point to be read
private int _pos;

// Index of the most recently read code point, i.e. the candidate break position
private int _lastPos;

// Class on the left-hand side of the pair currently being evaluated
private LineBreakClass _curClass;

// Class of the most recently read code point (right-hand side of the pair)
private LineBreakClass _nextClass;

// Rule LB8a: set when the most recently read class was ZWJ;
// while set, the pair table result is overridden to "no break"
private bool _LB8a;

// Rule LB21a: set when the left-hand class of the previous pair was HL;
// suppresses the break when the current left-hand class is HY or BA
private bool _LB21a;

// Rule LB30a: count of consecutive pairs whose left-hand class was RI
private int _LB30a;
/// <summary>
/// Enumerate all line breaks
/// </summary>
/// <param name="mandatoryOnly">
/// True to collect only the breaks reported by FindMandatoryBreaks;
/// false (the default) to collect every break reported by NextBreak.
/// </param>
/// <returns>A collection of line break positions</returns>
public List<LineBreak> GetBreaks(bool mandatoryOnly = false)
{
    var list = new List<LineBreak>();
    if (mandatoryOnly)
    {
        // Scans the whole text for hard breaks, independent of the NextBreak state
        list.AddRange(FindMandatoryBreaks());
    }
    else
    {
        // Drains NextBreak from the breaker's current position to the end of the text
        while (NextBreak(out var lb))
        {
            list.Add(lb);
        }
    }
    return list;
}
// Fold classes that get no separate treatment here into the class that
// stands in for them: AI, SA, SG and XX are treated as AL, and CJ is
// treated as NS. Every other class is returned unchanged.
LineBreakClass mapClass(LineBreakClass c)
{
    if (c == LineBreakClass.CJ)
    {
        return LineBreakClass.NS;
    }

    bool treatAsAlphabetic =
        c == LineBreakClass.AI ||
        c == LineBreakClass.SA ||
        c == LineBreakClass.SG ||
        c == LineBreakClass.XX;

    return treatAsAlphabetic ? LineBreakClass.AL : c;
}
// Map a class for use as the left-hand side at the start of a line:
// LF and NL are treated as BK, SP is treated as WJ, and everything else
// is returned unchanged.
LineBreakClass mapFirst(LineBreakClass c)
{
    if (c == LineBreakClass.LF || c == LineBreakClass.NL)
    {
        return LineBreakClass.BK;
    }

    if (c == LineBreakClass.SP)
    {
        return LineBreakClass.WJ;
    }

    return c;
}
// Read the code point at the current position, advance the position by one
// and return that code point's line break class after folding it with mapClass.
LineBreakClass nextCharClass()
{
    var codePoint = _codePoints[_pos++];
    var rawClass = UnicodeClasses.LineBreakClass(codePoint);
    return mapClass(rawClass);
}
// Handle the classes that are not resolved through the pair table.
// Returns false (no break before the code point just read) for SP, BK, LF,
// NL and CR, and null when the pair table must be consulted instead.
bool? getSimpleBreak()
{
    // Spaces never become the left-hand class; _curClass is left untouched
    if (_nextClass == LineBreakClass.SP)
    {
        return false;
    }

    // Any hard line terminator is remembered as BK so the next call to
    // NextBreak reports an explicit newline
    if (_nextClass == LineBreakClass.BK ||
        _nextClass == LineBreakClass.LF ||
        _nextClass == LineBreakClass.NL)
    {
        _curClass = LineBreakClass.BK;
        return false;
    }

    // CR is remembered as-is so a following LF can be recognised
    if (_nextClass == LineBreakClass.CR)
    {
        _curClass = LineBreakClass.CR;
        return false;
    }

    return null;
}
// Decide, using the pair table plus rules LB8a, LB21a and LB30a, whether a
// break is allowed between the current class and the class just read.
// lastClass is the class of the code point read immediately before this one;
// it tells us whether the pair is separated by a space.
// On the normal path _curClass advances to _nextClass; the two early returns
// below leave _curClass and the rule state untouched.
bool getPairTableBreak(LineBreakClass lastClass)
{
    bool afterSpace = lastClass == LineBreakClass.SP;
    var action = LineBreakPairTable.table[(int)_curClass][(int)_nextClass];

    bool breakHere;
    if (action == LineBreakPairTable.DI_BRK)
    {
        // Direct break
        breakHere = true;
    }
    else if (action == LineBreakPairTable.IN_BRK)
    {
        // Possible indirect break: only when a space intervenes
        breakHere = afterSpace;
    }
    else if (action == LineBreakPairTable.CI_BRK)
    {
        if (!afterSpace)
        {
            return false;
        }
        breakHere = true;
    }
    else if (action == LineBreakPairTable.CP_BRK)
    {
        // Prohibited for combining marks
        if (!afterSpace)
        {
            return false;
        }
        breakHere = false;
    }
    else
    {
        // PR_BRK and any other table value: no break
        breakHere = false;
    }

    // Rule LB8a
    if (_LB8a)
    {
        breakHere = false;
    }

    // Rule LB21a
    bool curIsHyphenOrBreakAfter =
        _curClass == LineBreakClass.HY || _curClass == LineBreakClass.BA;
    if (_LB21a && curIsHyphenOrBreakAfter)
    {
        breakHere = false;
        _LB21a = false;
    }
    else
    {
        _LB21a = _curClass == LineBreakClass.HL;
    }

    // Rule LB30a
    if (_curClass != LineBreakClass.RI)
    {
        _LB30a = 0;
    }
    else
    {
        _LB30a++;
        if (_LB30a == 2 && _nextClass == LineBreakClass.RI)
        {
            // Two RI in a row followed by another RI: force a break between them
            breakHere = true;
            _LB30a = 0;
        }
    }

    _curClass = _nextClass;
    return breakHere;
}
/// <summary>
/// Find the next line break, continuing from where the previous call stopped.
/// </summary>
/// <param name="lineBreak">
/// Receives the break that was found. When the method returns false this is
/// set to a LineBreak constructed with (0, 0, false).
/// </param>
/// <returns>True if a break was found; false when there are no more breaks</returns>
public bool NextBreak(out LineBreak lineBreak)
{
    // get the first char if we're at the beginning of the string
    if (_first)
    {
        // Empty input: there is no first code point to read, so report
        // "no more breaks" instead of indexing past the end of the text.
        if (_codePoints.Length == 0)
        {
            lineBreak = new LineBreak(0, 0, false);
            return false;
        }

        _first = false;
        var firstClass = nextCharClass();
        _curClass = mapFirst(firstClass);
        _nextClass = firstClass;
        _LB8a = (firstClass == LineBreakClass.ZWJ);
        _LB30a = 0;
    }

    while (_pos < _codePoints.Length)
    {
        // Remember where the code point about to be read starts; this is
        // the position reported if a break is found before it.
        _lastPos = _pos;
        var lastClass = _nextClass;
        _nextClass = nextCharClass();

        // explicit newline
        if ((_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (_nextClass != LineBreakClass.LF)))
        {
            // The code point just read starts a new line
            _curClass = mapFirst(mapClass(_nextClass));
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
            return true;
        }

        // Classes handled without the pair table first, then the pair table
        bool? shouldBreak = getSimpleBreak();
        if (!shouldBreak.HasValue)
        {
            shouldBreak = getPairTableBreak(lastClass);
        }

        // Rule LB8a
        _LB8a = (_nextClass == LineBreakClass.ZWJ);

        if (shouldBreak.Value)
        {
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, false);
            return true;
        }
    }

    if (_lastPos < _codePoints.Length)
    {
        // Report one final break at the end of the text; it is flagged as
        // required when the text ends with a hard line terminator.
        _lastPos = _codePoints.Length;
        var required = (_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (_nextClass != LineBreakClass.LF));
        lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _lastPos, required);
        return true;
    }
    else
    {
        lineBreak = new LineBreak(0, 0, false);
        return false;
    }
}
/// <summary>
/// Scan the whole text for mandatory (hard) line breaks.
/// </summary>
/// <remarks>
/// The scan always starts at index 0 and does not use or modify the state
/// used by NextBreak. Each break is constructed as (index of the terminator,
/// index just past the terminator, true). A CR immediately followed by LF is
/// reported as a single break spanning both code points.
/// </remarks>
/// <returns>A lazily evaluated sequence of mandatory line breaks</returns>
public IEnumerable<LineBreak> FindMandatoryBreaks()
{
    for (int i = 0; i < _codePoints.Length; i++)
    {
        switch (UnicodeClasses.LineBreakClass(_codePoints[i]))
        {
            case LineBreakClass.BK:
            case LineBreakClass.LF:
            case LineBreakClass.NL: // NL is a hard break in NextBreak too (see mapFirst/getSimpleBreak)
                yield return new LineBreak(i, i + 1, true);
                break;

            case LineBreakClass.CR:
                if (i + 1 < _codePoints.Length && UnicodeClasses.LineBreakClass(_codePoints[i + 1]) == LineBreakClass.LF)
                {
                    yield return new LineBreak(i, i + 2, true);

                    // Step over the LF of the CR LF pair so that it is not
                    // reported again as a second break at the same position.
                    i++;
                }
                else
                {
                    yield return new LineBreak(i, i + 1, true);
                }
                break;
        }
    }
}
// Starting at `from`, walk backwards over at most one trailing line
// terminator (BK, LF, CR, or a CR LF pair) and then over any run of SP
// code points. Returns the resulting index.
int findPriorNonWhitespace(int from)
{
    int pos = from;

    if (pos > 0)
    {
        var trailing = UnicodeClasses.LineBreakClass(_codePoints[pos - 1]);
        switch (trailing)
        {
            case LineBreakClass.BK:
            case LineBreakClass.LF:
            case LineBreakClass.CR:
                pos--;
                break;
        }

        // In case of CR + LF, remove both from the measure
        if (trailing == LineBreakClass.LF
            && pos > 0
            && UnicodeClasses.LineBreakClass(_codePoints[pos - 1]) == LineBreakClass.CR)
        {
            pos--;
        }
    }

    // Skip back over trailing spaces
    while (pos > 0 && UnicodeClasses.LineBreakClass(_codePoints[pos - 1]) == LineBreakClass.SP)
    {
        pos--;
    }

    return pos;
}
}
}