// RichTextKit
// Copyright © 2019-2020 Topten Software. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this product except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
//
// Ported from: https://github.com/foliojs/linebreak

using System.Collections.Generic;
using Topten.RichTextKit.Utils;

namespace Topten.RichTextKit
{
    /// <summary>
    /// Implementation of the Unicode Line Break Algorithm
    /// </summary>
    /// <remarks>
    /// Usage: call one of the <c>Reset</c> overloads to supply the text, then
    /// either call <see cref="NextBreak"/> repeatedly until it returns false,
    /// or call <see cref="GetBreaks"/> to collect all breaks in one go.
    /// </remarks>
    internal class LineBreaker
    {
        /// <summary>
        /// Static constructor; currently performs no initialization
        /// </summary>
        static LineBreaker()
        {
        }

        /// <summary>
        /// Reset this line breaker
        /// </summary>
        /// <param name="str">The string to be broken</param>
        public void Reset(string str)
        {
            // Convert to UTF-32 code points and delegate to the slice overload
            Reset(new Slice<int>(Utf32Utils.ToUtf32(str)));
        }

        /// <summary>
        /// Reset this line breaker
        /// </summary>
        /// <param name="codePoints">The code points of the string to be broken</param>
        public void Reset(Slice<int> codePoints)
        {
            _codePoints = codePoints;
            _first = true;
            _pos = 0;
            _lastPos = 0;
            _LB8a = false;
            _LB21a = false;
            _LB30a = 0;
        }

        // The code points being broken
        Slice<int> _codePoints;

        // True until the first code point has been classified by NextBreak
        bool _first = true;

        // Index of the next code point to be read
        int _pos;

        // Index of the most recently considered break position
        int _lastPos;

        // Class on the left-hand side of the pair currently being evaluated
        LineBreakClass _curClass;

        // Class of the most recently read code point
        LineBreakClass _nextClass;

        // State tracking for rules LB8a, LB21a and LB30a
        bool _LB8a = false;
        bool _LB21a = false;
        int _LB30a = 0;

        /// <summary>
        /// Enumerate all line breaks
        /// </summary>
        /// <param name="mandatoryOnly">
        /// When true, only the breaks found by <see cref="FindMandatoryBreaks"/>
        /// are returned; otherwise every break reported by <see cref="NextBreak"/>
        /// </param>
        /// <returns>A collection of line break positions</returns>
        public List<LineBreak> GetBreaks(bool mandatoryOnly = false)
        {
            var list = new List<LineBreak>();
            if (mandatoryOnly)
            {
                list.AddRange(FindMandatoryBreaks());
            }
            else
            {
                while (NextBreak(out var lb))
                {
                    list.Add(lb);
                }
            }
            return list;
        }

        /// <summary>
        /// Map classes that get no special handling here onto the class
        /// they are treated as (AI/SA/SG/XX become AL, CJ becomes NS)
        /// </summary>
        /// <param name="c">The raw line break class</param>
        /// <returns>The resolved line break class</returns>
        LineBreakClass mapClass(LineBreakClass c)
        {
            switch (c)
            {
                case LineBreakClass.AI:
                case LineBreakClass.SA:
                case LineBreakClass.SG:
                case LineBreakClass.XX:
                    return LineBreakClass.AL;

                case LineBreakClass.CJ:
                    return LineBreakClass.NS;

                default:
                    return c;
            }
        }

        /// <summary>
        /// Map the class of a code point that starts a line (the first code
        /// point of the text, or the one following an explicit newline) to
        /// the value used to seed <c>_curClass</c>
        /// </summary>
        /// <param name="c">The class of the leading code point</param>
        /// <returns>LF and NL become BK, SP becomes WJ, anything else is unchanged</returns>
        LineBreakClass mapFirst(LineBreakClass c)
        {
            switch (c)
            {
                case LineBreakClass.LF:
                case LineBreakClass.NL:
                    return LineBreakClass.BK;

                case LineBreakClass.SP:
                    return LineBreakClass.WJ;

                default:
                    return c;
            }
        }

        /// <summary>
        /// Get the (mapped) class of the next code point and advance past it
        /// </summary>
        /// <returns>The resolved class of the code point that was at <c>_pos</c></returns>
        LineBreakClass nextCharClass()
        {
            return mapClass(UnicodeClasses.LineBreakClass(_codePoints[_pos++]));
        }

        /// <summary>
        /// Handle the classes that are not handled by the pair table
        /// </summary>
        /// <returns>
        /// false when <c>_nextClass</c> is a space or newline class (no break
        /// before it); null when the pair table needs to be consulted
        /// </returns>
        bool? getSimpleBreak()
        {
            switch (_nextClass)
            {
                case LineBreakClass.SP:
                    return false;

                case LineBreakClass.BK:
                case LineBreakClass.LF:
                case LineBreakClass.NL:
                    // Remember the newline so the next iteration of NextBreak
                    // reports an explicit break
                    _curClass = LineBreakClass.BK;
                    return false;

                case LineBreakClass.CR:
                    _curClass = LineBreakClass.CR;
                    return false;
            }

            return null;
        }

        /// <summary>
        /// Decide whether to break between <c>_curClass</c> and <c>_nextClass</c>
        /// using the pair table, then apply rules LB8a, LB21a and LB30a
        /// </summary>
        /// <param name="lastClass">The class of the code point immediately before the one just read</param>
        /// <returns>True if a break is allowed before the code point just read</returns>
        bool getPairTableBreak(LineBreakClass lastClass)
        {
            bool shouldBreak = false;

            switch (LineBreakPairTable.table[(int)_curClass][(int)_nextClass])
            {
                case LineBreakPairTable.DI_BRK:
                    // Direct break
                    shouldBreak = true;
                    break;

                case LineBreakPairTable.IN_BRK:
                    // Possible indirect break - only if preceded by a space
                    shouldBreak = lastClass == LineBreakClass.SP;
                    break;

                case LineBreakPairTable.CI_BRK:
                    shouldBreak = lastClass == LineBreakClass.SP;
                    if (!shouldBreak)
                    {
                        // Returns before the rule state and _curClass are
                        // updated, so the next pair is evaluated against the
                        // same _curClass
                        return false;
                    }
                    break;

                case LineBreakPairTable.CP_BRK:
                    // Prohibited for combining marks
                    if (lastClass != LineBreakClass.SP)
                    {
                        // As above: _curClass is deliberately left untouched
                        return shouldBreak;
                    }
                    break;

                case LineBreakPairTable.PR_BRK:
                    break;
            }

            // Rule LB8a: flag is set by NextBreak when the previously read
            // class was ZWJ
            if (_LB8a)
            {
                shouldBreak = false;
            }

            // Rule LB21a: once an HL has been seen, suppress the break after
            // a following HY or BA
            if (_LB21a && (_curClass == LineBreakClass.HY || _curClass == LineBreakClass.BA))
            {
                shouldBreak = false;
                _LB21a = false;
            }
            else
            {
                _LB21a = (_curClass == LineBreakClass.HL);
            }

            // Rule LB30a: count consecutive RI classes and force a break
            // after every second one when another RI follows
            if (_curClass == LineBreakClass.RI)
            {
                _LB30a++;
                if (_LB30a == 2 && (_nextClass == LineBreakClass.RI))
                {
                    shouldBreak = true;
                    _LB30a = 0;
                }
            }
            else
            {
                _LB30a = 0;
            }

            _curClass = _nextClass;

            return shouldBreak;
        }

        /// <summary>
        /// Find the next line break
        /// </summary>
        /// <param name="lineBreak">
        /// Receives the break that was found. When the method returns false
        /// this is set to <c>new LineBreak(0, 0, false)</c>
        /// </param>
        /// <returns>True if a break was found; false when there are no more breaks</returns>
        public bool NextBreak(out LineBreak lineBreak)
        {
            // get the first char if we're at the beginning of the string
            if (_first)
            {
                // An empty input has no first code point to classify; without
                // this guard nextCharClass() would index past the end
                if (_codePoints.Length == 0)
                {
                    lineBreak = new LineBreak(0, 0, false);
                    return false;
                }

                _first = false;
                var firstClass = nextCharClass();
                _curClass = mapFirst(firstClass);
                _nextClass = firstClass;
                _LB8a = (firstClass == LineBreakClass.ZWJ);
                _LB30a = 0;
            }

            while (_pos < _codePoints.Length)
            {
                _lastPos = _pos;
                var lastClass = _nextClass;
                _nextClass = nextCharClass();

                // explicit newline
                if ((_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (_nextClass != LineBreakClass.LF)))
                {
                    // The code point just read starts the next line
                    _curClass = mapFirst(mapClass(_nextClass));
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
                    return true;
                }

                bool? shouldBreak = getSimpleBreak();

                if (!shouldBreak.HasValue)
                {
                    shouldBreak = getPairTableBreak(lastClass);
                }

                // Rule LB8a
                _LB8a = (_nextClass == LineBreakClass.ZWJ);

                if (shouldBreak.Value)
                {
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, false);
                    return true;
                }
            }

            if (_lastPos < _codePoints.Length)
            {
                // Report one final break at the end of the text
                _lastPos = _codePoints.Length;
                var required = (_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (_nextClass != LineBreakClass.LF));
                lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _lastPos, required);
                return true;
            }
            else
            {
                lineBreak = new LineBreak(0, 0, false);
                return false;
            }
        }

        /// <summary>
        /// Scan the text for explicit newlines (classes BK, CR and LF) only,
        /// without running the full break algorithm
        /// </summary>
        /// <remarks>
        /// Reads directly from the current code points and does not touch the
        /// state used by <see cref="NextBreak"/>. A CR immediately followed by
        /// an LF produces a single break spanning both code points.
        /// </remarks>
        /// <returns>One required break for each newline sequence found</returns>
        public IEnumerable<LineBreak> FindMandatoryBreaks()
        {
            for (int i = 0; i < _codePoints.Length; i++)
            {
                var cls = UnicodeClasses.LineBreakClass(_codePoints[i]);
                switch (cls)
                {
                    case LineBreakClass.BK:
                        yield return new LineBreak(i, i + 1, true);
                        break;

                    case LineBreakClass.CR:
                        if (i + 1 < _codePoints.Length && UnicodeClasses.LineBreakClass(_codePoints[i + 1]) == LineBreakClass.LF)
                        {
                            yield return new LineBreak(i, i + 2, true);

                            // Step over the LF so the CR+LF pair isn't
                            // reported a second time on the next iteration
                            i++;
                        }
                        else
                        {
                            yield return new LineBreak(i, i + 1, true);
                        }
                        break;

                    case LineBreakClass.LF:
                        yield return new LineBreak(i, i + 1, true);
                        break;
                }
            }
        }

        /// <summary>
        /// Starting from a break position, step backwards over one trailing
        /// newline sequence (BK, LF, CR or CR+LF) and then over any run of
        /// spaces that precedes it
        /// </summary>
        /// <param name="from">The break position to start from</param>
        /// <returns>The index just past the last code point that was not skipped</returns>
        int findPriorNonWhitespace(int from)
        {
            if (from > 0)
            {
                var cls = UnicodeClasses.LineBreakClass(_codePoints[from - 1]);
                if (cls == LineBreakClass.BK || cls == LineBreakClass.LF || cls == LineBreakClass.CR)
                {
                    from--;
                }

                // ATZ: in case of cr + lf, remove both from measure
                if (from > 0 && cls == LineBreakClass.LF)
                {
                    cls = UnicodeClasses.LineBreakClass(_codePoints[from - 1]);
                    if (cls == LineBreakClass.CR)
                    {
                        from--;
                    }
                }
            }

            // Skip back over trailing spaces
            while (from > 0)
            {
                var cls = UnicodeClasses.LineBreakClass(_codePoints[from - 1]);
                if (cls == LineBreakClass.SP)
                {
                    from--;
                }
                else
                {
                    break;
                }
            }

            return from;
        }
    }
}