WordBoundaryAlgorithm.cs 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. // RichTextKit
  2. // Copyright © 2019-2020 Topten Software. All Rights Reserved.
  3. //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); you may
  5. // not use this product except in compliance with the License. You may obtain
  6. // a copy of the License at
  7. //
  8. // http://www.apache.org/licenses/LICENSE-2.0
  9. //
  10. // Unless required by applicable law or agreed to in writing, software
  11. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. // License for the specific language governing permissions and limitations
  14. // under the License.
  15. //
  16. // Ported from: https://github.com/foliojs/linebreak
  17. using System.Collections;
  18. using System.Collections.Generic;
  19. using System.Diagnostics;
  20. using Topten.RichTextKit.Utils;
  21. namespace Topten.RichTextKit
  22. {
  /// <summary>
  /// Implementation of the word boundary algorithm
  /// </summary>
  internal class WordBoundaryAlgorithm
  {
      /// <summary>
      /// Locate the start of each "word" in a unicode string. Used for Ctrl+Left/Right
      /// in editor and different to the line break algorithm.
      /// </summary>
      /// <remarks>
      /// Index 0 is always reported. After that, an index is reported wherever a run
      /// of non-space code points begins, and wherever the boundary class changes to
      /// a different non-space class without an intervening space. If the scan ends
      /// while not inside a word (for example the text ends with spaces) the length
      /// of the slice is reported as a final boundary. Code points classified as
      /// <c>Ignore</c> are skipped entirely.
      /// </remarks>
      /// <param name="codePoints">The code points to scan</param>
      /// <returns>The word boundary indices, lazily enumerated in ascending order</returns>
      public static IEnumerable<int> FindWordBoundaries(Slice<int> codePoints)
      {
          // Start is always a word boundary
          yield return 0;

          // Find all boundaries
          bool inWord = false;
          var wordGroup = WordBoundaryClass.Ignore;
          for (int i = 0; i < codePoints.Length; i++)
          {
              // Get the boundary class of this code point
              var bg = UnicodeClasses.BoundaryGroup(codePoints[i]);

              // Ignored code points never start, end or split a word
              if (bg == WordBoundaryClass.Ignore)
              {
                  continue;
              }

              if (!inWord)
              {
                  // Skip spaces before a word
                  if (bg == WordBoundaryClass.Space)
                  {
                      continue;
                  }

                  // Found start of word (index 0 has already been reported)
                  if (i != 0)
                  {
                      yield return i;
                  }

                  // We're now in the word
                  inWord = true;
                  wordGroup = bg;
                  continue;
              }

              // Same kind as the current word, nothing to report
              if (wordGroup == bg)
              {
                  continue;
              }

              if (bg == WordBoundaryClass.Space)
              {
                  // A space ends the word; the next non-space starts a new one
                  inWord = false;
              }
              else
              {
                  // Switched to a different word kind without a space, emit a
                  // word boundary here and track the new kind. Without updating
                  // wordGroup every following code point of the new kind would
                  // emit another boundary, and switching back to the original
                  // kind would emit none.
                  yield return i;
                  wordGroup = bg;
              }
          }

          // Not in a word at the end (trailing spaces, or no word at all), so
          // the end of the text is also a boundary
          if (!inWord && codePoints.Length > 0)
          {
              yield return codePoints.Length;
          }
      }

      /// <summary>
      /// Check if the characters at the boundary between strings is a word boundary
      /// </summary>
      /// <remarks>
      /// Compares the boundary class of the last non-ignored code point of
      /// <paramref name="a"/> with that of the first non-ignored code point of
      /// <paramref name="b"/>. The join is a boundary when the two classes differ
      /// and the class on the <paramref name="b"/> side is not a space.
      /// </remarks>
      /// <param name="a">The first string</param>
      /// <param name="b">The second string</param>
      /// <returns>True if this is a word boundary</returns>
      public static bool IsWordBoundary(Slice<int> a, Slice<int> b)
      {
          // If either is empty, assume it's a boundary
          if (a.Length == 0 || b.Length == 0)
          {
              return true;
          }

          // Get the last non-ignore character from the first string
          // (stays Ignore if every code point in 'a' is ignored)
          var aGroup = WordBoundaryClass.Ignore;
          for (int i = a.Length - 1; i >= 0 && aGroup == WordBoundaryClass.Ignore; i--)
          {
              aGroup = UnicodeClasses.BoundaryGroup(a[i]);
          }

          // Get the first non-ignore character from the second string
          // (stays Ignore if every code point in 'b' is ignored)
          var bGroup = WordBoundaryClass.Ignore;
          for (int i = 0; i < b.Length && bGroup == WordBoundaryClass.Ignore; i++)
          {
              bGroup = UnicodeClasses.BoundaryGroup(b[i]);
          }

          // It's a boundary if the kind changes and a word (not a space) starts
          return aGroup != bGroup && bGroup != WordBoundaryClass.Space;
      }
  }
  109. }