// RichTextKit // Copyright © 2019-2020 Topten Software. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); you may // not use this product except in compliance with the License. You may obtain // a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations // under the License. using System; using System.Collections.Generic; using System.Linq; using System.Runtime.CompilerServices; using System.Threading; using Topten.RichTextKit.Utils; namespace Topten.RichTextKit { /// /// Implementation of Unicode Bidirection Algorithm (UAX #9) /// https://unicode.org/reports/tr9/ /// /// /// The Bidi algorithm uses a number of memory arrays for resolved /// types, level information, bracket types, x9 removal maps and /// more... /// /// This implementation of the Bidi algorithm has been designed /// to reduce memory pressure on the GC by re-using the same /// work buffers, so instances of this class should be re-used /// as much as possible. /// class Bidi { /// /// A per-thread instance that can be re-used as often /// as necessary. /// internal static ThreadLocal Instance = new ThreadLocal(() => new Bidi()); /// /// Constructs a new instance of Bidi algorithm processor /// public Bidi() { } /// /// Get the resolved levels /// public Slice ResolvedLevels => _resolvedLevels; /// /// Get the resolved paragraph embedding level /// public int ResolvedParagraphEmbeddingLevel => _paragraphEmbeddingLevel; /// /// Process data from a BidiData instance /// /// public void Process(BidiData data) { Process(data.Types, data.PairedBracketTypes, data.PairedBracketValues, data.ParagraphEmbeddingLevel, data.HasBrackets, data.HasEmbeddings, data.HasIsolates, null); } /// /// Processes Bidi Data /// public void Process( Slice types, Slice pairedBracketTypes, Slice pairedBracketValues, sbyte paragraphEmbeddingLevel, bool? hasBrackets, bool? hasEmbeddings, bool? hasIsolates, Slice? outLevels ) { // Reset state _isolatePairs.Clear(); _workingTypesBuffer.Clear(); _levelRuns.Clear(); _resolvedLevelsBuffer.Clear(); // Setup original types and working types _originalTypes = types; _workingTypes = _workingTypesBuffer.Add(types); // Capture paired bracket values and types _pairedBracketTypes = pairedBracketTypes; _pairedBracketValues = pairedBracketValues; // Store things we know _hasBrackets = hasBrackets ?? _pairedBracketTypes.Length == _originalTypes.Length; _hasEmbeddings = hasEmbeddings ?? true; _hasIsolates = hasIsolates ?? true; // Find all isolate pairs FindIsolatePairs(); // Resolve the paragraph embedding level if (paragraphEmbeddingLevel == 2) _paragraphEmbeddingLevel = ResolveEmbeddingLevel(_originalTypes); else _paragraphEmbeddingLevel = paragraphEmbeddingLevel; // Create resolved levels buffer if (outLevels.HasValue) { if (outLevels.Value.Length != _originalTypes.Length) throw new ArgumentException("Out levels must be the same length as the input data"); _resolvedLevels = outLevels.Value; } else { _resolvedLevels = _resolvedLevelsBuffer.Add(_originalTypes.Length); _resolvedLevels.Fill(_paragraphEmbeddingLevel); } // Resolve explicit embedding levels (Rules X1-X8) ResolveExplicitEmbeddingLevels(); // Build the rule X9 map BuildX9RemovalMap(); // Process all isolated run sequences ProcessIsolatedRunSequences(); // Reset whitespace levels ResetWhitespaceLevels(); // Clean up AssignLevelsToCodePointsRemovedByX9(); } /// /// The original Directionality types as provided by the caller /// Slice _originalTypes; /// /// Paired bracket types as provided by caller /// Slice _pairedBracketTypes; /// /// Paired bracket values as provided by caller /// Slice _pairedBracketValues; /// /// Try if the incoming data is known to contain brackets /// bool _hasBrackets; /// /// True if the incoming data is known to contain embedding runs /// bool _hasEmbeddings; /// /// True if the incomding data is known to contain isolating runs /// bool _hasIsolates; /// /// Two directional mapping of isolate start/end pairs /// /// /// The forward mapping maps the start index to the end index. /// The reverse mapping maps the end index to the start index. /// BiDictionary _isolatePairs = new BiDictionary(); /// /// The working Directionality types /// Slice _workingTypes; /// /// The buffer underlying _workingTypes /// Buffer _workingTypesBuffer = new Buffer(); /// /// The resolved levels /// Slice _resolvedLevels; /// /// The buffer underlying _resolvedLevels /// Buffer _resolvedLevelsBuffer = new Buffer(); /// /// The resolve paragraph embedding level /// sbyte _paragraphEmbeddingLevel; /// /// Status stack entry used while resolving explicit /// embedding levels /// struct Status { public sbyte EmbeddingLevel; public Directionality OverrideStatus; public bool IsolateStatus; } /// /// The status stack used during resolution of explicit /// embedding and isolating runs /// Stack _statusStack = new Stack(); /// /// Mapping used to virtually remove characters for rule X9 /// Buffer _X9Map = new Buffer(); /// /// Re-usable list of level runs /// List _levelRuns = new List(); /// /// Mapping for the current isolating sequence, built /// by joining level runs from the x9 map. /// Buffer _isolatedRunMapping = new Buffer(); /// /// A stack of pending isolate openings used by FindIsolatePairs() /// Stack _pendingIsolateOpenings = new Stack(); /// /// Build a list of matching isolates for a directionality slice /// Implements BD9 /// void FindIsolatePairs() { // Redundant? if (!_hasIsolates) return; // Lets double check this as we go and clear the flag // if there actually aren't any isolate pairs as this might // mean we can skip some later steps _hasIsolates = false; // BD9... _pendingIsolateOpenings.Clear(); for (int i = 0; i < _originalTypes.Length; i++) { var t = _originalTypes[i]; if (t == Directionality.LRI || t == Directionality.RLI || t == Directionality.FSI) { _pendingIsolateOpenings.Push(i); _hasIsolates = true; } else if (t == Directionality.PDI) { if (_pendingIsolateOpenings.Count > 0) { _isolatePairs.Add(_pendingIsolateOpenings.Pop(), i); } _hasIsolates = true; } } } /// /// Resolve the explicit embedding levels from the original /// data. Implements rules X1 to X8. /// private void ResolveExplicitEmbeddingLevels() { // Redundant? if (!_hasIsolates && !_hasEmbeddings) return; // Work variables _statusStack.Clear(); int overflowIsolateCount = 0; int overflowEmbeddingCount = 0; int validIsolateCount = 0; // Constants const int maxStackDepth = 125; // Rule X1 - setup initial state _statusStack.Clear(); _statusStack.Push(new Status() { EmbeddingLevel = _paragraphEmbeddingLevel, OverrideStatus = Directionality.ON, // Neutral IsolateStatus = false, }); // Process all characters for (int i = 0; i < _originalTypes.Length; i++) { switch (_originalTypes[i]) { case Directionality.RLE: { // Rule X2 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 1) | 1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status() { EmbeddingLevel = newLevel, OverrideStatus = Directionality.ON, IsolateStatus = false, }); _resolvedLevels[i] = newLevel; } else { if (overflowIsolateCount == 0) overflowEmbeddingCount++; } break; } case Directionality.LRE: { // Rule X3 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 2) & ~1); if (newLevel < maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status() { EmbeddingLevel = newLevel, OverrideStatus = Directionality.ON, IsolateStatus = false, }); _resolvedLevels[i] = newLevel; } else { if (overflowIsolateCount == 0) overflowEmbeddingCount++; } break; } case Directionality.RLO: { // Rule X4 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 1) | 1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status() { EmbeddingLevel = newLevel, OverrideStatus = Directionality.R, IsolateStatus = false, }); _resolvedLevels[i] = newLevel; } else { if (overflowIsolateCount == 0) overflowEmbeddingCount++; } break; } case Directionality.LRO: { // Rule X5 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 2) & ~1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status() { EmbeddingLevel = newLevel, OverrideStatus = Directionality.L, IsolateStatus = false, }); _resolvedLevels[i] = newLevel; } else { if (overflowIsolateCount == 0) overflowEmbeddingCount++; } break; } case Directionality.RLI: case Directionality.LRI: case Directionality.FSI: { // Rule X5a, X5b and X5c var resolvedIsolate = _originalTypes[i]; if (resolvedIsolate == Directionality.FSI) { if (!_isolatePairs.TryGetValue(i, out var endOfIsolate)) { endOfIsolate = _originalTypes.Length; } // Rule X5c if (ResolveEmbeddingLevel(_originalTypes.SubSlice(i + 1, endOfIsolate - (i + 1))) == 1) resolvedIsolate = Directionality.RLI; else resolvedIsolate = Directionality.LRI; } // Replace RLI's level with current embedding level var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; // Apply override if (tos.OverrideStatus != Directionality.ON) { _workingTypes[i] = tos.OverrideStatus; } // Work out new level sbyte newLevel; if (resolvedIsolate == Directionality.RLI) newLevel = (sbyte)((tos.EmbeddingLevel + 1) | 1); else newLevel = (sbyte)((tos.EmbeddingLevel + 2) & ~1); // Valid? if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { validIsolateCount++; _statusStack.Push(new Status() { EmbeddingLevel = newLevel, OverrideStatus = Directionality.ON, IsolateStatus = true, }); } else { overflowIsolateCount++; } break; } case Directionality.BN: { // Mentioned in rule X6 - "for all types besides ..., BN, ..." // no-op break; } default: { // Rule X6 var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; if (tos.OverrideStatus != Directionality.ON) { _workingTypes[i] = tos.OverrideStatus; } break; } case Directionality.PDI: { // Rule X6a if (overflowIsolateCount > 0) { overflowIsolateCount--; } else if (validIsolateCount != 0) { overflowEmbeddingCount = 0; while (!_statusStack.Peek().IsolateStatus) _statusStack.Pop(); _statusStack.Pop(); validIsolateCount--; } var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; if (tos.OverrideStatus != Directionality.ON) { _workingTypes[i] = tos.OverrideStatus; } break; } case Directionality.PDF: { // Rule X7 if (overflowIsolateCount == 0) { if (overflowEmbeddingCount > 0) { overflowEmbeddingCount--; } else { if (!_statusStack.Peek().IsolateStatus && _statusStack.Count >= 2) { _statusStack.Pop(); } } } break; } case Directionality.B: { // Rule X8 _resolvedLevels[i] = _paragraphEmbeddingLevel; break; } } } } /// /// Resolve the paragraph embedding level if not explicitly passed /// by the caller. Also used by rule X5c for FSI isolating sequences. /// /// The data to be evaluated /// The resolved embedding level public sbyte ResolveEmbeddingLevel(Slice data) { // P2 for (var i = 0; i < data.Length; ++i) { switch (data[i]) { case Directionality.L: // P3 return 0; case Directionality.AL: case Directionality.R: // P3 return 1; case Directionality.FSI: case Directionality.LRI: case Directionality.RLI: // Skip isolate pairs // (Because we're working with a slice, we need to adjust the indicies // we're using for the isolatePairs map) if (_isolatePairs.TryGetValue(data.Start + i, out i)) { i -= data.Start; } else { i = data.Length; } break; } } // P3 return 0; } /// /// Build a map to the original data positions that excludes all /// the types defined by rule X9 /// void BuildX9RemovalMap() { // Reserve room for the x9 map _X9Map.Length = _originalTypes.Length; if (_hasEmbeddings || _hasIsolates) { // Build a map the removes all x9 characters var j = 0; for (int i = 0; i < _originalTypes.Length; i++) { if (!IsRemovedByX9(_originalTypes[i])) { _X9Map[j++] = i; } } // Set the final length _X9Map.Length = j; } else { for (int i = 0, count = _originalTypes.Length; i < count; i++) { _X9Map[i] = i; } } } /// /// Find the original character index for an entry in the X9 map /// /// Index in the x9 removal map /// Index to the original data [MethodImpl(MethodImplOptions.AggressiveInlining)] int mapX9(int index) { //return index < _X9Map.Length ? _X9Map[index] : _originalTypes.Length; return _X9Map[index]; } /// /// Provides information about a level run - a continuous /// sequence of equal levels. /// struct LevelRun { public LevelRun(int start, int length, int level, Directionality sos, Directionality eos) { this.start = start; this.length = length; this.level = level; this.sos = sos; this.eos = eos; } public int start; public int length; public int level; public Directionality sos; public Directionality eos; } /// /// Add a new level run /// /// /// This method resolves the sos and eos values for the run /// and adds the run to the list /// /// /// The index of the start of the run (in x9 removed units) /// The length of the run (in x9 removed units) /// The level of the run void AddLevelRun(int start, int length, int level) { // Get original indicies to first and last character in this run int firstCharIndex = mapX9(start); int lastCharIndex = mapX9(start + length - 1); // Work out sos int i = firstCharIndex - 1; while (i >= 0 && IsRemovedByX9(_originalTypes[i])) i--; var prevLevel = i < 0 ? _paragraphEmbeddingLevel : _resolvedLevels[i]; var sos = DirectionFromLevel(Math.Max(prevLevel, level)); // Work out eos var lastType = _workingTypes[lastCharIndex]; int nextLevel; if (lastType == Directionality.LRI || lastType == Directionality.RLI || lastType == Directionality.FSI) { nextLevel = _paragraphEmbeddingLevel; } else { i = lastCharIndex + 1; while (i < _originalTypes.Length && IsRemovedByX9(_originalTypes[i])) i++; nextLevel = i >= _originalTypes.Length ? _paragraphEmbeddingLevel : _resolvedLevels[i]; } var eos = DirectionFromLevel(Math.Max(nextLevel, level)); // Add the run _levelRuns.Add(new LevelRun(start, length, level, sos, eos)); } /// /// Find all runs of the same level, populating the _levelRuns /// collection /// void FindLevelRuns() { int currentLevel = -1; int runStart = 0; for (int i = 0; i < _X9Map.Length; ++i) { int level = _resolvedLevels[mapX9(i)]; if (level != currentLevel) { if (currentLevel != -1) { AddLevelRun(runStart, i - runStart, currentLevel); } currentLevel = level; runStart = i; } } // Don't forget the final level run if (currentLevel != -1) { AddLevelRun(runStart, _X9Map.Length - runStart, currentLevel); } } /// /// Given a character index, find the level run that starts at that position /// /// The index into the original (unmapped) data /// The index of the run that starts at that index int FindRunForIndex(int index) { for (int i = 0; i < _levelRuns.Count; i++) { // Passed index is for the original non-x9 filtered data, however // the level run ranges are for the x9 filtered data. Convert before // comparing if (mapX9(_levelRuns[i].start) == index) return i; } throw new InvalidOperationException("Internal error"); } /// /// Determine and the process all isolated run sequences /// void ProcessIsolatedRunSequences() { // Find all runs with the same level FindLevelRuns(); // Process them one at a time by first building // a mapping using slices from the x9 map for each // run section that needs to be joined together to // form an complete run. That full run mapping // will be placed in _isolatedRunMapping and then // processed by ProcessIsolatedRunSequence(). while (_levelRuns.Count > 0) { // Clear the mapping _isolatedRunMapping.Clear(); // Combine mappings from this run and all runs that continue on from it var runIndex = 0; Directionality eos = _levelRuns[0].eos; Directionality sos = _levelRuns[0].sos; int level = _levelRuns[0].level; while (true) { // Get the run var r = _levelRuns[runIndex]; // The eos of the isolating run is the eos of the // last level run that comprises it. eos = r.eos; // Remove this run as we've now processed it _levelRuns.RemoveAt(runIndex); // Add the x9 map indicies for the run range to the mapping // for this isolated run _isolatedRunMapping.Add(_X9Map.SubSlice(r.start, r.length)); // Get the last character and see if it's an isolating run with a matching // PDI and concatenate that run to this one int lastCharacterIndex = _isolatedRunMapping[_isolatedRunMapping.Length - 1]; var lastType = _originalTypes[lastCharacterIndex]; if ((lastType == Directionality.LRI || lastType == Directionality.RLI || lastType == Directionality.FSI) && _isolatePairs.TryGetValue(lastCharacterIndex, out var nextRunIndex)) { // Find the continuing run index runIndex = FindRunForIndex(nextRunIndex); } else { break; } } // Process this isolated run ProcessIsolatedRunSequence(sos, eos, level); } } /// /// The level of the isolating run currently being processed /// int _runLevel; /// /// The direction of the isolating run currently being processed /// Directionality _runDirection; /// /// The length of the isolating run currently being processed /// int _runLength; /// /// A mapped slice of the resolved types for the isolating run currently /// being processed /// MappedSlice _runResolvedTypes; /// /// A mapped slice of the original types for the isolating run currently /// being processed /// MappedSlice _runOriginalTypes; /// /// A mapped slice of the run levels for the isolating run currently /// being processed /// MappedSlice _runLevels; /// /// A mapped slice of the paired bracket types of the isolating /// run currently being processed /// MappedSlice _runPairedBracketTypes; /// /// A mapped slice of the paired bracket values of the isolating /// run currently being processed /// MappedSlice _runPairedBracketValues; /// /// Process a single isolated run sequence, where the character sequence /// mapping is currently held in _isolatedRunMapping. /// void ProcessIsolatedRunSequence(Directionality sos, Directionality eos, int runLevel) { // Create mappings onto the underlying data _runResolvedTypes = new MappedSlice(_workingTypes, _isolatedRunMapping.AsSlice()); _runOriginalTypes = new MappedSlice(_originalTypes, _isolatedRunMapping.AsSlice()); _runLevels = new MappedSlice(_resolvedLevels, _isolatedRunMapping.AsSlice()); if (_hasBrackets) { _runPairedBracketTypes = new MappedSlice(_pairedBracketTypes, _isolatedRunMapping.AsSlice()); _runPairedBracketValues = new MappedSlice(_pairedBracketValues, _isolatedRunMapping.AsSlice()); } _runLevel = runLevel; _runDirection = DirectionFromLevel(runLevel); _runLength = _runResolvedTypes.Length; // By tracking the types of characters known to be in the current run, we can // skip some of the rules that we know won't apply. The flags will be // initialized while we're processing rule W1 below. bool hasEN = false; bool hasAL = false; bool hasES = false; bool hasCS = false; bool hasAN = false; bool hasET = false; // Rule W1 // Also, set hasXX flags int i; var prevType = sos; for (i = 0; i < _runLength; i++) { var t = _runResolvedTypes[i]; switch (t) { case Directionality.NSM: _runResolvedTypes[i] = prevType; break; case Directionality.LRI: case Directionality.RLI: case Directionality.FSI: case Directionality.PDI: prevType = Directionality.ON; break; case Directionality.EN: hasEN = true; prevType = t; break; case Directionality.AL: hasAL = true; prevType = t; break; case Directionality.ES: hasES = true; prevType = t; break; case Directionality.CS: hasCS = true; prevType = t; break; case Directionality.AN: hasAN = true; prevType = t; break; case Directionality.ET: hasET = true; prevType = t; break; default: prevType = t; break; } } // Rule W2 if (hasEN) { for (i = 0; i < _runLength; i++) { if (_runResolvedTypes[i] == Directionality.EN) { for (int j = i - 1; j >= 0; j--) { var t = _runResolvedTypes[j]; if (t == Directionality.L || t == Directionality.R || t == Directionality.AL) { if (t == Directionality.AL) { _runResolvedTypes[i] = Directionality.AN; hasAN = true; } break; } } } } } // Rule W3 if (hasAL) { for (i = 0; i < _runLength; i++) { if (_runResolvedTypes[i] == Directionality.AL) { _runResolvedTypes[i] = Directionality.R; } } } // Rule W4 if ((hasES || hasCS) && (hasEN || hasAN)) { for (i = 1; i < _runLength - 1; ++i) { ref var rt = ref _runResolvedTypes[i]; if (rt == Directionality.ES) { var prevSepType = _runResolvedTypes[i - 1]; var succSepType = _runResolvedTypes[i + 1]; if (prevSepType == Directionality.EN && succSepType == Directionality.EN) { // ES between EN and EN rt = Directionality.EN; } } else if (rt == Directionality.CS) { var prevSepType = _runResolvedTypes[i - 1]; var succSepType = _runResolvedTypes[i + 1]; if ((prevSepType == Directionality.AN && succSepType == Directionality.AN) || (prevSepType == Directionality.EN && succSepType == Directionality.EN)) { // CS between (AN and AN) or (EN and EN) rt = prevSepType; } } } } // Rule W5 if (hasET && hasEN) { for (i = 0; i < _runLength; ++i) { if (_runResolvedTypes[i] == Directionality.ET) { // Locate end of sequence int seqStart = i; int seqEnd = i; while (seqEnd < _runLength && _runResolvedTypes[seqEnd] == Directionality.ET) seqEnd++; // Preceeded by, or followed by EN? if ((seqStart == 0 ? sos : _runResolvedTypes[seqStart - 1]) == Directionality.EN || (seqEnd == _runLength ? eos : _runResolvedTypes[seqEnd]) == Directionality.EN) { // Change the entire range for (int j = seqStart; i < seqEnd; ++i) { _runResolvedTypes[i] = Directionality.EN; } } // continue at end of sequence i = seqEnd; } } } // Rule W6 if (hasES || hasET || hasCS) { for (i = 0; i < _runLength; ++i) { ref var t = ref _runResolvedTypes[i]; if (t == Directionality.ES || t == Directionality.ET || t == Directionality.CS) { t = Directionality.ON; } } } // Rule W7. if (hasEN) { var prevStrongType = sos; for (i = 0; i < _runLength; ++i) { ref var rt = ref _runResolvedTypes[i]; if (rt == Directionality.EN) { // If prev strong type was an L change this to L too if (prevStrongType == Directionality.L) { _runResolvedTypes[i] = Directionality.L; } } // Remember previous strong type (NB: AL should already be changed to R) if (rt == Directionality.L || rt == Directionality.R) { prevStrongType = rt; } } } // Rule N0 - process bracket pairs if (_hasBrackets) { int count; var pairedBrackets = LocatePairedBrackets(); for (i = 0, count = pairedBrackets.Count; i < count; i++) { var pb = pairedBrackets[i]; var dir = InspectPairedBracket(pb); // Case "d" - no strong types in the brackets, ignore if (dir == Directionality.ON) { continue; } // Case "b" - strong type found that matches the embedding direction if ((dir == Directionality.L || dir == Directionality.R) && dir == _runDirection) { SetPairedBracketDirection(pb, dir); continue; } // Case "c" - found opposite strong type found, look before to establish context dir = InspectBeforePairedBracket(pb, sos); if (dir == _runDirection || dir == Directionality.ON) { dir = _runDirection; } SetPairedBracketDirection(pb, dir); } } // Rules N1 and N2 - resolve neutral types for (i = 0; i < _runLength; ++i) { var t = _runResolvedTypes[i]; if (IsNeutralType(t)) { // Locate end of sequence int seqStart = i; int seqEnd = i; while (seqEnd < _runLength && IsNeutralType(_runResolvedTypes[seqEnd])) seqEnd++; // Work out the preceding type Directionality typeBefore; if (seqStart == 0) { typeBefore = sos; } else { typeBefore = _runResolvedTypes[seqStart - 1]; if (typeBefore == Directionality.AN || typeBefore == Directionality.EN) { typeBefore = Directionality.R; } } // Work out the following type Directionality typeAfter; if (seqEnd == _runLength) { typeAfter = eos; } else { typeAfter = _runResolvedTypes[seqEnd]; if (typeAfter == Directionality.AN || typeAfter == Directionality.EN) { typeAfter = Directionality.R; } } // Work out the final resolved type Directionality resolvedType; if (typeBefore == typeAfter) { // Rule N1 resolvedType = typeBefore; } else { // Rule N2 resolvedType = _runDirection; } // Apply changes for (int j = seqStart; j < seqEnd; j++) { _runResolvedTypes[j] = resolvedType; } // continue after this run i = seqEnd; } } // Rules I1 and I2 - resolve implicit types if ((_runLevel & 0x01) == 0) { // Rule I1 - even for (i = 0; i < _runLength; i++) { var t = _runResolvedTypes[i]; ref var l = ref _runLevels[i]; if (t == Directionality.R) l++; else if (t == Directionality.AN || t == Directionality.EN) l += 2; } } else { // Rule I2 - odd for (i = 0; i < _runLength; i++) { var t = _runResolvedTypes[i]; ref var l = ref _runLevels[i]; if (t != Directionality.R) l++; } } } /// /// IComparer for BracketPairs /// class PairedBracketComparer : IComparer { int IComparer.Compare(BracketPair x, BracketPair y) { return x.OpeningIndex - y.OpeningIndex; } } /// /// An shared instance of the PairedBracket comparer /// static PairedBracketComparer _pairedBracketComparer = new PairedBracketComparer(); /// /// Maximum pairing depth for paired brackets /// const int MaxPairedBracketDepth = 63; /// /// Re-useable list of pending opening brackets used by the /// LocatePairedBrackets method /// List _pendingOpeningBrackets = new List(); /// /// Resolved list of paired brackets /// List _pairedBrackets = new List(); /// /// Locate all pair brackets in the current isolating run /// /// A sorted list of BracketPairs List LocatePairedBrackets() { // Clear work collections _pendingOpeningBrackets.Clear(); _pairedBrackets.Clear(); // Since List.Sort is expensive on memory if called often (it internally // allocates an ArraySorted object) and since we will rarely have many // items in this list (most paragraphs will only have a handful of bracket // pairs - if that), we use a simple linear lookup and insert most of the // time. If there are more that `sortLimit` paired brackets we abort th // linear searching/inserting and using List.Sort at the end. const int sortLimit = 8; // Process all characters in the run, looking for paired brackets for (int ich = 0, length = _runLength; ich < length; ich++) { // Ignore non-neutral characters if (_runResolvedTypes[ich] != Directionality.ON) continue; switch (_runPairedBracketTypes[ich]) { case PairedBracketType.o: if (_pendingOpeningBrackets.Count == MaxPairedBracketDepth) goto exit; _pendingOpeningBrackets.Insert(0, ich); break; case PairedBracketType.c: // see if there is a match for (int i = 0; i < _pendingOpeningBrackets.Count; i++) { if (_runPairedBracketValues[ich] == _runPairedBracketValues[_pendingOpeningBrackets[i]]) { // Add this paired bracket set var opener = _pendingOpeningBrackets[i]; if (_pairedBrackets.Count < sortLimit) { int ppi = 0; while (ppi < _pairedBrackets.Count && _pairedBrackets[ppi].OpeningIndex < opener) { ppi++; } _pairedBrackets.Insert(ppi, new BracketPair(opener, ich)); } else { _pairedBrackets.Add(new BracketPair(opener, ich)); } // remove up to and including matched opener _pendingOpeningBrackets.RemoveRange(0, i + 1); break; } } break; } } exit: // Is a sort pending? if (_pairedBrackets.Count > sortLimit) _pairedBrackets.Sort(_pairedBracketComparer); return _pairedBrackets; } /// /// Inspect a paired bracket set and determine its strong direction /// /// The paired bracket to be inpected /// The direction of the bracket set content Directionality InspectPairedBracket(BracketPair pb) { var dirEmbed = DirectionFromLevel(_runLevel); var dirOpposite = Directionality.ON; for (int ich = pb.OpeningIndex + 1; ich < pb.ClosingIndex; ich++) { var dir = GetStrongTypeN0(_runResolvedTypes[ich]); if (dir == Directionality.ON) continue; if (dir == dirEmbed) return dir; dirOpposite = dir; } return dirOpposite; } /// /// Look for a strong type before a paired bracket /// /// The paired bracket set to be inspected /// The sos in case nothing found before the bracket /// The strong direction before the brackets Directionality InspectBeforePairedBracket(BracketPair pb, Directionality sos) { for (int ich = pb.OpeningIndex - 1; ich >= 0; --ich) { var dir = GetStrongTypeN0(_runResolvedTypes[ich]); if (dir != Directionality.ON) return dir; } return sos; } /// /// Sets the direction of a bracket pair, including setting the direction of /// NSM's inside the brackets and following. /// /// The paired brackets /// The resolved direction for the bracket pair void SetPairedBracketDirection(BracketPair pb, Directionality dir) { // Set the direction of the brackets _runResolvedTypes[pb.OpeningIndex] = dir; _runResolvedTypes[pb.ClosingIndex] = dir; // Set the directionality of NSM's inside the brackets for (int i = pb.OpeningIndex + 1; i < pb.ClosingIndex; i++) { if (_runOriginalTypes[i] == Directionality.NSM) _runOriginalTypes[i] = dir; else break; } // Set the directionality of NSM's following the brackets for (int i = pb.ClosingIndex + 1; i < _runLength; i++) { if (_runOriginalTypes[i] == Directionality.NSM) _runResolvedTypes[i] = dir; else break; } } /// /// Hold the start and end index of a pair of brackets /// readonly struct BracketPair { /// /// Index of the opening bracket /// public readonly int OpeningIndex; /// /// Index of the closing bracket /// public readonly int ClosingIndex; /// /// Constructs a new paired bracket /// /// Index of the opening bracket /// Index of the closing bracket public BracketPair(int openingIndex, int closingIndex) { this.OpeningIndex = openingIndex; this.ClosingIndex = closingIndex; } } /// /// Resets whitespace levels. Implements rule L1 /// void ResetWhitespaceLevels() { for (int i = 0; i < _resolvedLevels.Length; i++) { var t = _originalTypes[i]; if (t == Directionality.B || t == Directionality.S) { // Rule L1, clauses one and two. _resolvedLevels[i] = _paragraphEmbeddingLevel; // Rule L1, clause three. for (int j = i - 1; j >= 0; --j) { if (IsWhitespace(_originalTypes[j])) { // including format // codes _resolvedLevels[j] = _paragraphEmbeddingLevel; } else { break; } } } } // Rule L1, clause four. for (int j = _resolvedLevels.Length - 1; j >= 0; j--) { if (IsWhitespace(_originalTypes[j])) { // including format codes _resolvedLevels[j] = _paragraphEmbeddingLevel; } else { break; } } } /// /// Assign levels to any characters that would be have been /// removed by rule X9. The idea is to keep level runs together /// that would otherwise be broken by an interfering isolate/embedding /// control character. /// void AssignLevelsToCodePointsRemovedByX9() { // Redundant? if (!_hasIsolates && !_hasEmbeddings) return; // No-op? if (_workingTypes.Length == 0) return; // Fix up first character if (_resolvedLevels[0] < 0) _resolvedLevels[0] = _paragraphEmbeddingLevel; if (IsRemovedByX9(_originalTypes[0])) _workingTypes[0] = _originalTypes[0]; for (int i = 1, length = _workingTypes.Length; i < length; i++) { var t = _originalTypes[i]; if (IsRemovedByX9(t)) { _workingTypes[i] = t; _resolvedLevels[i] = _resolvedLevels[i - 1]; } } } /// /// Check if a directionality type represents whitepsace /// /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] static bool IsWhitespace(Directionality biditype) { switch (biditype) { case Directionality.LRE: case Directionality.RLE: case Directionality.LRO: case Directionality.RLO: case Directionality.PDF: case Directionality.LRI: case Directionality.RLI: case Directionality.FSI: case Directionality.PDI: case Directionality.BN: case Directionality.WS: return true; default: return false; } } /// /// Convert a level to a direction where odd is RTL and /// even is LTR /// /// The level to convert /// A directionality [MethodImpl(MethodImplOptions.AggressiveInlining)] static Directionality DirectionFromLevel(int level) { return ((level & 0x1) == 0) ? Directionality.L : Directionality.R; } /// /// Helper to check if a directionality is removed by rule X9 /// /// The bidi type to check /// True if rule X9 would remove this character; otherwise false [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool IsRemovedByX9(Directionality biditype) { switch (biditype) { case Directionality.LRE: case Directionality.RLE: case Directionality.LRO: case Directionality.RLO: case Directionality.PDF: case Directionality.BN: return true; default: return false; } } /// /// Check if a a directionality is neutral for rules N1 and N2 /// /// /// [MethodImpl(MethodImplOptions.AggressiveInlining)] bool IsNeutralType(Directionality dir) { switch (dir) { case Directionality.B: case Directionality.S: case Directionality.WS: case Directionality.ON: case Directionality.RLI: case Directionality.LRI: case Directionality.FSI: case Directionality.PDI: return true; } return false; } /// /// Maps a direction to a strong type for rule N0 /// /// The direction to map /// A strong direction - R, L or ON [MethodImpl(MethodImplOptions.AggressiveInlining)] Directionality GetStrongTypeN0(Directionality dir) { switch (dir) { case Directionality.EN: case Directionality.AN: case Directionality.AL: case Directionality.R: return Directionality.R; case Directionality.L: return Directionality.L; default: return Directionality.ON; } } } }