// RichTextKit
// Copyright © 2019-2020 Topten Software. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this product except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Topten.RichTextKit.Utils;
namespace Topten.RichTextKit.Utils
{
/// <summary>
/// Represents a buffer of UTF-32 encoded code point data
/// </summary>
/// <remarks>
/// The mutating members declared here hide (<c>new</c>) the base class members
/// of the same name so that the cached surrogate position table can be
/// invalidated before the base implementation runs.
/// </remarks>
public class Utf32Buffer : Buffer<int>
{
    /// <summary>
    /// Code point stored in place of an ill-formed UTF-16 code unit (a high
    /// surrogate with no following low surrogate, or a low surrogate on its own).
    /// </summary>
    const int ReplacementCodePoint = 0xFFFD;

    /// <summary>
    /// Constructs a new Utf32Buffer
    /// </summary>
    public Utf32Buffer()
    {
    }

    /// <summary>
    /// Constructs a Utf32 buffer with an initial string
    /// </summary>
    /// <param name="str">The string to initialize with</param>
    public Utf32Buffer(string str)
    {
        Add(str);
    }

    /// <summary>
    /// Clears this buffer.
    /// </summary>
    public new void Clear()
    {
        _surrogatePositionsValid = false;
        base.Clear();
    }

    /// <summary>
    /// Appends UTF-32 data to this buffer
    /// </summary>
    /// <param name="data">The UTF-32 data to be appended</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public new Slice<int> Add(Slice<int> data)
    {
        _surrogatePositionsValid = false;
        return base.Add(data);
    }

    /// <summary>
    /// Appends text to this buffer, converting from UTF-16 to UTF-32
    /// </summary>
    /// <param name="str">The string of text to be appended</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public Slice<int> Add(string str)
    {
        return Insert(Length, str);
    }

    /// <summary>
    /// Appends text to this buffer, converting from UTF-16 to UTF-32
    /// </summary>
    /// <param name="str">The UTF-16 text to be appended</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public Slice<int> Add(ReadOnlySpan<char> str)
    {
        return Insert(Length, str);
    }

    /// <summary>
    /// Inserts UTF-32 data into this buffer
    /// </summary>
    /// <param name="position">The position at which to insert the data</param>
    /// <param name="data">The UTF-32 data to be inserted</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public new Slice<int> Insert(int position, Slice<int> data)
    {
        _surrogatePositionsValid = false;
        return base.Insert(position, data);
    }

    /// <summary>
    /// Inserts text into this buffer, converting from UTF-16 to UTF-32
    /// </summary>
    /// <param name="position">The position at which to insert the string</param>
    /// <param name="str">The string of text to be inserted</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public Slice<int> Insert(int position, string str)
    {
        return Insert(position, str.AsSpan());
    }

    /// <summary>
    /// Inserts text into this buffer, converting from UTF-16 to UTF-32
    /// </summary>
    /// <remarks>
    /// A well-formed surrogate pair is decoded to a single supplementary code
    /// point. An ill-formed code unit (a high surrogate that is not followed by
    /// a low surrogate, or a low surrogate on its own) is stored as U+FFFD and
    /// consumes exactly one UTF-16 code unit, so every stored code point at or
    /// above 0x10000 corresponds to exactly two UTF-16 code units of input.
    /// </remarks>
    /// <param name="position">The position at which to insert the text</param>
    /// <param name="str">The UTF-16 text to be inserted</param>
    /// <returns>A slice representing the added UTF-32 data.</returns>
    public Slice<int> Insert(int position, ReadOnlySpan<char> str)
    {
        // Invalidate surrogate positions
        _surrogatePositionsValid = false;

        // For performance reasons, and to avoid copying through the intermediate
        // arrays that Encoding.UTF32 would need, decode UTF-16 to UTF-32 directly
        // into the internal code point buffer.
        // Reserve one slot per UTF-16 code unit (the worst case); any surplus is
        // released again below once the real converted length is known.
        Slice<int> codePointBuffer = base.Insert(position, str.Length);

        int convertedLength;
        unsafe
        {
            fixed (int* pDestBuf = codePointBuffer.Underlying)
            fixed (char* pSrcBuf = str)
            {
                int* pDestStart = pDestBuf + codePointBuffer.Start;
                int* pDest = pDestStart;
                char* pSrc = pSrcBuf;
                char* pSrcEnd = pSrcBuf + str.Length;

                while (pSrc < pSrcEnd)
                {
                    char ch = *pSrc++;

                    if (!char.IsSurrogate(ch))
                    {
                        // Basic multilingual plane, stored as is
                        *pDest++ = ch;
                    }
                    else if (char.IsHighSurrogate(ch) && pSrc < pSrcEnd && char.IsLowSurrogate(*pSrc))
                    {
                        // Well-formed surrogate pair. The halves must be combined
                        // arithmetically (0x10000 + (hi << 10) + lo); OR-ing in the
                        // 0x10000 offset loses it for code points >= U+20000.
                        *pDest++ = char.ConvertToUtf32(ch, *pSrc++);
                    }
                    else
                    {
                        // Unpaired high surrogate or lone low surrogate. Do not
                        // consume the following code unit.
                        *pDest++ = ReplacementCodePoint;
                    }
                }

                // Work out the converted length
                convertedLength = (int)(pDest - pDestStart);
            }
        }

        // If the converted length was shorter due to surrogate pairs, remove
        // the extra space that was allocated
        if (convertedLength < str.Length)
        {
            base.Delete(position + convertedLength, str.Length - convertedLength);
        }

        // Return the encapsulating slice
        return SubSlice(position, convertedLength);
    }

    /// <summary>
    /// Deletes a section of the buffer
    /// </summary>
    /// <param name="from">The position to delete from</param>
    /// <param name="length">The length of the deletion</param>
    public new void Delete(int from, int length)
    {
        _surrogatePositionsValid = false;
        base.Delete(from, length);
    }

    /// <summary>
    /// Converts an offset into this buffer to a UTF-16 offset in the originally
    /// added string.
    /// </summary>
    /// <remarks>
    /// This function assumes the text was added to the buffer as UTF-16
    /// and hasn't been modified in any way since.
    /// </remarks>
    /// <param name="utf32Offset">The UTF-32 offset to convert</param>
    /// <returns>The converted UTF-16 character offset</returns>
    public int Utf32OffsetToUtf16Offset(int utf32Offset)
    {
        // Make sure surrogate positions are valid
        BuildSurrogatePositions();

        // How many surrogate pairs were there before this utf32 offset?
        // BinarySearch returns the index of a match, or the bitwise complement
        // of the insertion index; either way the result is the number of
        // recorded positions strictly less than utf32Offset.
        int pos = _surrogatePositions.BinarySearch(utf32Offset);
        if (pos < 0)
        {
            pos = ~pos;
        }

        // Each preceding pair occupies one extra UTF-16 code unit
        return utf32Offset + pos;
    }

    /// <summary>
    /// Converts an offset in the original UTF-16 string to a code point index
    /// into this UTF-32 buffer.
    /// </summary>
    /// <param name="utf16Offset">The UTF-16 character index</param>
    /// <returns>The UTF-32 code point index</returns>
    public int Utf16OffsetToUtf32Offset(int utf16Offset)
    {
        // Make sure surrogate positions are valid
        BuildSurrogatePositions();

        // Walk the recorded positions in ascending order. Every pair that sits
        // before the running position shifts it back by one; once a pair lies
        // beyond the running position no later pair can affect it.
        var pos = utf16Offset;
        for (int i = 0; i < _surrogatePositions.Count; i++)
        {
            var sp = _surrogatePositions[i];
            if (sp < pos)
                pos--;
            if (sp > pos)
                return pos;
        }
        return pos;
    }

    /// <summary>
    /// Gets the entire buffer's content as a string.
    /// </summary>
    /// <returns>The buffer content converted by <c>Utf32Utils.FromUtf32</c></returns>
    public override string ToString()
    {
        return Utf32Utils.FromUtf32(AsSlice());
    }

    /// <summary>
    /// Gets a part of the buffer as a string.
    /// </summary>
    /// <param name="start">The UTF-32 code point index of the first character to retrieve</param>
    /// <param name="length">The number of code points in the string to be retrieved</param>
    /// <returns>A string equivalent to the specified code point range.</returns>
    public string GetString(int start, int length)
    {
        return Utf32Utils.FromUtf32(SubSlice(start, length));
    }

    /// <summary>
    /// Indices of all code points in the buffer that require a surrogate
    /// pair when encoded as UTF-16 (in ascending order)
    /// </summary>
    readonly List<int> _surrogatePositions = new List<int>();

    /// <summary>
    /// True while <see cref="_surrogatePositions"/> reflects the current
    /// buffer content; reset by every mutating member of this class.
    /// </summary>
    bool _surrogatePositionsValid = false;

    /// <summary>
    /// Builds the list of indices of all code points that require surrogates
    /// when converted to UTF-16. Does nothing if the list is already valid.
    /// </summary>
    void BuildSurrogatePositions()
    {
        if (_surrogatePositionsValid)
            return;

        _surrogatePositionsValid = true;
        _surrogatePositions.Clear();

        unsafe
        {
            fixed (int* pBuf = this.Underlying)
            {
                int* pEnd = pBuf + this.Length;
                int* p = pBuf;
                while (p < pEnd)
                {
                    // Anything outside the basic multilingual plane needs a pair
                    if (p[0] >= 0x10000)
                        _surrogatePositions.Add((int)(p - pBuf));
                    p++;
                }
            }
        }
    }
}
}