using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Text;
namespace FastReport.RichTextParser
{
/// <summary>
/// Result reported by the parser after each consumed character.
/// </summary>
enum ParserStatus
{
// The character was consumed but no token has been completed yet
// (the parser resets its status to this value at the start of every character).
Collecting,
// Not assigned anywhere in the parser code visible in this file.
Text,
// A control word has just been completed.
ControlTag,
// An opening brace '{' was recognized.
OpenBlock,
// A closing brace '}' was recognized.
CloseBlock
}
/// <summary>
/// This class detects RTF control sequences and text.
/// </summary>
class RTF_Parser
{
/// <summary>
/// Internal states of the character-level state machine driven by ParseByte.
/// </summary>
enum ParserState
{
// Plain text; braces and the backslash are recognized here.
Neutral,
// A backslash was seen; letters of a control word are being collected.
Control,
// Digits of a numeric parameter are being collected.
Number,
// "\'" was seen; the next character is stored as the first hex digit.
FirstNibble,
// The next character is stored as the second hex digit and the byte is collected.
SecondNibble,
// Characters are being discarded until skip_counter reaches zero (used after "\u").
SkipToNext,
// Characters are routed to ParseExtensionByte; the only transition into this
// state is currently disabled by "#if false".
RichFormatExtensions,
// "\-" was seen; the next character decides between a negative number
// and an optional hyphen.
CheckHyphen,
// A control word was ended by whitespace; following ' ' characters are skipped.
IgnoreSpaces
}
// Status computed for the character most recently passed to ParseByte.
ParserStatus status;
// Current state of the character-level state machine.
ParserState parser_state;
// Letters of the control word currently being collected.
StringBuilder control;
// Digits of the numeric parameter (or the two hex digits of a \'xx escape).
StringBuilder number;
// Plain text collected since the last completed token.
StringBuilder text;
// True once a digit has been read for the control word being collected.
bool has_value;
// Backing field of the ListItem property.
bool list_is_active;
// Name of the most recently completed control word.
string control_tag;
// Name of the control word completed before control_tag.
string previous_tag;
// Numeric parameter of the most recently completed control word (0 when absent).
long control_num;
// Text that preceded the most recently completed token.
string parsed_text;
// Last character passed to ParseByte.
char delimiter;
// Snapshot of has_value taken when the control word was completed.
bool has_has_value;
// Number of currently open '{' groups.
int indirection_counter;
// Remaining characters to discard in the SkipToNext state.
int skip_counter;
// Group depth compared against indirection_counter by ParseExtensionByte.
int extensions_skip_counter;
// 20200605
// Only initialized here; presumably consumed by the document builder - confirm.
internal bool override_default_color;
// 20180511
internal ParagraphFormat current_paragraph_format;
internal RunFormat current_run_format;
// Not touched by the parser code in this file; presumably maintained by the caller - confirm.
internal bool insideTable;
#region Unicode converters
// LCID whose decoder is currently selected (0 = not selected yet).
internal int current_lang;
// Charset of the current font; while it is 0, "\lang" is allowed to switch the decoder.
internal long font_charset;
// LCID established by "\deflang" (0 = none seen).
int default_lang;
const int Lang_EN_US = 1033;
// Decoder used to convert collected \'xx bytes into characters.
Decoder current_unicode_decoder;
// Cache of decoders keyed by LCID.
Dictionary<int, Decoder> unicode_decoders = new Dictionary<int, Decoder>();
// LCIDs saved on '{' and restored on '}'.
Stack<int> lang_ids = new Stack<int>();
// NOTE(review): not referenced by the code visible in this file; the type arguments
// are inferred from the signature of TranslateCharset - confirm.
Dictionary<long, int> translate_charset = new Dictionary<long, int>();
// Bytes collected from \'xx escapes that have not been decoded yet.
List<byte> raw_chars = new List<byte>();
#endregion
/// <summary>Text that preceded the most recently completed token.</summary>
public string Text
{
    get { return parsed_text; }
}

/// <summary>Name of the most recently completed control word.</summary>
public string Control
{
    get { return control_tag; }
}

/// <summary>Name of the control word completed before <see cref="Control"/>.</summary>
public string PreviousTag
{
    get { return previous_tag; }
}

/// <summary>Numeric parameter of the most recently completed control word.</summary>
public long Number
{
    get { return control_num; }
}

/// <summary>
/// Last parsed character when the current status is ControlTag; '\0' otherwise.
/// </summary>
public char Delimiter
{
    get
    {
        if (status == ParserStatus.ControlTag)
        {
            return delimiter;
        }
        return '\0';
    }
}

/// <summary>Status computed for the most recently parsed character.</summary>
public ParserStatus Status
{
    get { return status; }
}

/// <summary>True when the most recently completed control word carried digits.</summary>
public bool HasValue
{
    get { return has_has_value; }
}

/// <summary>Gets or sets the list-item flag stored by the parser.</summary>
public bool ListItem
{
    get { return list_is_active; }
    set { list_is_active = value; }
}
/// <summary>
/// Reports whether collected text is still pending in the buffer.
/// When it is, the text is copied to <see cref="Text"/> and true is returned;
/// when the buffer is empty, false is returned and <see cref="Text"/> is left untouched.
/// </summary>
public bool EndOfFile
{
    get
    {
        bool hasPendingText = text.Length != 0;
        if (hasPendingText)
        {
            parsed_text = text.ToString();
        }
        return hasPendingText;
    }
}
/// <summary>Resets the text exposed through the Text property to an empty string.</summary>
public void ClearParsedText()
{
    parsed_text = string.Empty;
}
#region Language selector and translator
/// <summary>
/// Makes the decoder for the given LCID the current one, creating and caching it on first use.
/// </summary>
/// <param name="lcid">
/// Locale identifier; 0 selects the default language, or en-US when no default is known.
/// </param>
private void SelectUnicodeDecoder(int lcid)
{
    if (lcid == 0)
    {
        lcid = default_lang != 0 ? default_lang : Lang_EN_US;
    }

    Decoder decoder;
    if (!unicode_decoders.TryGetValue(lcid, out decoder))
    {
        System.Globalization.CultureInfo culture;
        try
        {
            culture = System.Globalization.CultureInfo.GetCultureInfo(lcid);
        }
        catch (Exception)
        {
            // Unknown LCID: fall back to the culture of the running thread.
            culture = System.Globalization.CultureInfo.CurrentCulture;
        }

        Encoding ansiEncoding = Encoding.GetEncoding(culture.TextInfo.ANSICodePage);
        decoder = ansiEncoding.GetDecoder();
        unicode_decoders.Add(lcid, decoder);
    }

    current_unicode_decoder = decoder;
    current_lang = lcid;
}
/// <summary>
/// Maps an RTF font charset number to the locale identifier passed to SelectUnicodeDecoder.
/// </summary>
/// <param name="charset">Font charset number.</param>
/// <returns>
/// The mapped identifier; the current language for charsets 254, 255 and any unlisted value.
/// </returns>
int TranslateCharset(long charset)
{
    // NOTE(review): the entries marked "to fix" look like code page numbers
    // rather than LCIDs - confirm before relying on them.
    int lcid;
    switch (charset)
    {
        case 0: lcid = 1033; break;     // ANSI
        case 1:                         // default
        case 130:                       // 1361; Korean Johab
            lcid = default_lang;
            break;
        case 2: lcid = 42; break;       // Symbol - to fix
        case 77: lcid = 10000; break;   // Mac Roman - to fix
        case 78: lcid = 10001; break;   // Mac Shift JIS - to fix
        case 79: lcid = 10003; break;   // Mac Hangul - to fix
        case 80: lcid = 10008; break;   // Mac GB2312 - to fix
        case 81: lcid = 10002; break;   // Mac Big5 - to fix
        //case 82: 10002                // Johab old
        case 83: lcid = 10005; break;   // Mac Hebrew - to fix
        case 84: lcid = 10004; break;   // Mac Arabic - to fix
        case 85: lcid = 10006; break;   // Mac Greek - to fix
        case 86: lcid = 10081; break;   // Mac Turkish - to fix
        case 87: lcid = 10021; break;   // Mac Thai - to fix
        case 88: lcid = 10029; break;   // Mac East Europe - to fix
        case 89: lcid = 10007; break;   // Mac Russian - to fix
        case 128: lcid = 1041; break;   // 932; Shift JIS
        case 129: lcid = 1042; break;   // 949; Korean Hangul
        case 134: lcid = 2052; break;   // 936; GB2312
        case 136: lcid = 1028; break;   // 950; BIG5
        case 161: lcid = 1032; break;   // 1253; Greek
        case 162: lcid = 1055; break;   // 1254; Turkish
        case 163: lcid = 1066; break;   // 1258; Vietnamese
        case 177: lcid = 1037; break;   // 1255; Hebrew
        case 178: lcid = 1056; break;   // 1256; Arabic
        //case 179: 0                   // Arabic Traditional (old)
        //case 180: 0                   // Arabic user (old)
        //case 181: 0                   // Hebrew user (old)
        case 186: lcid = 1062; break;   // 1257; Baltic
        case 204: lcid = 1049; break;   // 1251; Russian
        case 222: lcid = 1054; break;   // 874; Thai
        case 238: lcid = 1045; break;   // 1250; East Europe (Poland selected)
        case 254:                       // 437; PC437
        case 255:                       // 850; OEM
        default:
            lcid = current_lang;
            break;
    }
    return lcid;
}
/// <summary>
/// Selects the decoder that corresponds to the given font charset.
/// </summary>
/// <param name="charset">Font charset number, translated by TranslateCharset.</param>
internal void SelectCodepageByFontCharset(long charset)
{
    SelectUnicodeDecoder(TranslateCharset(charset));
}
/// <summary>
/// Saves the current language identifier on the language stack.
/// </summary>
private void PushLocaleDecoder()
{
    int languageToSave = current_lang;
    lang_ids.Push(languageToSave);
}
/// <summary>
/// Restores the language identifier saved by the matching PushLocaleDecoder call
/// and re-selects its decoder.
/// </summary>
/// <remarks>
/// When the stack is empty, debug builds only log a message; other builds throw
/// a SyntaxErrorException.
/// </remarks>
private void PopLocaleDecoder()
{
    if (lang_ids.Count == 0)
    {
#if DEBUG
        Debug.WriteLine("Rich document error: broken source document structure. Ignore error and continue");
        return;
#else
        throw new SyntaxErrorException("Rich document structure error - broken source document");
#endif
    }

    current_lang = lang_ids.Pop();
    SelectUnicodeDecoder(current_lang);
}
/// <summary>
/// Converts the hex digits collected in the number buffer into one byte and queues
/// it for decoding. The number buffer is always reset, even when the digits are invalid.
/// </summary>
/// <remarks>
/// Invalid digits are dropped. Previously they stayed in the number buffer and were
/// picked up by the next control word or the next escape.
/// </remarks>
private void CollectCharacters()
{
    string digits = number.ToString();

    // A fresh builder is allocated (as before) rather than truncating the old one, so the
    // capacity limit given to the builder in the constructor keeps not applying afterwards.
    number = new StringBuilder();

    byte hex;
    bool parsed = byte.TryParse(
        digits,
        System.Globalization.NumberStyles.HexNumber,
        System.Globalization.CultureInfo.InvariantCulture,
        out hex);

    if (parsed)
    {
        raw_chars.Add(hex);
    }
    else
    {
        Debug.WriteLine("Rich document warning: invalid hexadecimal escape '" + digits + "' ignored");
    }
}
//private void TranslateUnicode(System.Globalization.NumberStyles num_style)
//{
// uint unichar = uint.Parse(number.ToString(), num_style);
// number.Length = 0;
// byte[] conv = new byte[2];
// char[] chars = new char[2];
// conv[0] = (byte)unichar;
// conv[1] = 0;
// if (current_lang == 0)
// SelectUnicodeDecoder(default_lang == 0 ? 1033 : default_lang);
// current_unicode_decoder.GetChars(conv, 0, 1, chars, 0);
// text.Append(chars[0]);
//}
#endregion
/// <summary>
/// Completes the token being collected: publishes the control word name, its numeric
/// parameter and the text gathered so far, then clears the collecting buffers.
/// </summary>
/// <remarks>
/// "lang" switches the decoder only while font_charset is 0; "deflang" switches the
/// decoder and records the resulting language as the default one.
/// </remarks>
/// <exception cref="FormatException">The number buffer does not hold a valid integer.</exception>
/// <exception cref="OverflowException">The numeric parameter does not fit into a long.</exception>
private void ControlWord()
{
    previous_tag = control_tag;
    control_tag = control.ToString();

    // RTF numbers are machine-readable: parse them independently of the current culture.
    control_num = number.Length != 0
        ? long.Parse(number.ToString(), System.Globalization.CultureInfo.InvariantCulture)
        : 0;

    if (control_tag == "lang")
    {
        if (font_charset == 0)
        {
            SelectUnicodeDecoder((int)control_num);
        }
    }
    else if (control_tag == "deflang")
    {
        SelectUnicodeDecoder((int)control_num);
        default_lang = current_lang;
    }

    // Flush pending \'xx bytes into the text buffer before publishing it.
    RestoreEncodedText();
    parsed_text = text.ToString();

    control.Length = 0;
    number.Length = 0;
    text.Length = 0;

    // Publish the "has digits" flag of this control word and re-arm it for the next one.
    has_has_value = has_value;
    has_value = false;
}
/// <summary>
/// Decodes the bytes collected from \'xx escapes with the current decoder, appends the
/// resulting characters to the text buffer and empties the byte queue.
/// Does nothing when no bytes are pending.
/// </summary>
private void RestoreEncodedText()
{
    if (raw_chars.Count == 0)
    {
        return;
    }

    // No decoder is created by the constructor, so an escape can be flushed before any
    // language was selected. Select one for the current language (0 = default).
    if (current_unicode_decoder == null)
    {
        SelectUnicodeDecoder(current_lang);
    }

    byte[] bytes = raw_chars.ToArray();

    // Ask the decoder for the exact output size instead of assuming one char per byte.
    int capacity = current_unicode_decoder.GetCharCount(bytes, 0, bytes.Length);
    char[] chars = new char[capacity];
    int count = current_unicode_decoder.GetChars(bytes, 0, bytes.Length, chars, 0);

    text.Append(chars, 0, count);
    raw_chars.Clear();
}
/// <summary>
/// Appends one character to the text buffer, after flushing any pending encoded bytes
/// so that the output keeps the input order.
/// </summary>
/// <param name="ch">Character to append.</param>
private void AppendCharacter(char ch)
{
    RestoreEncodedText();
    text.Append(ch);
}
/// <summary>
/// Feeds one character of the RTF stream into the state machine.
/// </summary>
/// <param name="ch">Next character of the document.</param>
/// <returns>
/// Collecting while a token is still being gathered; ControlTag, OpenBlock or CloseBlock
/// when the character completed a token. Details of the token are exposed through the
/// Control, Number, HasValue and Text properties.
/// </returns>
/// <exception cref="Exception">
/// A backslash is followed by a character that the parser does not support.
/// </exception>
/// <remarks>
/// Unbalanced '}' is reported by PopLocaleDecoder (an exception in non-debug builds).
/// </remarks>
internal ParserStatus ParseByte(char ch)
{
    //Console.Write(ch);
    status = ParserStatus.Collecting;
    delimiter = ch;

    // Group bookkeeping: the language is saved on '{' and restored on '}'.
    // NOTE(review): this runs for every brace character, including the escaped "\{" and
    // "\}" that the Control state below appends as literal text - verify that is intended.
    if (ch == '{')
    {
        indirection_counter++;
        PushLocaleDecoder();
    }
    if (ch == '}')
    {
        PopLocaleDecoder();
        indirection_counter--;
    }

    bool loop;
    do
    {
        // Set to true by a state that wants the same character re-dispatched
        // in the state it has just switched to.
        loop = false;
        switch (parser_state)
        {
            case ParserState.Neutral:
                switch (ch)
                {
                    case '{':
                        ControlWord();
                        status = ParserStatus.OpenBlock;
                        break;
                    case '}':
                        ControlWord();
                        status = ParserStatus.CloseBlock;
                        break;
                    case '\\':
                        parser_state = ParserState.Control;
                        break;
                    default:
                        switch (ch)
                        {
                            // Line breaks, tabs and NUL characters are not text.
                            case '\r':
                            case '\n':
                            case '\t':
                            case '\0':
                                break;
                            default:
                                AppendCharacter(ch);
                                break;
                        }
                        break;
                }
                break;
            case ParserState.CheckHyphen:
                if (char.IsDigit(ch))
                {
                    // "\word-N": the hyphen is the sign of a numeric parameter.
                    number.Append('-');
                    number.Append(ch);
                    parser_state = ParserState.Number;
                    has_value = true;
                    break;
                }
                // Substitute Optional HYPHEN with ZERO WIDTH SPACE
                AppendCharacter((char)8203);
                parser_state = ParserState.Neutral;
                // Re-dispatch the same character in the Neutral state. Re-entering
                // ParseByte here would repeat the brace bookkeeping above and push or
                // pop the language stack twice for "\-{" and "\-}".
                loop = true;
                break;
            case ParserState.Control:
                if (char.IsLetter(ch))
                {
                    control.Append(ch);
                }
                else if (ch == '-')
                {
                    parser_state = ParserState.CheckHyphen;
                }
                else if (char.IsDigit(ch))
                {
                    number.Append(ch);
                    parser_state = ParserState.Number;
                    has_value = true;
                }
                else if (ch == '\\')
                {
                    if (control.Length > 0)
                    {
                        // The backslash ends this control word and starts the next one,
                        // so the state stays Control.
                        ControlWord();
                        status = ParserStatus.ControlTag;
                    }
                    else
                    {
                        // "\\" - escaped backslash.
                        AppendCharacter(ch);
                        parser_state = ParserState.Neutral;
                    }
                }
                else if (ch == '{')
                {
                    if (control.Length > 0)
                    {
                        ControlWord();
                        status = ParserStatus.OpenBlock;
                    }
                    else
                    {
                        // "\{" - escaped brace.
                        AppendCharacter(ch);
                        status = ParserStatus.Collecting;
                    }
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '}')
                {
                    if (control.Length > 0)
                    {
                        ControlWord();
                        status = ParserStatus.CloseBlock;
                    }
                    else
                    {
                        // "\}" - escaped brace.
                        AppendCharacter(ch);
                        status = ParserStatus.Collecting;
                    }
                    parser_state = ParserState.Neutral;
                }
                else if (char.IsWhiteSpace(ch))
                {
                    parser_state = ParserState.IgnoreSpaces;
                    ControlWord();
                    status = ParserStatus.ControlTag;
                }
                else if (ch == '*')
                {
#if false // Preivous version which ignore pictures in \* control (20210211)
                    parser_state = ParserState.RichFormatExtensions;
                    if (indirection_counter == 0)
                        throw new Exception("Broken RTF format");
                    extensions_skip_counter = indirection_counter - 1;
#else
                    parser_state = ParserState.Neutral;
                    ControlWord();
                    status = ParserStatus.ControlTag;
#endif
                }
                else if (ch == ';')
                {
                    parser_state = ParserState.Neutral;
                    ControlWord();
                    status = ParserStatus.ControlTag;
                }
                else if (ch == '\'')
                {
                    // "\'xx" - hexadecimal byte escape.
                    parser_state = ParserState.FirstNibble;
                }
                else if (ch == '~')
                {
                    // Non-breaking space
                    AppendCharacter((char)0x00a0);
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '_')
                {
                    // Non-breaking hyphen
                    AppendCharacter((char)0x2011);
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '.')
                {
                    AppendCharacter('.');
                    parser_state = ParserState.Neutral;
                }
                else if (ch == ')')
                {
                    AppendCharacter(')');
                    parser_state = ParserState.Neutral;
                }
                else
                    throw new Exception("RTF format not parsed");
                break;
            case ParserState.Number:
                if (char.IsDigit(ch))
                    number.Append(ch);
                else
                {
                    if (ch == '{')
                    {
                        parser_state = ParserState.Neutral;
                        status = ParserStatus.OpenBlock;
                    }
                    else if (ch == '}')
                    {
                        parser_state = ParserState.Neutral;
                        status = ParserStatus.CloseBlock;
                    }
                    else
                    {
                        if (this.control.ToString() == "u")
                        {
                            // "\uN": N is a UTF-16 code unit written as a decimal number,
                            // possibly negative; the cast wraps negative values around.
                            int codeUnit = int.Parse(
                                number.ToString(),
                                System.Globalization.CultureInfo.InvariantCulture);
                            AppendCharacter((char)codeUnit);
                            number.Length = 0;
                            if (ch != '?')
                            {
                                // Discard the next three characters as the fallback
                                // representation that follows the "\uN" keyword.
                                parser_state = ParserState.SkipToNext;
                                skip_counter = 3;
                            }
                            else
                            {
                                parser_state = ParserState.Neutral;
                                control.Length = 0;
                            }
                            break;
                        }
                        else if (ch == '\\')
                            parser_state = ParserState.Control;
                        else if (ch == ';' || char.IsWhiteSpace(ch))
                            parser_state = ParserState.Neutral;
                        status = ParserStatus.ControlTag;
                    }
                    ControlWord();
                }
                break;
            case ParserState.FirstNibble:
                parser_state = ParserState.SecondNibble;
                number.Append(ch);
                break;
            case ParserState.SecondNibble:
                number.Append(ch);
                CollectCharacters();
                //TranslateUnicode(System.Globalization.NumberStyles.HexNumber);
                parser_state = ParserState.Neutral;
                break;
            case ParserState.SkipToNext: // Ignore hexadecimal representation of the character
                skip_counter--;
                if (skip_counter == 0)
                {
                    parser_state = ParserState.Neutral;
                    control.Length = 0;
                }
                break;
            case ParserState.RichFormatExtensions:
                status = ParseExtensionByte(ch);
                break;
            case ParserState.IgnoreSpaces:
                if (ch == ' ')
                {
                    // Debug.WriteLine("Skip space");
                }
                else
                {
                    // First non-space character: handle it as ordinary input.
                    parser_state = ParserState.Neutral;
                    loop = true;
                }
                break;
        }
    } while (loop);
    return status;
}
#if false // Debug
StringBuilder dbg = new StringBuilder();
private ParserStatus ParseExtensionByte(char ch)
{
if (extensions_skip_counter == indirection_counter)
{
dbg.Append("\n\n");
parser_state = ParserState.Neutral;
dbg.Clear();
}
else
dbg.Append(ch);
return ch == '{' ? ParserStatus.OpenBlock : ch == '}' ? ParserStatus.CloseBlock : ParserStatus.Collecting;
}
#else
/// <summary>
/// Handles one character while the parser is in the RichFormatExtensions state.
/// The state is left as soon as the group depth equals extensions_skip_counter.
/// </summary>
/// <param name="ch">Character being skipped.</param>
/// <returns>OpenBlock for '{', CloseBlock for '}', Collecting for anything else.</returns>
private ParserStatus ParseExtensionByte(char ch)
{
    bool leftExtensionGroup = extensions_skip_counter == indirection_counter;
    if (leftExtensionGroup)
    {
        parser_state = ParserState.Neutral;
    }

    switch (ch)
    {
        case '{':
            return ParserStatus.OpenBlock;
        case '}':
            return ParserStatus.CloseBlock;
        default:
            return ParserStatus.Collecting;
    }
}
#endif
/// <summary>
/// Resets the current run format: no bold/italic/underline, font size 24,
/// black text on white background and fill, font index 0, plain-text script type.
/// </summary>
internal void ResetRunFormat()
{
    System.Drawing.Color white = System.Drawing.Color.White;

    RunFormat format = current_run_format;

    // Character style
    format.bold = false;
    format.italic = false;
    format.underline = false;
    format.script_type = RunFormat.ScriptType.PlainText;

    // Font
    format.font_idx = 0;
    format.font_size = 24;

    // Colors
    format.color = System.Drawing.Color.Black;
    format.BColor = white;
    format.FillColor = white;

    current_run_format = format;
}
/// <summary>
/// Resets the current paragraph format: left alignment, zero spacing and indents,
/// exact line-spacing mode, numbering start 0, no list and no tab positions.
/// Also makes the default language the current language value.
/// </summary>
public void ResetParagraphFormat()
{
    ParagraphFormat format = current_paragraph_format;

    // Alignment and spacing
    format.align = ParagraphFormat.HorizontalAlign.Left;
    format.lnspcmult = ParagraphFormat.LnSpcMult.Exactly;
    format.line_spacing = 0;
    format.space_before = 0;
    format.space_after = 0;

    // Indents
    format.left_indent = 0;
    format.right_indent = 0;
    format.first_line_indent = 0;

    // Lists and tabs
    format.pnstart = 0;
    format.list_id = null;
    format.tab_positions = null;

    current_paragraph_format = format;

    current_lang = default_lang;
}
/// <summary>
/// Creates a parser in the Neutral state with empty buffers and default
/// run and paragraph formats.
/// </summary>
internal RTF_Parser()
{
    // Collecting buffers; the number buffer is limited to 12 characters.
    text = new StringBuilder();
    control = new StringBuilder();
    number = new StringBuilder(12, 12);

    // State machine
    parser_state = ParserState.Neutral;
    has_value = false;
    indirection_counter = 0;

    // Language and formatting defaults
    current_lang = 0;
    override_default_color = false;
    ResetRunFormat();
    ResetParagraphFormat();
}
/// <summary>
/// Type initializer. On CROSSPLATFORM and COREWIN builds it registers the code-pages
/// encoding provider; on other builds it does nothing.
/// </summary>
static RTF_Parser()
{
#if CROSSPLATFORM || COREWIN
// Presumably needed so that Encoding.GetEncoding can resolve the ANSI code pages
// requested by SelectUnicodeDecoder on these targets - confirm.
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif
}
}
}