123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613 |
- using System;
- using System.Collections.Generic;
- using System.Data;
- using System.Diagnostics;
- using System.Text;
- namespace FastReport.RichTextParser
- {
/// <summary>
/// Outcome reported by <c>RTF_Parser.ParseByte</c> for every character it consumes.
/// </summary>
enum ParserStatus
{
    /// <summary>The character was consumed without completing a token.</summary>
    Collecting = 0,

    /// <summary>Declared for callers; the parser code in this file never assigns it.</summary>
    Text = 1,

    /// <summary>A control word (or control symbol) has been completed.</summary>
    ControlTag = 2,

    /// <summary>An opening brace was seen.</summary>
    OpenBlock = 3,

    /// <summary>A closing brace was seen.</summary>
    CloseBlock = 4
}
/// <summary>
/// This class detects RTF control sequences and text.
/// </summary>
- class RTF_Parser
- {
/// <summary>
/// Internal states of the character-level state machine driven by <c>ParseByte</c>.
/// </summary>
enum ParserState
{
    /// <summary>Plain text; braces and backslashes are recognised here.</summary>
    Neutral = 0,

    /// <summary>A backslash was seen; the control word name is being collected.</summary>
    Control = 1,

    /// <summary>The numeric parameter of a control word is being collected.</summary>
    Number = 2,

    /// <summary>Expecting the first hex digit after <c>\'</c>.</summary>
    FirstNibble = 3,

    /// <summary>Expecting the second hex digit after <c>\'</c>.</summary>
    SecondNibble = 4,

    /// <summary>Skipping a fixed number of characters that follow a <c>\uN</c> control word.</summary>
    SkipToNext = 5,

    /// <summary>Characters are routed to <c>ParseExtensionByte</c>.</summary>
    RichFormatExtensions = 6,

    /// <summary>A '-' was seen in control state; the next character decides between a negative number and an optional hyphen.</summary>
    CheckHyphen = 7,

    /// <summary>Skipping space characters that follow a control word.</summary>
    IgnoreSpaces = 8
}
// --- State machine ---------------------------------------------------
private ParserStatus status;          // status reported for the last consumed character
private ParserState parser_state;     // current state of the character-level state machine

// --- Accumulators for the token being read ---------------------------
private StringBuilder control;        // letters of the control word being collected
private StringBuilder number;         // digits (and sign / hex nibbles) being collected
private StringBuilder text;           // plain text collected since the last token
private bool has_value;               // set while digits of a numeric parameter are collected

private bool list_is_active;          // backing field of ListItem

// --- Last completed token, exposed through the public properties -----
private string control_tag;
private string previous_tag;
private long control_num;
private string parsed_text;
private char delimiter;               // last character passed to ParseByte
private bool has_has_value;           // snapshot of has_value taken when a token completes

// --- Nesting and skipping counters -----------------------------------
private int indirection_counter;      // incremented on '{', decremented on '}'
private int skip_counter;             // characters left to skip in SkipToNext state
private int extensions_skip_counter;  // compared with indirection_counter in ParseExtensionByte

// 20200605
internal bool override_default_color;
// 20180511
internal ParagraphFormat current_paragraph_format;
internal RunFormat current_run_format;
internal bool insideTable;

#region Unicode converters
internal int current_lang;
internal long font_charset;
private int default_lang;
private const int Lang_EN_US = 1033;
private Decoder current_unicode_decoder;
// Decoders are created once per locale id and reused.
private readonly Dictionary<long, Decoder> unicode_decoders = new Dictionary<long, Decoder>();
// Locale ids saved on '{' and restored on '}'.
private readonly Stack<int> lang_ids = new Stack<int>();
private readonly Dictionary<long, long> translate_charset = new Dictionary<long, long>();
// Bytes from \'hh escapes waiting to be decoded with the current decoder.
private readonly List<byte> raw_chars = new List<byte>();
#endregion
/// <summary>Text that was collected before the most recently completed token.</summary>
public string Text
{
    get { return parsed_text; }
}

/// <summary>Name of the most recently completed control word.</summary>
public string Control
{
    get { return control_tag; }
}

/// <summary>Name of the control word completed before <see cref="Control"/>.</summary>
public string PreviousTag
{
    get { return previous_tag; }
}

/// <summary>Numeric parameter of the most recent control word; 0 when none was given.</summary>
public long Number
{
    get { return control_num; }
}

/// <summary>
/// Character that ended the control word, or '\0' when the current status is not
/// <see cref="ParserStatus.ControlTag"/>.
/// </summary>
public char Delimiter
{
    get
    {
        if (status == ParserStatus.ControlTag)
        {
            return delimiter;
        }
        return '\0';
    }
}

/// <summary>Status produced by the last call to <c>ParseByte</c>.</summary>
public ParserStatus Status
{
    get { return status; }
}

/// <summary>Whether digits were collected for the most recently completed token.</summary>
public bool HasValue
{
    get { return has_has_value; }
}

/// <summary>Caller-maintained flag; the parser itself only stores it.</summary>
public bool ListItem
{
    get { return list_is_active; }
    set { list_is_active = value; }
}
/// <summary>
/// Reports whether text is still buffered. When it is, the buffered text is also
/// copied into <see cref="Text"/> as a side effect of reading this property.
/// </summary>
public bool EndOfFile
{
    get
    {
        bool has_pending_text = text.Length != 0;
        if (has_pending_text)
        {
            parsed_text = text.ToString();
        }
        return has_pending_text;
    }
}
/// <summary>Resets <see cref="Text"/> to the empty string.</summary>
public void ClearParsedText()
{
    parsed_text = String.Empty;
}
- #region Language selector and translator
/// <summary>
/// Makes the decoder for the given locale the current one, creating and caching it
/// the first time the locale is requested.
/// </summary>
/// <param name="lcid">
/// Locale identifier. 0 selects the document default language, or en-US (1033)
/// when no default has been set.
/// </param>
private void SelectUnicodeDecoder(int lcid)
{
    int effective_lcid = lcid;
    if (effective_lcid == 0)
    {
        if (default_lang == 0)
            effective_lcid = Lang_EN_US;
        else
            effective_lcid = default_lang;
    }

    Decoder cached_decoder;
    if (unicode_decoders.TryGetValue(effective_lcid, out cached_decoder))
    {
        current_unicode_decoder = cached_decoder;
    }
    else
    {
        System.Globalization.CultureInfo culture;
        try
        {
            culture = System.Globalization.CultureInfo.GetCultureInfo(effective_lcid);
        }
        catch (Exception)
        {
            // Unknown locale id: fall back to the culture of the running thread.
            culture = System.Globalization.CultureInfo.CurrentCulture;
        }

        int ansi_code_page = culture.TextInfo.ANSICodePage;
        current_unicode_decoder = Encoding.GetEncoding(ansi_code_page).GetDecoder();
        unicode_decoders.Add(effective_lcid, current_unicode_decoder);
    }

    current_lang = effective_lcid;
}
/// <summary>
/// Maps an RTF font charset number to the locale id used to pick a decoder.
/// </summary>
/// <param name="charset">Charset number taken from the font table.</param>
/// <returns>
/// The mapped id; <c>default_lang</c> for charsets 1 and 130; <c>current_lang</c>
/// for 254, 255 and every charset that has no entry.
/// </returns>
int TranslateCharset(long charset)
{
    switch (charset)
    {
        case 0:
            return 1033;    // ANSI

        case 1:             // default
        case 130:           // 1361; Korean Johab
            return default_lang;

        // The values below are marked "to fix" by the original author.
        case 2: return 42;          // Symbol
        case 77: return 10000;      // Mac Roman
        case 78: return 10001;      // Mac Shift JIS
        case 79: return 10003;      // Mac Hangul
        case 80: return 10008;      // Mac GB2312
        case 81: return 10002;      // Mac Big5
        // 82 (Johab, old) is intentionally not mapped
        case 83: return 10005;      // Mac Hebrew
        case 84: return 10004;      // Mac Arabic
        case 85: return 10006;      // Mac Greek
        case 86: return 10081;      // Mac Turkish
        case 87: return 10021;      // Mac Thai
        case 88: return 10029;      // Mac East Europe
        case 89: return 10007;      // Mac Russian

        case 128: return 1041;      // 932; Shift JIS
        case 129: return 1042;      // 949; Korean Hangul
        case 134: return 2052;      // 936; GB2312
        case 136: return 1028;      // 950; BIG5
        case 161: return 1032;      // 1253; Greek
        case 162: return 1055;      // 1254; Turkish
        case 163: return 1066;      // 1258; Vietnamese
        case 177: return 1037;      // 1255; Hebrew
        case 178: return 1056;      // 1256; Arabic
        // 179 (Arabic Traditional), 180 (Arabic user), 181 (Hebrew user) are old and not mapped
        case 186: return 1062;      // 1257; Baltic
        case 204: return 1049;      // 1251; Russian
        case 222: return 1054;      // 874; Thai
        case 238: return 1045;      // 1250; East Europe (Poland selected)

        default:
            // Also covers 254 (PC437) and 255 (OEM): keep the language already in effect.
            return current_lang;
    }
}
/// <summary>
/// Selects the decoder that corresponds to a font charset number.
/// </summary>
/// <param name="charset">Charset number taken from the font table.</param>
internal void SelectCodepageByFontCharset(long charset)
{
    SelectUnicodeDecoder(TranslateCharset(charset));
}
/// <summary>Saves the current language id so it can be restored by <c>PopLocaleDecoder</c>.</summary>
private void PushLocaleDecoder()
{
    int language_to_save = current_lang;
    lang_ids.Push(language_to_save);
}
/// <summary>
/// Restores the language id saved by the matching <c>PushLocaleDecoder</c> and
/// re-selects its decoder.
/// </summary>
/// <exception cref="SyntaxErrorException">
/// Non-DEBUG builds only: nothing was saved, i.e. there are more closing than opening
/// braces. DEBUG builds write a diagnostic message and continue instead.
/// </exception>
private void PopLocaleDecoder()
{
    if (lang_ids.Count == 0)
    {
#if DEBUG
        Debug.WriteLine("Rich document error: broken source document structure. Ignore error and continue");
        return;
#else
        throw new SyntaxErrorException("Rich document structure error - broken source document");
#endif
    }

    int restored_lang = lang_ids.Pop();
    current_lang = restored_lang;
    SelectUnicodeDecoder(restored_lang);
}
/// <summary>
/// Converts the two hex digits collected in <c>number</c> (from a <c>\'hh</c> escape)
/// into a byte and queues it in <c>raw_chars</c> for later decoding.
/// </summary>
/// <remarks>
/// An escape that is not valid hex is dropped, as before. Unlike before, the digit
/// buffer is cleared in that case too, so the invalid characters cannot leak into
/// the numeric parameter of the next control word. The buffer is cleared in place
/// rather than replaced, so it keeps the capacity limit set by the constructor.
/// </remarks>
private void CollectCharacters()
{
    byte value;
    bool is_valid_hex = byte.TryParse(
        number.ToString(),
        System.Globalization.NumberStyles.HexNumber,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);

    // Always reset: whatever was collected belongs to this escape only.
    number.Length = 0;

    if (is_valid_hex)
    {
        raw_chars.Add(value);
    }
}
- //private void TranslateUnicode(System.Globalization.NumberStyles num_style)
- //{
- // uint unichar = uint.Parse(number.ToString(), num_style);
- // number.Length = 0;
- // byte[] conv = new byte[2];
- // char[] chars = new char[2];
- // conv[0] = (byte)unichar;
- // conv[1] = 0;
- // if (current_lang == 0)
- // SelectUnicodeDecoder(default_lang == 0 ? 1033 : default_lang);
- // current_unicode_decoder.GetChars(conv, 0, 1, chars, 0);
- // text.Append(chars[0]);
- //}
- #endregion
/// <summary>
/// Finishes the token being collected: publishes the control word name, its numeric
/// parameter and the text gathered so far, then clears the accumulators.
/// </summary>
/// <remarks>
/// <c>\lang</c> switches the decoder only while <c>font_charset</c> is 0;
/// <c>\deflang</c> switches the decoder and records the new default language.
/// </remarks>
/// <exception cref="FormatException">The collected numeric parameter is not a valid integer.</exception>
/// <exception cref="OverflowException">The collected numeric parameter does not fit in a <c>long</c>.</exception>
private void ControlWord()
{
    previous_tag = control_tag;
    control_tag = control.ToString();

    // RTF numbers are machine-readable ASCII: parse them independently of the thread culture.
    control_num = number.Length != 0
        ? long.Parse(
            number.ToString(),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture)
        : 0;

    if (control_tag == "lang")
    {
        if (font_charset == 0)
            SelectUnicodeDecoder((int)control_num);
    }
    else if (control_tag == "deflang")
    {
        SelectUnicodeDecoder((int)control_num);
        default_lang = current_lang;
    }

    // Pending \'hh bytes belong to the text that precedes this token.
    RestoreEncodedText();
    parsed_text = text.ToString();

    control.Length = 0;
    number.Length = 0;
    text.Length = 0;

    has_has_value = has_value;
    has_value = false;
}
/// <summary>
/// Decodes the bytes queued from <c>\'hh</c> escapes with the current decoder and
/// appends the resulting characters to the text buffer.
/// </summary>
/// <remarks>
/// Fixes over the previous version:
/// <list type="bullet">
/// <item>If no decoder has been selected yet, one is selected for <c>current_lang</c>
/// instead of failing with a <see cref="NullReferenceException"/>.</item>
/// <item>The decoder is flushed after each run, so an incomplete multi-byte sequence
/// cannot carry over into the next run (decoders are cached and shared per locale).</item>
/// <item>The output buffer is sized with <c>GetCharCount</c> rather than assumed to be
/// no longer than the byte count.</item>
/// </list>
/// </remarks>
private void RestoreEncodedText()
{
    if (raw_chars.Count == 0)
    {
        return;
    }

    if (current_unicode_decoder == null)
    {
        // No \lang, \deflang or font charset has been seen yet.
        SelectUnicodeDecoder(current_lang);
    }

    byte[] encoded = raw_chars.ToArray();
    int required = current_unicode_decoder.GetCharCount(encoded, 0, encoded.Length, true);
    char[] decoded = new char[required];
    int count = current_unicode_decoder.GetChars(encoded, 0, encoded.Length, decoded, 0, true);

    this.text.Append(decoded, 0, count);
    raw_chars.Clear();
}
/// <summary>
/// Appends one character to the text buffer, after first flushing any queued
/// <c>\'hh</c> bytes so that the output keeps the order of the source.
/// </summary>
/// <param name="ch">Character to append.</param>
private void AppendCharacter(char ch)
{
    RestoreEncodedText();
    text.Append(ch);
}
/// <summary>
/// Feeds one character of the RTF source into the state machine.
/// </summary>
/// <param name="ch">Next character of the RTF source.</param>
/// <returns>
/// <see cref="ParserStatus.Collecting"/> while a token is still being read, otherwise the
/// kind of token that was completed by this character. The token details are then
/// available through the public properties of the parser.
/// </returns>
/// <exception cref="Exception">A backslash is followed by a character the parser does not handle.</exception>
/// <remarks>
/// Changes over the previous version:
/// <list type="bullet">
/// <item>The optional-hyphen fallback re-runs the state machine for the same character
/// instead of calling <c>ParseByte</c> recursively. The recursive call repeated the
/// brace bookkeeping, so a brace right after <c>\-</c> was counted twice.</item>
/// <item>Consuming <c>\uN</c> now clears <c>has_value</c>, so the next parameterless
/// control word no longer reports <c>HasValue == true</c>.</item>
/// <item>The <c>\uN</c> parameter is parsed with the invariant culture.</item>
/// </list>
/// </remarks>
internal ParserStatus ParseByte(char ch)
{
    status = ParserStatus.Collecting;
    delimiter = ch;

    // Group bookkeeping happens once per input character, whatever the current state is.
    if (ch == '{')
    {
        indirection_counter++;
        PushLocaleDecoder();
    }
    if (ch == '}')
    {
        PopLocaleDecoder();
        indirection_counter--;
    }

    bool loop;
    do
    {
        // Set to true by a state that wants the same character re-examined in its new state.
        loop = false;
        switch (parser_state)
        {
            case ParserState.Neutral:
                switch (ch)
                {
                    case '{':
                        ControlWord();
                        status = ParserStatus.OpenBlock;
                        break;
                    case '}':
                        ControlWord();
                        status = ParserStatus.CloseBlock;
                        break;
                    case '\\':
                        parser_state = ParserState.Control;
                        break;
                    case '\r':
                    case '\n':
                    case '\t':
                    case '\0':
                        // Ignored in plain text.
                        break;
                    default:
                        AppendCharacter(ch);
                        break;
                }
                break;

            case ParserState.CheckHyphen:
                if (char.IsDigit(ch))
                {
                    // The '-' was the sign of a numeric parameter.
                    number.Append('-');
                    number.Append(ch);
                    parser_state = ParserState.Number;
                    has_value = true;
                    break;
                }
                // Substitute Optional HYPHEN with ZERO WIDTH SPACE
                AppendCharacter((char)8203);
                parser_state = ParserState.Neutral;
                // Re-examine the same character as plain text without repeating the
                // brace bookkeeping done at the top of this method.
                loop = true;
                break;

            case ParserState.Control:
                if (char.IsLetter(ch))
                {
                    control.Append(ch);
                }
                else if (ch == '-')
                {
                    parser_state = ParserState.CheckHyphen;
                }
                else if (char.IsDigit(ch))
                {
                    number.Append(ch);
                    parser_state = ParserState.Number;
                    has_value = true;
                }
                else if (ch == '\\')
                {
                    if (control.Length > 0)
                    {
                        // A new control word starts right after this one; stay in Control state.
                        ControlWord();
                        status = ParserStatus.ControlTag;
                    }
                    else
                    {
                        // "\\" is an escaped backslash.
                        AppendCharacter(ch);
                        parser_state = ParserState.Neutral;
                    }
                }
                else if (ch == '{')
                {
                    if (control.Length > 0)
                    {
                        ControlWord();
                        status = ParserStatus.OpenBlock;
                    }
                    else
                    {
                        // "\{" is an escaped brace.
                        AppendCharacter(ch);
                        status = ParserStatus.Collecting;
                    }
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '}')
                {
                    if (control.Length > 0)
                    {
                        ControlWord();
                        status = ParserStatus.CloseBlock;
                    }
                    else
                    {
                        // "\}" is an escaped brace.
                        AppendCharacter(ch);
                        status = ParserStatus.Collecting;
                    }
                    parser_state = ParserState.Neutral;
                }
                else if (char.IsWhiteSpace(ch))
                {
                    parser_state = ParserState.IgnoreSpaces;
                    ControlWord();
                    status = ParserStatus.ControlTag;
                }
                else if (ch == '*')
                {
#if false // Previous version which ignores pictures in \* control (20210211)
                    parser_state = ParserState.RichFormatExtensions;
                    if (indirection_counter == 0)
                        throw new Exception("Broken RTF format");
                    extensions_skip_counter = indirection_counter - 1;
#else
                    parser_state = ParserState.Neutral;
                    ControlWord();
                    status = ParserStatus.ControlTag;
#endif
                }
                else if (ch == ';')
                {
                    parser_state = ParserState.Neutral;
                    ControlWord();
                    status = ParserStatus.ControlTag;
                }
                else if (ch == '\'')
                {
                    // \'hh : two hex digits follow.
                    parser_state = ParserState.FirstNibble;
                }
                else if (ch == '~')
                {
                    // Non-breaking space
                    AppendCharacter((char)0x00a0);
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '_')
                {
                    // Non-breaking hyphen
                    AppendCharacter((char)0x2011);
                    parser_state = ParserState.Neutral;
                }
                else if (ch == '.')
                {
                    AppendCharacter('.');
                    parser_state = ParserState.Neutral;
                }
                else if (ch == ')')
                {
                    AppendCharacter(')');
                    parser_state = ParserState.Neutral;
                }
                else
                {
                    throw new Exception("RTF format not parsed");
                }
                break;

            case ParserState.Number:
                if (char.IsDigit(ch))
                {
                    number.Append(ch);
                }
                else
                {
                    if (ch == '{')
                    {
                        parser_state = ParserState.Neutral;
                        status = ParserStatus.OpenBlock;
                    }
                    else if (ch == '}')
                    {
                        parser_state = ParserState.Neutral;
                        status = ParserStatus.CloseBlock;
                    }
                    else
                    {
                        if (this.control.ToString() == "u")
                        {
                            // \uN : N is a UTF-16 code unit, written as a signed number;
                            // the cast wraps negative values back into the char range.
                            int code_unit = int.Parse(
                                number.ToString(),
                                System.Globalization.CultureInfo.InvariantCulture);
                            AppendCharacter((char)code_unit);
                            number.Length = 0;
                            // The parameter has been consumed here; do not let it mark
                            // the next control word as having a value.
                            has_value = false;
                            if (ch != '?')
                            {
                                // NOTE(review): assumes the fallback is a \'hh escape whose
                                // backslash is the current character, so exactly three more
                                // characters are skipped - confirm for other delimiters.
                                parser_state = ParserState.SkipToNext;
                                skip_counter = 3;
                            }
                            else
                            {
                                // '?' is the fallback character itself; nothing more to skip.
                                parser_state = ParserState.Neutral;
                                control.Length = 0;
                            }
                            // \uN produces text, not a control token: skip ControlWord().
                            break;
                        }
                        else if (ch == '\\')
                        {
                            parser_state = ParserState.Control;
                        }
                        else if (ch == ';' || char.IsWhiteSpace(ch))
                        {
                            parser_state = ParserState.Neutral;
                        }
                        // Any other terminator leaves the state unchanged.
                        status = ParserStatus.ControlTag;
                    }
                    ControlWord();
                }
                break;

            case ParserState.FirstNibble:
                parser_state = ParserState.SecondNibble;
                number.Append(ch);
                break;

            case ParserState.SecondNibble:
                number.Append(ch);
                CollectCharacters();
                parser_state = ParserState.Neutral;
                break;

            case ParserState.SkipToNext: // Ignore hexadecimal representation of the character
                skip_counter--;
                if (skip_counter == 0)
                {
                    parser_state = ParserState.Neutral;
                    control.Length = 0;
                }
                break;

            case ParserState.RichFormatExtensions:
                status = ParseExtensionByte(ch);
                break;

            case ParserState.IgnoreSpaces:
                if (ch != ' ')
                {
                    // First non-space character: handle it as ordinary input.
                    parser_state = ParserState.Neutral;
                    loop = true;
                }
                break;
        }
    } while (loop);

    return status;
}
#if false // Debug
StringBuilder dbg = new StringBuilder();

/// <summary>Debug variant: additionally records the skipped characters in <c>dbg</c>.</summary>
private ParserStatus ParseExtensionByte(char ch)
{
    if (extensions_skip_counter == indirection_counter)
    {
        dbg.Append("\n\n");
        parser_state = ParserState.Neutral;
        dbg.Clear();
    }
    else
    {
        dbg.Append(ch);
    }

    switch (ch)
    {
        case '{':
            return ParserStatus.OpenBlock;
        case '}':
            return ParserStatus.CloseBlock;
        default:
            return ParserStatus.Collecting;
    }
}
#else
/// <summary>
/// Handles a character while the parser is in the <c>RichFormatExtensions</c> state.
/// Returns to the neutral state once the nesting depth equals
/// <c>extensions_skip_counter</c>.
/// </summary>
/// <param name="ch">Character being skipped.</param>
/// <returns>
/// <see cref="ParserStatus.OpenBlock"/> for '{', <see cref="ParserStatus.CloseBlock"/> for '}',
/// otherwise <see cref="ParserStatus.Collecting"/>.
/// </returns>
private ParserStatus ParseExtensionByte(char ch)
{
    if (extensions_skip_counter == indirection_counter)
    {
        parser_state = ParserState.Neutral;
    }

    switch (ch)
    {
        case '{':
            return ParserStatus.OpenBlock;
        case '}':
            return ParserStatus.CloseBlock;
        default:
            return ParserStatus.Collecting;
    }
}
#endif
/// <summary>
/// Restores the current run format to its defaults: no bold/italic/underline,
/// font size 24, black text on white, font index 0, plain (non-script) text.
/// </summary>
internal void ResetRunFormat()
{
    System.Drawing.Color ink = System.Drawing.Color.Black;
    System.Drawing.Color paper = System.Drawing.Color.White;

    // Character emphasis
    this.current_run_format.bold = false;
    this.current_run_format.italic = false;
    this.current_run_format.underline = false;

    this.current_run_format.font_size = 24;

    // Colors
    this.current_run_format.color = ink;
    this.current_run_format.BColor = paper;
    this.current_run_format.FillColor = paper;

    this.current_run_format.font_idx = 0;
    this.current_run_format.script_type = RunFormat.ScriptType.PlainText;
}
/// <summary>
/// Restores the current paragraph format to its defaults and switches the current
/// language back to the document default.
/// </summary>
public void ResetParagraphFormat()
{
    // Alignment and spacing
    this.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Left;
    this.current_paragraph_format.line_spacing = 0;
    this.current_paragraph_format.space_before = 0;
    this.current_paragraph_format.space_after = 0;

    // Indents
    this.current_paragraph_format.left_indent = 0;
    this.current_paragraph_format.right_indent = 0;
    this.current_paragraph_format.first_line_indent = 0;

    this.current_paragraph_format.lnspcmult = ParagraphFormat.LnSpcMult.Exactly;
    this.current_paragraph_format.pnstart = 0;

    // Only the language id is reset here; the active decoder is not re-selected.
    this.current_lang = this.default_lang;

    // List membership and tab stops
    this.current_paragraph_format.list_id = null;
    this.current_paragraph_format.tab_positions = null;
}
/// <summary>
/// Creates a parser in the neutral state with empty buffers and default run and
/// paragraph formats.
/// </summary>
internal RTF_Parser()
{
    // Accumulators; the digit buffer is limited to 12 characters.
    this.control = new StringBuilder();
    this.number = new StringBuilder(12, 12);
    this.text = new StringBuilder();

    // State machine and flags
    this.parser_state = ParserState.Neutral;
    this.has_value = false;
    this.override_default_color = false;
    this.indirection_counter = 0;
    this.current_lang = 0;

    this.ResetRunFormat();
    this.ResetParagraphFormat();
}
/// <summary>
/// On CROSSPLATFORM / COREWIN builds, registers the code-pages encoding provider
/// before any parser instance asks for an encoding by code page number.
/// </summary>
static RTF_Parser()
{
#if CROSSPLATFORM || COREWIN
    EncodingProvider code_pages = CodePagesEncodingProvider.Instance;
    Encoding.RegisterProvider(code_pages);
#endif
}
- }
- }
|