RTF_Parser.cs 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Data;
  4. using System.Diagnostics;
  5. using System.Diagnostics.Contracts;
  6. using System.Globalization;
  7. using System.Text;
  8. using static FastReport.Fonts.GlyphSubstitutionClass;
  9. namespace FastReport.RichTextParser
  10. {
  /// <summary>
  /// Result reported by <c>RTF_Parser.ParseByte</c> for each consumed character.
  /// </summary>
  enum ParserStatus
  {
      /// <summary>Nothing complete yet; the parser is still accumulating input.</summary>
      Collecting = 0,

      /// <summary>Text status (not assigned by the parser code in this file).</summary>
      Text = 1,

      /// <summary>A control word (and its optional numeric argument) has been completed.</summary>
      ControlTag = 2,

      /// <summary>An opening brace '{' was consumed.</summary>
      OpenBlock = 3,

      /// <summary>A closing brace '}' was consumed.</summary>
      CloseBlock = 4
  }
  /// <summary>
  /// This class detects RTF control sequences and text.
  /// </summary>
  22. class RTF_Parser
  23. {
  /// <summary>
  /// Internal states of the character-level state machine driven by ParseByte.
  /// </summary>
  enum ParserState
  {
      /// <summary>Plain text; waiting for '{', '}' or a backslash.</summary>
      Neutral = 0,

      /// <summary>A backslash was seen; collecting the letters of a control word.</summary>
      Control = 1,

      /// <summary>Collecting the digits of a control word's numeric argument.</summary>
      Number = 2,

      /// <summary>Expecting the first hex digit of a \'hh escape.</summary>
      FirstNibble = 3,

      /// <summary>Expecting the second hex digit of a \'hh escape.</summary>
      SecondNibble = 4,

      /// <summary>A \'hh byte was stored; checking whether another escape follows.</summary>
      TryCollectChars = 5,

      /// <summary>A backslash followed a \'hh byte; checking for the apostrophe of the next escape.</summary>
      WaitHexcharPrefix = 6,

      // The four states below are only used by code that is currently compiled out (#if false).
      ParsePrefix_0x81_phase0 = 7,
      ParsePrefix_0x81_phase1 = 8,
      ParsePrefix_0x81_phase2 = 9,
      ParsePrefix_0x81_phase3 = 10,

      /// <summary>Skipping the characters that follow a \uN control word.</summary>
      SkipToNext = 11,

      /// <summary>Characters are routed to ParseExtensionByte (only entered from disabled code).</summary>
      RichFormatExtensions = 12,

      /// <summary>"\-" was seen; the next character decides between a negative number and an optional hyphen.</summary>
      CheckHyphen = 13,

      /// <summary>Space characters are dropped until a different character arrives.</summary>
      IgnoreSpaces = 14,

      /// <summary>Error state (not entered by the parser code in this file).</summary>
      ParserError = 15
  }
  // --- State machine ---------------------------------------------------
  private ParserStatus status;            // status reported for the last consumed character
  private ParserState parser_state;       // current state of the character-level automaton

  // --- Accumulators ----------------------------------------------------
  private StringBuilder control;          // letters of the control word being read
  private StringBuilder number;           // digits of a numeric argument or of a \'hh escape
  private StringBuilder text;             // plain text collected since the last control word

  // --- Result of the last completed control word -----------------------
  private bool has_value;                 // a numeric argument is being read for the current word
  private bool has_has_value;             // snapshot of has_value taken by ControlWord()
  private bool list_is_active;
  private string control_tag;
  private string previous_tag;
  private long control_num;
  private string parsed_text;
  private char delimiter;                 // last character passed to ParseByte

  // --- Counters --------------------------------------------------------
  private int indirection_counter;        // brace nesting depth
  private int skip_counter;               // characters left to skip in the SkipToNext state
  private int extensions_skip_counter;

  // 20200605
  internal bool override_default_color;

  // 20180511
  internal ParagraphFormat current_paragraph_format;
  internal RunFormat current_run_format;
  internal bool insideTable;

  // 20240226
  internal int skip_space_counter;

  #region Unicode converters
  internal int current_lang;
  internal long font_charset;
  private int default_lang;
  private const int Lang_EN_US = 1033;
  private Decoder current_unicode_decoder;

  // Decoders created so far, keyed by the language/code-page id they were requested for.
  private Dictionary<long, Decoder> unicode_decoders = new Dictionary<long, Decoder>();

  // Language ids saved on '{' and restored on '}'.
  private Stack<int> lang_ids = new Stack<int>();
  private Dictionary<long, long> translate_charset = new Dictionary<long, long>();

  // Bytes gathered from consecutive \'hh escapes, waiting to be decoded.
  private List<byte> raw_chars = new List<byte>();
  #endregion
  /// <summary>Text captured by the most recently completed control word or block boundary.</summary>
  public string Text => parsed_text;

  /// <summary>Name of the most recently completed control word.</summary>
  public string Control => control_tag;

  /// <summary>Name of the control word completed before <see cref="Control"/>.</summary>
  public string PreviousTag => previous_tag;

  /// <summary>Numeric argument of the last control word (0 when none was given).</summary>
  public long Number => control_num;

  /// <summary>
  /// Character that ended the last control word, or '\0' when the current status is not
  /// <see cref="ParserStatus.ControlTag"/>.
  /// </summary>
  public char Delimiter => status == ParserStatus.ControlTag ? delimiter : '\0';

  /// <summary>Status produced by the last ParseByte call.</summary>
  public ParserStatus Status => status;

  /// <summary>True when the last control word carried a numeric argument.</summary>
  public bool HasValue => has_has_value;

  /// <summary>Flag stored for the caller; this class only keeps the value.</summary>
  public bool ListItem
  {
      get { return list_is_active; }
      set { list_is_active = value; }
  }

  /// <summary>
  /// Returns true when collected text is still pending and, as a side effect, publishes it
  /// through <see cref="Text"/>; returns false when nothing is pending.
  /// </summary>
  public bool EndOfFile
  {
      get
      {
          bool hasPendingText = text.Length != 0;
          if (hasPendingText)
          {
              parsed_text = text.ToString();
          }
          return hasPendingText;
      }
  }

  /// <summary>Discards the text exposed through <see cref="Text"/>.</summary>
  public void ClearParsedText()
  {
      parsed_text = String.Empty;
  }
  97. #region Language selector and translator
  /// <summary>
  /// Maps a Windows language identifier (LCID) to the ANSI code page used for its text.
  /// </summary>
  /// <param name="lcid">Language identifier, or a value that already is a code page number.</param>
  /// <returns>
  /// The code page for a known LCID; any other value is returned unchanged so that callers may
  /// pass code page numbers straight through.
  /// </returns>
  private static int LCID2Codepade(int lcid)
  {
      switch (lcid)
      {
          // ---- 874: Thai ----
          case 1054:
              return 874;

          // ---- 932: Japanese (Shift JIS) ----
          case 1041:
              return 932;

          // ---- 936: Simplified Chinese (PRC, Singapore) ----
          case 2052: case 4100:
              return 936;

          // ---- 949: Korean ----
          case 1042:
              return 949;

          // ---- 950: Traditional Chinese (Taiwan, Hong Kong, Macau) ----
          case 1028: case 3076: case 5124:
              return 950;

          // ---- 1250: Central European ----
          case 1048: // Romanian
          case 1052: // Albanian
          case 1045: // Polish
          case 1038: // Hungarian
          case 2074: // Serbian (Latin)
          case 1060: // Slovenian
          case 1029: // Czech
          case 1050: // Croatian
          case 1051: // Slovak
              return 1250;

          // ---- 1251: Cyrillic ----
          case 1088: // Kyrgyz
          case 2092: // Azeri (Cyrillic)
          case 1049: // Russian
          case 1087: // Kazakh
          case 1058: // Ukrainian
          case 1092: // Tatar
          case 3098: // Serbian (Cyrillic)
          case 1059: // Belarusian
          case 2115: // Uzbek (Cyrillic)
          case 1104: // Mongolian
          case 1026: // Bulgarian
          case 1071: // Macedonian
              return 1251;

          // ---- 1252: Western European ----
          case 1078: // Afrikaans
          case 1080: // Faroese
          case 1089: // Swahili
          case 1027: // Catalan
          case 1030: // Danish
          case 1035: // Finnish
          case 1057: // Indonesian
          case 1069: // Basque
          case 1110: // Galician
          case 1039: // Icelandic
          // Swedish (Sweden, Finland)
          case 1053: case 2077:
          // Malay (Malaysia, Brunei)
          case 1086: case 2110:
          // Spanish (regional variants)
          case 1034: case 2058: case 4106: case 5130: case 6154: case 7178:
          case 8202: case 9226: case 10250: case 11274: case 12298: case 13322:
          case 14346: case 15370: case 16394: case 17418: case 19466: case 20490:
          case 18442: // Spanish (Honduras) - the identifier the original comment intended
          case 18422: // historical typo for 18442, kept so existing behavior is unchanged
          // English (regional variants)
          case 1033: case 2057: case 3081: case 4105: case 5129: case 6153: case 7177:
          case 8201: case 9225: case 10249: case 11273: case 12297: case 13321:
          // German (regional variants)
          case 1031: case 2055: case 3079: case 4103: case 5127:
          // French (regional variants)
          case 1036: case 2060: case 3084: case 4108: case 5132: case 6156:
          // Italian (Italy, Switzerland)
          case 1040: case 2064:
          // Dutch (Netherlands, Belgium)
          case 1043: case 2067:
          // Norwegian (Bokmal, Nynorsk)
          case 1044: case 2068:
          // Portuguese (Brazil, Portugal)
          case 1046: case 2070:
              return 1252;

          // ---- 1253: Greek ----
          case 1032:
              return 1253;

          // ---- 1254: Turkic (Latin) ----
          case 1055: // Turkish
          case 1091: // Uzbek (Latin)
          case 1068: // Azeri (Latin)
              return 1254;

          // ---- 1255: Hebrew ----
          case 1037:
              return 1255;

          // ---- 1256: Arabic script ----
          case 1025: case 2049: case 3073: case 4097: case 5121: case 6145:
          case 7169: case 8193: case 9217: case 10241: case 11265: case 12289:
          case 13313: case 14337: case 15361: case 16385: // Arabic (regional variants)
          case 1056: // Urdu
          case 1065: // Farsi
              return 1256;

          // ---- 1257: Baltic ----
          case 1063: // Lithuanian
          case 1062: // Latvian
          case 1061: // Estonian
              return 1257;

          // ---- 1258: Vietnamese ----
          case 1066:
              return 1258;

          default:
              // Not a known LCID: assume the caller already supplied a code page number.
              return lcid;
      }
  }
  /// <summary>
  /// Makes the decoder for <paramref name="lcid"/> the current one, creating and caching it on
  /// first use, and records <paramref name="lcid"/> as the current language.
  /// </summary>
  /// <param name="lcid">Language identifier or code page number.</param>
  private void SelectUnicodeDecoder(int lcid)
  {
      Decoder decoder;
      // Single lookup instead of ContainsKey + indexer.
      if (!unicode_decoders.TryGetValue(lcid, out decoder))
      {
          Encoding encoding;
          try
          {
              encoding = Encoding.GetEncoding(LCID2Codepade(lcid));
          }
          catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException)
          {
              // The id is neither a known LCID nor a code page available on this platform
              // (for example \lang1024). Decode with the en-US code page instead of aborting
              // the whole document.
              encoding = Encoding.GetEncoding(LCID2Codepade(Lang_EN_US));
          }
          decoder = encoding.GetDecoder();
          unicode_decoders.Add(lcid, decoder);
      }
      current_unicode_decoder = decoder;
      current_lang = lcid;
  }
  /// <summary>
  /// Converts an RTF font charset number into the language id or code page number that is
  /// handed to SelectUnicodeDecoder.
  /// </summary>
  /// <param name="charset">Font charset value.</param>
  /// <returns>The mapped id; <c>default_lang</c> for charsets without an entry.</returns>
  int TranslateCharset(long charset)
  {
      // Every charset without an explicit entry resolves to the document default.
      int result = default_lang;
      switch (charset)
      {
          case 0: result = 1033; break;    // ANSI
          case 1: break;                   // default
          case 2: result = 1038; break;    // Symbol
          case 77: result = 10000; break;  // Mac romant - to fix
          case 78: result = 10001; break;  // Mac Shift Jis - to fix
          case 79: result = 10003; break;  // Mac Hangul - to fix
          case 80: result = 10008; break;  // Mac GB2312 - to fix
          case 81: result = 10002; break;  // Mac Big5 - to fix
          // 82: Johab old - not mapped
          case 83: result = 10005; break;  // Mac Hebrew - to fix
          case 84: result = 10004; break;  // Mac Arabic - to fix
          case 85: result = 10006; break;  // Mac Greek - to fix
          case 86: result = 10081; break;  // Mac Turkish - to fix
          case 87: result = 10021; break;  // Mac Thai - to fix
          case 88: result = 10029; break;  // Mac East Europe - to fix
          case 89: result = 10007; break;  // Mac Russian - to fix
          case 128: result = 932; break;   // Shift JIS
          case 129: result = 949; break;   // Korean Hangul
          case 130: break;                 // Korean Johab (1361) - uses the default
          case 134: result = 2052; break;  // GB2312 (936)
          case 136: result = 1028; break;  // BIG5 (950)
          case 161: result = 1253; break;  // Greek
          case 162: result = 1055; break;  // Turkish
          case 163: result = 1258; break;  // Vietnamese (1066)
          case 177: result = 1255; break;  // Hebrew (1037)
          case 178: result = 1256; break;  // Arabic (1056)
          // 179, 180, 181: old Arabic / Hebrew user charsets - not mapped
          case 186: result = 1257; break;  // Baltic (1062)
          case 204: result = 1251; break;  // Russian (1049)
          case 222: result = 874; break;   // Thai (1054)
          case 238: result = 1250; break;  // East Europe (1045)
          case 254: result = 437; break;   // PC437
          case 255: result = 850; break;   // OEM
      }
      return result;
  }
  /// <summary>
  /// Switches the current decoder to the one that corresponds to a font charset.
  /// </summary>
  /// <param name="charset">Font charset value, translated by TranslateCharset.</param>
  internal void SelectCodepageByFontCharset(long charset)
  {
      SelectUnicodeDecoder(TranslateCharset(charset));
  }
  /// <summary>
  /// Saves the current language id so that PopLocaleDecoder can restore it later.
  /// </summary>
  private void PushLocaleDecoder()
  {
      int saved = current_lang;
      lang_ids.Push(saved);
  }
  /// <summary>
  /// Restores the language id saved by the matching PushLocaleDecoder call and re-selects
  /// its decoder.
  /// </summary>
  /// <exception cref="SyntaxErrorException">
  /// Release builds only: nothing was saved, i.e. there are more closing than opening braces.
  /// Debug builds write a diagnostic message and continue.
  /// </exception>
  private void PopLocaleDecoder()
  {
      if (lang_ids.Count == 0)
      {
#if DEBUG
          Debug.WriteLine("Rich document error: broken source document structure. Ignore error and continue");
          return;
#else
          throw new SyntaxErrorException("Rich document structure error - broken source document");
#endif
      }

      current_lang = lang_ids.Pop();
      SelectUnicodeDecoder(current_lang);
  }
  319. #endregion
  /// <summary>
  /// Finishes the control word collected so far: publishes its name, numeric argument and the
  /// text gathered before it, applies language-selection words, and clears the accumulators.
  /// </summary>
  private void ControlWord()
  {
      previous_tag = control_tag;
      control_tag = control.ToString();

      // The argument is machine-formatted ASCII; parse it independently of the thread culture
      // (a customized negative sign would otherwise break arguments such as "-5").
      control_num = number.Length != 0
          ? long.Parse(number.ToString(), NumberStyles.Integer, CultureInfo.InvariantCulture)
          : 0;

      if (control_tag == "lang" || control_tag == "ansicpg")
      {
          // Only honored while no font charset is in effect.
          if (font_charset == 0)
          {
              SelectUnicodeDecoder((int)control_num);
              default_lang = (int)control_num;
          }
      }
#if true // 2024 Jan 25 - deflang
      if (control_tag == "deflang")
      {
          // NOTE(review): applied only when a default language has already been set - confirm intent.
          if (default_lang != 0)
          {
              SelectUnicodeDecoder((int)control_num);
              default_lang = current_lang;
          }
      }
#endif
      // Flush pending \'hh bytes, then hand the collected text over.
      RestoreEncodedText();
      parsed_text = text.ToString();

      control.Length = 0;
      number.Length = 0;
      text.Length = 0;

      // Latch the "argument present" flag for the HasValue property and re-arm it.
      has_has_value = has_value;
      has_value = false;
  }
  /// <summary>
  /// Decodes the bytes collected from \'hh escapes with the current decoder, appends the
  /// result to the text buffer, and clears both the byte list and the number accumulator.
  /// </summary>
  private void RestoreEncodedText()
  {
      if (raw_chars.Count != 0)
      {
          // Hex escapes can appear before any language or code page was selected; pick the
          // decoder for the current language instead of dereferencing a null decoder.
          if (current_unicode_decoder == null)
          {
              SelectUnicodeDecoder(current_lang);
          }

          byte[] bytes = raw_chars.ToArray();
          char[] decoded = new char[bytes.Length];
          int count = current_unicode_decoder.GetChars(bytes, 0, bytes.Length, decoded, 0);

          // Append straight from the buffer; no intermediate array or string is needed.
          this.text.Append(decoded, 0, count);
          raw_chars.Clear();
      }
      number.Clear();
  }
  /// <summary>
  /// Appends one character to the text buffer after flushing any pending \'hh bytes, so the
  /// output keeps the order of the input.
  /// </summary>
  /// <param name="ch">Character to append.</param>
  private void AppendCharacter(char ch)
  {
      RestoreEncodedText();
      text.Append(ch);
  }
  /// <summary>
  /// Feeds one character of the RTF stream into the state machine.
  /// </summary>
  /// <param name="ch">Next character of the document.</param>
  /// <returns>
  /// <see cref="ParserStatus.Collecting"/> while input is still being accumulated, or the status
  /// describing what was just completed (control word, opening brace, closing brace).
  /// </returns>
  /// <exception cref="Exception">A backslash is followed by a character this parser does not handle.</exception>
  internal ParserStatus ParseByte(char ch)
  {
      //Console.Write(ch);
      byte hex;
      status = ParserStatus.Collecting;
      delimiter = ch;

      // A brace that directly follows a backslash ("\{" or "\}") is literal text (see the Control
      // state below) and must not take part in group nesting or the language stack.
      bool literalBrace = parser_state == ParserState.Control && control.Length == 0;
      if (!literalBrace)
      {
          if (ch == '{')
          {
              indirection_counter++;
              PushLocaleDecoder();
          }
          if (ch == '}')
          {
              PopLocaleDecoder();
              indirection_counter--;
          }
      }

      bool loop;
      do
      {
          // "loop" is set by a state that wants the same character re-examined in another state.
          loop = false;
          switch (parser_state)
          {
              case ParserState.Neutral:
                  switch (ch)
                  {
                      case '{':
                          ControlWord();
                          status = ParserStatus.OpenBlock;
                          break;
                      case '}':
                          ControlWord();
                          status = ParserStatus.CloseBlock;
                          break;
                      case '\\':
                          parser_state = ParserState.Control;
                          break;
                      default:
                          switch (ch)
                          {
                              // Raw line breaks, tabs and NULs are not part of the text.
                              case '\r':
                              case '\n':
                              case '\t':
                              case '\0':
                                  break;
                              default:
                                  AppendCharacter(ch);
                                  break;
                          }
                          break;
                  }
                  break;

              case ParserState.CheckHyphen:
                  if (char.IsDigit(ch))
                  {
                      // "-" introduced a negative numeric argument.
                      number.Append('-');
                      number.Append(ch);
                      parser_state = ParserState.Number;
                      has_value = true;
                      break;
                  }
                  // Substitute Optional HYPHEN with ZERO WIDTH SPACE
                  AppendCharacter((char)8203);
                  parser_state = ParserState.Neutral;
                  // Re-examine the same character as plain input. Looping (instead of calling
                  // ParseByte recursively) keeps a brace from being pushed/popped and counted twice.
                  loop = true;
                  break;

              case ParserState.Control:
                  if (char.IsLetter(ch))
                  {
                      control.Append(ch);
                  }
                  else if (ch == '-')
                  {
                      parser_state = ParserState.CheckHyphen;
                  }
                  else if (char.IsDigit(ch))
                  {
                      number.Append(ch);
                      parser_state = ParserState.Number;
                      has_value = true;
                  }
                  else if (ch == '\\')
                  {
                      if (control.Length > 0)
                      {
                          // The backslash ends this word and starts the next one.
                          ControlWord();
                          status = ParserStatus.ControlTag;
                      }
                      else
                      {
                          // "\\" is an escaped backslash.
                          AppendCharacter(ch);
                          parser_state = ParserState.Neutral;
                      }
                  }
                  else if (ch == '{')
                  {
                      if (control.Length > 0)
                      {
                          ControlWord();
                          status = ParserStatus.OpenBlock;
                      }
                      else
                      {
                          // "\{" is an escaped brace.
                          AppendCharacter(ch);
                          status = ParserStatus.Collecting;
                      }
                      parser_state = ParserState.Neutral;
                  }
                  else if (ch == '}')
                  {
                      if (control.Length > 0)
                      {
                          ControlWord();
                          status = ParserStatus.CloseBlock;
                      }
                      else
                      {
                          // "\}" is an escaped brace.
                          AppendCharacter(ch);
                          status = ParserStatus.Collecting;
                      }
                      parser_state = ParserState.Neutral;
                  }
                  else if (char.IsWhiteSpace(ch))
                  {
                      parser_state = ParserState.Neutral;
                      ControlWord();
                      status = ParserStatus.ControlTag;
                  }
                  else if (ch == '*')
                  {
#if false // Preivous version which ignore pictures in \* control (20210211)
                      parser_state = ParserState.RichFormatExtensions;
                      if (indirection_counter == 0)
                          throw new Exception("Broken RTF format");
                      extensions_skip_counter = indirection_counter - 1;
#else
                      parser_state = ParserState.Neutral;
                      ControlWord();
                      status = ParserStatus.ControlTag;
#endif
                  }
                  else if (ch == ';')
                  {
                      parser_state = ParserState.Neutral;
                      ControlWord();
                      status = ParserStatus.ControlTag;
                  }
                  else if (ch == '\'')
                  {
                      // Start of a \'hh escape.
                      number.Clear();
                      parser_state = ParserState.FirstNibble;
                  }
                  else if (ch == '~')
                  {
                      // Non-breaking space
                      AppendCharacter((char)0x00a0);
                      parser_state = ParserState.Neutral;
                  }
                  else if (ch == '_')
                  {
                      // Non-breaking hyphen
                      AppendCharacter((char)0x2011);
                      parser_state = ParserState.Neutral;
                  }
                  else if (ch == '.')
                  {
                      AppendCharacter('.');
                      parser_state = ParserState.Neutral;
                  }
                  else if (ch == ')')
                  {
                      AppendCharacter(')');
                      parser_state = ParserState.Neutral;
                  }
                  else
                      throw new Exception("RTF format not parsed");
                  break;

              case ParserState.Number:
                  if (char.IsDigit(ch))
                      number.Append(ch);
                  else
                  {
                      if (ch == '{')
                      {
                          parser_state = ParserState.Neutral;
                          status = ParserStatus.OpenBlock;
                      }
                      else if (ch == '}')
                      {
                          parser_state = ParserState.Neutral;
                          status = ParserStatus.CloseBlock;
                      }
                      else
                      {
                          if (this.control.ToString() == "u")
                          {
                              // \uN: a Unicode character given as a decimal number. The digits are
                              // machine-formatted, so parse them independently of the thread culture.
                              int codePoint = int.Parse(number.ToString(), CultureInfo.InvariantCulture);
                              if (codePoint < 0)
                              {
                                  // RTF control words generally accept signed 16-bit numbers as arguments.
                                  // For this reason, Unicode values greater than 32767 must be expressed as negative numbers.
                                  codePoint = unchecked((char)codePoint);
                                  if (codePoint == 0xf0b7)
                                      codePoint = '●';
                              }
                              AppendCharacter(unchecked((char)codePoint));
                              number.Length = 0;
                              // This path never reaches ControlWord(), which normally consumes the flag;
                              // clear it here so it cannot leak into the next control word's HasValue.
                              has_value = false;
                              if (ch != '?')
                              {
                                  // NOTE(review): assumes the fallback is written as \'hh right after
                                  // the number (three more characters) - confirm for other fallback forms.
                                  parser_state = ParserState.SkipToNext;
                                  skip_counter = 3;
                              }
                              else
                              {
                                  // '?' is the fallback character itself; nothing more to skip.
                                  parser_state = ParserState.Neutral;
                                  control.Length = 0;
                              }
                              break;
                          }
                          else if (ch == '\\')
                              parser_state = ParserState.Control;
                          else if (ch == ';' || char.IsWhiteSpace(ch))
                              parser_state = ParserState.Neutral;
                          status = ParserStatus.ControlTag;
                      }
                      ControlWord();
                  }
                  break;

              case ParserState.TryCollectChars:
                  if (ch != '\\')
                  {
                      // The run of hex escapes ended: decode it and treat ch as ordinary input.
                      RestoreEncodedText();
                      parser_state = ParserState.Neutral;
                      loop = true;
                      continue;
                  }
                  parser_state = ParserState.WaitHexcharPrefix;
                  break;

              case ParserState.WaitHexcharPrefix:
                  if (ch != '\'')
                  {
                      // The backslash started a regular control word, not another hex escape.
                      RestoreEncodedText();
                      parser_state = ParserState.Control;
                      loop = true;
                      continue;
                  }
                  parser_state = ParserState.FirstNibble;
                  number.Clear();
                  break;

              case ParserState.FirstNibble:
                  parser_state = ParserState.SecondNibble;
                  number.Append(ch);
                  break;

              case ParserState.SecondNibble:
                  hex = 0; // Just avoid warning
                  try
                  {
                      number.Append(ch);
                      hex = byte.Parse(number.ToString(), System.Globalization.NumberStyles.HexNumber);
#if false // 2024/01/26 // This will stay disabled until we met os/platform which does not support Shift JIS encoding
                      if (hex == 0x81)
                      {
                          parser_state = ParserState.ParsePrefix_0x81_phase0;
                          break;
                      }
#endif
                  }
                  catch (Exception e)
                  {
                      // Invalid hex digits: report the problem and substitute '?'.
                      Console.Error.WriteLine(e.ToString());
                      hex = (byte) '?';
                  }
                  raw_chars.Add(hex);
                  parser_state = ParserState.TryCollectChars;
                  break;

#if false // 2024/01/26 // This will stay disabled until we met os/platform which does not support Shift JIS encoding
              case ParserState.ParsePrefix_0x81_phase0:
                  // if(ch == '\\')
                  parser_state = ParserState.ParsePrefix_0x81_phase1;
                  break;
              case ParserState.ParsePrefix_0x81_phase1:
                  // if (ch == '\'')
                  parser_state = ParserState.ParsePrefix_0x81_phase2;
                  break;
              case ParserState.ParsePrefix_0x81_phase2:
                  number.Append(ch);
                  parser_state = ParserState.ParsePrefix_0x81_phase3;
                  break;
              case ParserState.ParsePrefix_0x81_phase3:
                  number.Append(ch);
                  hex = byte.Parse(number.ToString(), System.Globalization.NumberStyles.HexNumber);
                  switch(hex)
                  {
                      case 0x8b:
                          text.Append('°');
                          break;
                      case 0x7e:
                          text.Append('×');
                          break;
                      default:
                          throw new Exception("Unparsed ANSI sequence");
                  }
                  number.Clear();
                  parser_state = ParserState.Neutral;
                  break;
#endif

              case ParserState.SkipToNext: // Ignore hexadecimal representation of the character
                  skip_counter--;
                  if (skip_counter == 0)
                  {
                      parser_state = ParserState.Neutral;
                      control.Length = 0;
                  }
                  break;

              case ParserState.RichFormatExtensions:
                  status = ParseExtensionByte(ch);
                  break;

              case ParserState.IgnoreSpaces:
                  if (ch == ' ')
                  {
                      // Debug.WriteLine("Skip space");
                  }
                  else
                  {
                      parser_state = ParserState.Neutral;
                      loop = true;
                  }
                  break;
          }
      } while (loop);
      return status;
  }
  702. #if false // Debug
  703. StringBuilder dbg = new StringBuilder();
  704. private ParserStatus ParseExtensionByte(char ch)
  705. {
  706. if (extensions_skip_counter == indirection_counter)
  707. {
  708. dbg.Append("\n\n");
  709. parser_state = ParserState.Neutral;
  710. dbg.Clear();
  711. }
  712. else
  713. dbg.Append(ch);
  714. return ch == '{' ? ParserStatus.OpenBlock : ch == '}' ? ParserStatus.CloseBlock : ParserStatus.Collecting;
  715. }
  716. #else
  717. private ParserStatus ParseExtensionByte(char ch)
  718. {
  719. if (extensions_skip_counter == indirection_counter)
  720. {
  721. parser_state = ParserState.Neutral;
  722. }
  723. return ch == '{' ? ParserStatus.OpenBlock : ch == '}' ? ParserStatus.CloseBlock : ParserStatus.Collecting;
  724. }
  725. #endif
  /// <summary>
  /// Puts the current run (character) format back to the parser's defaults.
  /// </summary>
  internal void ResetRunFormat()
  {
      // Character attributes: all off.
      current_run_format.bold = false;
      current_run_format.italic = false;
      current_run_format.underline = false;
      current_run_format.strike = false;

      // Default size value used by this parser.
      current_run_format.font_size = 24;

      // Black text on white background and fill.
      current_run_format.color = System.Drawing.Color.Black;
      current_run_format.BColor = System.Drawing.Color.White;
      current_run_format.FillColor = System.Drawing.Color.White;

      // First font of the font table, plain (non-scripted) text.
      current_run_format.font_idx = 0;
      current_run_format.script_type = RunFormat.ScriptType.PlainText;
  }
  /// <summary>
  /// Puts the current paragraph format back to the parser's defaults and makes the default
  /// language the current one.
  /// </summary>
  public void ResetParagraphFormat()
  {
      // Alignment.
      current_paragraph_format.Valign = ParagraphFormat.VerticalAlign.Top; // 20210722
      current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Left;

      // Spacing and indents: all zero.
      current_paragraph_format.line_spacing = 0;
      current_paragraph_format.space_before = 0;
      current_paragraph_format.space_after = 0;
      current_paragraph_format.left_indent = 0;
      current_paragraph_format.right_indent = 0;
      current_paragraph_format.first_line_indent = 0;
      current_paragraph_format.lnspcmult = ParagraphFormat.LnSpcMult.Exactly;
      current_paragraph_format.pnstart = 0;

      // NOTE(review): only the language id is changed here; the active decoder is left
      // untouched - confirm that this is intended.
      current_lang = default_lang;

      // No list membership and no custom tab stops.
      current_paragraph_format.list_id = null;
      current_paragraph_format.tab_positions = null;
  }
  /// <summary>
  /// Creates a parser in the Neutral state with empty accumulators and default run and
  /// paragraph formats.
  /// </summary>
  internal RTF_Parser()
  {
      // Accumulators. The number buffer is created with a capacity and maximum capacity of 12.
      control = new StringBuilder();
      number = new StringBuilder(12, 12);
      text = new StringBuilder();

      // Scalar state.
      parser_state = ParserState.Neutral;
      has_value = false;
      override_default_color = false;
      current_lang = 0;
      indirection_counter = 0;
      skip_space_counter = 0;

      // Formats start from their defaults.
      ResetRunFormat();
      ResetParagraphFormat();
  }
  /// <summary>
  /// Type initializer: on CROSSPLATFORM / COREWIN builds registers the code-pages encoding
  /// provider so that Encoding.GetEncoding can resolve the code pages used by this class.
  /// </summary>
  static RTF_Parser()
  {
#if CROSSPLATFORM || COREWIN
      Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif
  }
  775. }
  776. }