RTF_Header.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Drawing;
  5. using System.Text;
  6. namespace FastReport.RichTextParser
  7. {
  8. /// <summary>
  9. /// This class represents a RTF document header.
  10. /// </summary>
  11. /// <remarks>
  12. /// Refer to the RTF 1.7 specification for a detailed description.
  13. /// </remarks>
  14. class RTF_Header
  15. {
  // Document whose font, color and style lists are filled while the header is parsed.
  private RichDocument doc;

  /// <summary>
  /// States of the header state machine that Parse switches on.
  /// </summary>
  private enum HeaderParserState
  {
      /// <summary>Nothing consumed yet; the first token must be an opening block with no text and no control word.</summary>
      Starting,

      /// <summary>The "rtf" control word is expected next.</summary>
      Signature,

      /// <summary>Document-wide controls such as "ansicpg", "deff", "deflang" and "deftab" are read.</summary>
      GlobalProperties,

      /// <summary>The control word of a group decides which table or section follows.</summary>
      SubitemFirstLevel,

      /// <summary>Tokens are forwarded to ParseFontTable.</summary>
      FontTable,

      /// <summary>Tokens are forwarded to ParseColorTable.</summary>
      ColorTable,

      /// <summary>Tokens are forwarded to ParserStyleTable.</summary>
      StyleSheet,

      /// <summary>The "info" group; its tokens are ignored until the group closes.</summary>
      DocInfoSection,

      /// <summary>A group that is ignored until it closes ("mmathPr" or a '*' delimiter).</summary>
      SkipFormatExtension
  }

  /// <summary>
  /// Sub-states used while the style sheet is processed.
  /// </summary>
  private enum StyleState
  {
      /// <summary>Between two style definitions.</summary>
      Wait,

      /// <summary>Inside a style definition.</summary>
      Parse
  }

  // Backing store of the Header property.
  private bool header_active;

  // Incremented by Parse on every opening block and decremented on every closing block.
  private int recursion_counter;

  // Current state of the header state machine.
  private HeaderParserState tag_state;

  // Current sub-state of the style sheet processing.
  private StyleState style_state;

  // Maps the number that follows an "f" control in the font table to the id stored in RFont.font_id.
  private Dictionary<long, uint> font_ids;

  /// <summary>
  /// Gets the document this header parser fills.
  /// </summary>
  public RichDocument Document
  {
      get { return doc; }
  }
  /// <summary>
  /// Creates a header parser for <paramref name="doc"/> and replaces the document's
  /// font, color and style lists with fresh ones.
  /// </summary>
  /// <param name="doc">Document that receives the parsed header data.</param>
  public RTF_Header(RichDocument doc)
  {
      // Parser-side state; none of it depends on the document.
      style_state = StyleState.Wait;
      tag_state = HeaderParserState.Starting;
      font_ids = new Dictionary<long, uint>();

      this.doc = doc;
      doc.font_list = new List<RFont>();

      // Black is always entry 0 of the color list.
      doc.color_list = new List<Color>();
      doc.color_list.Add(Color.Black);

      doc.style_list = new List<Style>();
  }
  /// <summary>
  /// Gets or sets the flag that tells whether the header is still being parsed.
  /// </summary>
  /// <remarks>
  /// Setting the flag to false guarantees that the document owns at least one color
  /// (black) and at least one font (Arial, roman family) before the flag is stored.
  /// </remarks>
  internal bool Header
  {
      get
      {
          return header_active;
      }
      set
      {
          if (!value)
          {
              if (doc.color_list.Count == 0)
              {
                  doc.color_list.Add(Color.Black);
              }

              // Simple RTF documents often carry no font table, so supply one font.
              if (doc.font_list.Count == 0)
              {
                  RFont fallback = new RFont();
                  fallback.family = RFont.Family.Rroman;
                  fallback.FontName = "Arial";
                  doc.font_list.Add(fallback);
              }
          }

          header_active = value;
      }
  }
  /// <summary>
  /// Translates a font number taken from the RTF font table into the id that was
  /// stored in <c>RFont.font_id</c> when that font entry was registered.
  /// </summary>
  /// <param name="id">Number that followed the "f" control in the font table.</param>
  /// <returns>The registered font id, or 0 when <paramref name="id"/> is unknown.</returns>
  internal uint GetFontID(long id)
  {
      // Single lookup instead of ContainsKey followed by the indexer.
      uint result;
      return font_ids.TryGetValue(id, out result) ? result : 0;
  }
  /// <summary>
  /// Prepares the parser for an embedded document: the state machine starts directly
  /// in the global properties state (the Starting and Signature states are skipped)
  /// and the header is marked as active.
  /// </summary>
  public void StartParseEmbeddedDocument()
  {
      tag_state = HeaderParserState.GlobalProperties;
      Header = true;
  }
  /// <summary>
  /// Feeds the parser's current token into the header state machine.
  /// </summary>
  /// <param name="parser">
  /// Parser holding the current token; its Status, Control, Text, Number and Delimiter are read.
  /// </param>
  /// <returns>
  /// The value of <see cref="Header"/> after the token was handled, or false right away
  /// when a closing block arrives while the nesting counter is already 0.
  /// </returns>
  /// <exception cref="DecoderFallbackException">
  /// The very first token is not an opening block with empty text and empty control word.
  /// </exception>
  /// <exception cref="Exception">
  /// The signature control is not "rtf" or its number is greater than 1.
  /// </exception>
  public bool Parse(RTF_Parser parser)
  {
      // Keep the nesting counter in step with the braces before the state is examined.
      if (parser.Status == ParserStatus.OpenBlock)
      {
          ++recursion_counter;
      }
      else if (parser.Status == ParserStatus.CloseBlock)
      {
          if (recursion_counter == 0)
          {
              return false;
          }
          --recursion_counter;
      }

      switch (tag_state)
      {
          case HeaderParserState.Starting:
              bool bareOpening = parser.Status == ParserStatus.OpenBlock
                  && parser.Text.Length == 0
                  && parser.Control.Length == 0;
              if (!bareOpening)
              {
                  throw new DecoderFallbackException("Not a RichText format");
              }
              tag_state = HeaderParserState.Signature;
              Header = true;
              break;

          case HeaderParserState.Signature:
              if (parser.Control != "rtf")
              {
                  throw new Exception("Document format error");
              }
              if (parser.Number > 1)
              {
                  throw new Exception("Unsupported RTF format version");
              }
              tag_state = HeaderParserState.GlobalProperties;
              break;

          case HeaderParserState.GlobalProperties:
              HandleGlobalProperty(parser);
              break;

          case HeaderParserState.SubitemFirstLevel:
              HandleFirstLevelSubitem(parser);
              break;

          case HeaderParserState.FontTable:
              ParseFontTable(parser);
              break;

          case HeaderParserState.ColorTable:
              ParseColorTable(parser);
              break;

          case HeaderParserState.StyleSheet:
              ParserStyleTable(parser);
              break;

          case HeaderParserState.DocInfoSection:
          case HeaderParserState.SkipFormatExtension:
              LeaveIgnoredGroupOnClose(parser);
              break;
      }

      return Header;
  }

  // Handles one token in the GlobalProperties state: stores document-wide settings
  // and then reacts to an opening or closing block.
  private void HandleGlobalProperty(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "ansicpg":
              doc.codepage = parser.Number;
              break;
          case "deff":
              doc.default_font = parser.Number;
              break;
          case "deflang":
              doc.default_lang = parser.Number;
              break;
          case "deftab":
              doc.default_tab_width = parser.Number;
              break;
          case "pard":
          case "cf":
          case "fs":
              // These controls end the header.
              Header = false;
              break;
          case "ansi":
          case "uc":
              // Recognised, nothing to store. "uc" was seen in the header of a file
              // created with "\abHTML to RTF .Net".
              break;
      }

      if (parser.Status == ParserStatus.OpenBlock)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
      }
      else if (parser.Status == ParserStatus.CloseBlock)
      {
          Header = false;
      }
  }

  // Handles one token in the SubitemFirstLevel state: the control word selects the
  // table or section to enter; an unknown control word ends the header.
  private void HandleFirstLevelSubitem(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "fonttbl":
              // A font table that closes immediately is empty; stay in this state.
              if (parser.Status != ParserStatus.CloseBlock)
              {
                  tag_state = HeaderParserState.FontTable;
                  font_state = FontInfoState.FontID;
                  doc.font_list.Clear(); // Delete default font
              }
              break;

          case "colortbl":
              tag_state = HeaderParserState.ColorTable;
              if (parser.Delimiter != ' ')
              {
                  parser.override_default_color = parser.Delimiter != ';';
              }
              break;

          case "noqfpromote":
              // Ignored control word; it does not change the state.
              break;

          case "info":
              tag_state = HeaderParserState.DocInfoSection;
              break;

          case "stylesheet":
              if (parser.Status == ParserStatus.CloseBlock)
              {
                  tag_state = HeaderParserState.SubitemFirstLevel;
              }
              else
              {
                  tag_state = HeaderParserState.StyleSheet;
                  style = new Style();
                  parser.ResetParagraphFormat();
                  parser.ResetRunFormat();
                  style_state = StyleState.Parse;
              }
              break;

          case "mmathPr":
              tag_state = HeaderParserState.SkipFormatExtension;
              break;

          case "":
              if (parser.Delimiter == '*')
              {
                  tag_state = HeaderParserState.SkipFormatExtension;
              }
              break;

          default:
              Header = false;
              break;
      }
  }

  // Shared by the DocInfoSection and SkipFormatExtension states: every token is ignored,
  // and a closing block that brings the nesting counter to 1 returns to SubitemFirstLevel.
  private void LeaveIgnoredGroupOnClose(RTF_Parser parser)
  {
      if (parser.Status == ParserStatus.CloseBlock && recursion_counter == 1)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
      }
  }
  /// <summary>
  /// Looks up a style by its definition number and creates a placeholder when it is missing.
  /// </summary>
  /// <param name="parser">Parser whose current run and paragraph formats seed a placeholder style.</param>
  /// <param name="styledef">Style definition number to look for.</param>
  /// <returns>
  /// The first style in the document's style list with a matching number; otherwise a new
  /// style named "Autogenerated N" that has been appended to the list.
  /// </returns>
  public Style FindStyle(RTF_Parser parser, int styledef)
  {
      foreach (Style candidate in doc.style_list)
      {
          if (candidate.styledef == styledef)
          {
              return candidate;
          }
      }

      System.Diagnostics.Trace.WriteLine("RTF style not found. Create default style");

      Style placeholder = new Style();
      placeholder.styledef = styledef;
      placeholder.stylename = "Autogenerated " + styledef.ToString();
      placeholder.run_style = parser.current_run_format;
      placeholder.paragraph_style = parser.current_paragraph_format;

      doc.style_list.Add(placeholder);
      return placeholder;
  }
  // Style that is currently being assembled from the style sheet.
  Style style;

  /// <summary>
  /// Handles one token while the state machine is inside the style sheet.
  /// </summary>
  /// <param name="parser">Parser holding the current token and the current run/paragraph formats.</param>
  private void ParserStyleTable(RTF_Parser parser)
  {
      if (style_state == StyleState.Wait)
      {
          AwaitNextStyle(parser);
      }
      else if (style_state == StyleState.Parse)
      {
          CollectStyleToken(parser);
      }
  }

  // Between two styles: an opening block starts a new style, a closing block that
  // brings the nesting counter to 1 ends the style sheet.
  private void AwaitNextStyle(RTF_Parser parser)
  {
      if (parser.Status == ParserStatus.OpenBlock)
      {
          style = new Style();
          parser.ResetParagraphFormat();
          parser.ResetRunFormat();
          // Font size 0 makes it possible to tell later whether the style changed it.
          parser.current_run_format.font_size = 0;
          style_state = StyleState.Parse;
      }
      else if (parser.Status == ParserStatus.CloseBlock && recursion_counter == 1)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
      }
  }

  // Inside a style: applies the control word, and on a closing block stores the
  // finished style in the document's style list.
  private void CollectStyleToken(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "s":
              style.styledef = (int)parser.Number;
              break;
          case "sbasedon":
              // Recognised but not used.
              break;
          default:
              // Try paragraph formatting first; run formatting only when that did not match.
              if (!RTF_DocumentParser.ParseParagraphFormat(parser))
              {
                  RTF_DocumentParser.ParseRunFormat(parser, this);
              }
              break;
      }

      if (parser.Status != ParserStatus.CloseBlock)
      {
          return;
      }

      if (parser.Text.Length != 0)
      {
          style.stylename = parser.Text;
      }

      style.paragraph_style = parser.current_paragraph_format;
      style.run_style = parser.current_run_format;
      style_state = StyleState.Wait;
      doc.style_list.Add(style);

      if (recursion_counter == 1)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
      }
  }
  // Components of the color table entry that is currently being read.
  byte color_red;
  byte color_green;
  byte color_blue;

  /// <summary>
  /// Handles one token while the state machine is inside the color table.
  /// </summary>
  /// <param name="parser">Parser holding the current token.</param>
  /// <exception cref="Exception">The token's number is greater than 255.</exception>
  void ParseColorTable(RTF_Parser parser)
  {
      if (parser.Status == ParserStatus.CloseBlock)
      {
          // A closing block that brings the nesting counter to 1 ends the color table.
          if (recursion_counter == 1)
          {
              tag_state = HeaderParserState.SubitemFirstLevel;
          }
          return;
      }

      if (parser.Number > 255)
      {
          throw new Exception("Color value out of range");
      }

      byte component = (byte)parser.Number;
      switch (parser.Control)
      {
          case "red":
              color_red = component;
              break;
          case "green":
              color_green = component;
              break;
          case "blue":
              color_blue = component;
              break;
      }

      // A backslash delimiter means another control follows; the entry is not complete yet.
      if (parser.Delimiter == '\\')
      {
          return;
      }

      Color entry = Color.FromArgb(color_red, color_green, color_blue);
      if (parser.override_default_color)
      {
          doc.color_list[0] = entry;
      }
      else
      {
          doc.color_list.Add(entry);
      }
      parser.override_default_color = false;
  }
  /// <summary>
  /// Sub-states used while the font table is processed.
  /// </summary>
  enum FontInfoState
  {
      /// <summary>An "f" control that starts a font entry is expected.</summary>
      FontID,

      /// <summary>The attributes and the name of the current font entry are read.</summary>
      CheckThemAll,
  }

  // Current sub-state of the font table processing.
  FontInfoState font_state;

  // Font entry that is currently being assembled.
  RFont font;

  /// <summary>
  /// Applies a font family or charset control word to the font that is being assembled.
  /// </summary>
  /// <param name="parser">Parser holding the current control word and its number.</param>
  /// <returns>true when the control word was recognised; false otherwise.</returns>
  internal bool ParseFontAttributes(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "fcharset":
              font.charset = parser.Number;
              return true;

          case "fnil":
              font.family = RFont.Family.Nil;
              return true;
          case "froman":
              font.family = RFont.Family.Rroman;
              return true;
          case "fswiss":
              font.family = RFont.Family.Swiss;
              return true;
          case "fmodern":
              font.family = RFont.Family.Modern;
              return true;
          case "fscript":
              font.family = RFont.Family.Script;
              return true;
          case "fdecor":
              font.family = RFont.Family.Decor;
              return true;
          case "ftech":
              font.family = RFont.Family.Tech;
              return true;
          case "fbidi":
              font.family = RFont.Family.Bidi;
              return true;

          default:
              // Many other font options end up here and are skipped;
              // collecting them in debug mode would be useful.
              return false;
      }
  }
  /// <summary>
  /// Handles one token while the state machine is inside the font table.
  /// </summary>
  /// <remarks>
  /// An "f" control starts a new font entry and registers its number in the id map.
  /// The entry is completed and appended to the document's font list when the block
  /// at nesting level 3 closes; the text of that closing token is the font name.
  /// </remarks>
  /// <param name="parser">Parser holding the current token.</param>
  /// <exception cref="Exception">
  /// An unexpected control word appears where a font entry should start, or a font
  /// entry closes with empty text.
  /// </exception>
  void ParseFontTable(RTF_Parser parser)
  {
      // A closing block that brings the nesting counter to 1 ends the font table.
      if (recursion_counter == 1 && parser.Status == ParserStatus.CloseBlock)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
          return;
      }

      // Parse has already adjusted the counter for this token; undo that adjustment
      // to get the nesting level as it was before the token.
      int back = recursion_counter;
      if (parser.Status == ParserStatus.CloseBlock)
      {
          ++back;
      }
      else if (parser.Status == ParserStatus.OpenBlock)
      {
          --back;
      }

      switch (font_state)
      {
          case FontInfoState.FontID:
              switch (parser.Control)
              {
                  case "f":
                      font = new RFont();
                      font.font_id = (uint)doc.font_list.Count;
                      if (!font_ids.ContainsKey(parser.Number))
                      {
                          font_ids.Add(parser.Number, font.font_id);
                      }
                      else
                      {
                          // Report the RTF font number that is duplicated, not the
                          // freshly assigned list index.
                          Debug.WriteLine("Duplication of font_id: " + parser.Number.ToString());
                      }
                      font_state = FontInfoState.CheckThemAll;
                      break;

                  case "flomajor":
                  case "fhimajor":
                  case "fdbmajor":
                  case "fbimajor":
                  case "flominor":
                  case "fhiminor":
                  case "fdbminor":
                  case "fbiminor":
                      // We just ignore these extensions of modern versions of MS Word
                      break;

                  default:
                      if (parser.Status != ParserStatus.OpenBlock)
                      {
                          throw new Exception("RTF unknown font tag");
                      }
                      break;
              }
              break;

          case FontInfoState.CheckThemAll:
              if (back != 3)
              {
                  break;
              }

              if (parser.Status != ParserStatus.CloseBlock)
              {
                  ParseFontAttributes(parser);
                  break;
              }

              string name = parser.Text;
              if (name.Length == 0)
              {
                  throw new Exception("RTF malformed font name");
              }

              // Always keep the name. Previously it was stored only when the text ended
              // with ';', so an entry without the terminator was added without a name.
              font.FontName = name.TrimEnd(';');
              doc.font_list.Add(font);
              font_state = FontInfoState.FontID;
              break;
      }
  }
  456. }
  457. }