RTF_Header.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Drawing;
  5. using System.Text;
  6. namespace FastReport.RichTextParser
  7. {
  8. /// <summary>
  9. /// This class represents an RTF document header.
  10. /// </summary>
  11. /// <remarks>
  12. /// Refer to the RTF 1.7 specification for a detailed description.
  13. /// </remarks>
  14. class RTF_Header
  15. {
  /// <summary>Document whose font, color and style lists are filled in by this header parser.</summary>
  private readonly RichDocument doc;

  /// <summary>States of the header state machine that <see cref="Parse"/> switches on.</summary>
  private enum HeaderParserState
  {
      /// <summary>Initial state; the first token must be a bare block opening.</summary>
      Starting,
      /// <summary>The <c>rtf</c> control word is expected next.</summary>
      Signature,
      /// <summary>Document-wide control words (<c>ansicpg</c>, <c>deff</c>, <c>deflang</c>, ...) are read.</summary>
      GlobalProperties,
      /// <summary>A nested block was opened; its control word selects which table to parse.</summary>
      SubitemFirstLevel,
      /// <summary>Tokens are forwarded to the font table parser.</summary>
      FontTable,
      /// <summary>Tokens are forwarded to the color table parser.</summary>
      ColorTable,
      /// <summary>Tokens are forwarded to the style sheet parser.</summary>
      StyleSheet,
      /// <summary>Inside the <c>info</c> block; tokens are skipped until the block closes.</summary>
      DocInfoSection,
      /// <summary>Inside an ignored extension block; tokens are skipped until the block closes.</summary>
      SkipFormatExtension
  }

  /// <summary>Sub-state used while the style sheet is parsed.</summary>
  private enum StyleState
  {
      /// <summary>Between style entries; waiting for the next block to open.</summary>
      Wait,
      /// <summary>Inside a style entry; its control words are being collected.</summary>
      Parse
  }

  /// <summary>Backing field of <see cref="Header"/>.</summary>
  private bool header_active;

  /// <summary>Number of blocks currently open; incremented on OpenBlock and decremented on CloseBlock.</summary>
  private int recursion_counter;

  /// <summary>Current state of the header state machine.</summary>
  private HeaderParserState tag_state;

  /// <summary>Current state of the style sheet sub-parser.</summary>
  private StyleState style_state;

  /// <summary>Maps the font number given by the <c>f</c> control word to the font's index in the document font list.</summary>
  private readonly Dictionary<long, uint> font_ids;
  /// <summary>
  /// Gets the document that was passed to the constructor.
  /// </summary>
  public RichDocument Document
  {
      get
      {
          return this.doc;
      }
  }
  /// <summary>
  /// Creates a header parser bound to <paramref name="doc"/> and resets the document's
  /// font, color and style lists.
  /// </summary>
  /// <param name="doc">Document that receives the parsed header tables.</param>
  /// <exception cref="ArgumentNullException"><paramref name="doc"/> is <c>null</c>.</exception>
  public RTF_Header(RichDocument doc)
  {
      // Fail with a descriptive exception instead of a NullReferenceException below.
      if (doc == null)
          throw new ArgumentNullException("doc");

      this.doc = doc;
      this.doc.font_list = new List<RFont>();
      this.doc.color_list = new List<Color>();
      // Index 0 of the color list always exists and starts out as black.
      this.doc.color_list.Add(Color.Black);
      this.doc.style_list = new List<Style>();
      font_ids = new Dictionary<long, uint>();
      tag_state = HeaderParserState.Starting;
      style_state = StyleState.Wait;
  }
  /// <summary>
  /// Gets or sets whether the header is still being parsed.
  /// </summary>
  /// <remarks>
  /// Setting the value to <c>false</c> first makes sure the document has at least
  /// one color and one font.
  /// </remarks>
  internal bool Header
  {
      get
      {
          return header_active;
      }
      set
      {
          if (!value)
              EnsureDefaultTables();
          header_active = value;
      }
  }

  /// <summary>
  /// Adds a black color and an Arial font when the corresponding document list is empty.
  /// </summary>
  private void EnsureDefaultTables()
  {
      if (doc.color_list.Count == 0)
          doc.color_list.Add(Color.Black);

      // Simple RTF documents frequently carry no font table, so supply one font.
      if (doc.font_list.Count != 0)
          return;

      RFont fallback = new RFont();
      fallback.family = RFont.Family.Rroman;
      fallback.FontName = "Arial";
      doc.font_list.Add(fallback);
  }
  /// <summary>
  /// Translates a font number from the RTF stream into the index registered for it.
  /// </summary>
  /// <param name="id">Font number as given by the <c>f</c> control word.</param>
  /// <returns>The registered index, or 0 when the number is unknown.</returns>
  internal uint GetFontID(long id)
  {
      uint result;
      // Single lookup; an unknown id falls back to index 0.
      if (!font_ids.TryGetValue(id, out result))
          result = 0;
      return result;
  }
  /// <summary>
  /// Prepares the parser for an embedded document: the state machine starts directly
  /// at the global properties and the header is marked as active.
  /// </summary>
  public void StartParseEmbeddedDocument()
  {
      tag_state = HeaderParserState.GlobalProperties;
      Header = true;
  }
  /// <summary>
  /// Consumes the parser's current token and advances the header state machine.
  /// </summary>
  /// <param name="parser">Parser whose current status, control word, number, text and delimiter are examined.</param>
  /// <returns>
  /// <c>false</c> when the header is finished (also when a block closes while none is open);
  /// otherwise the current value of <see cref="Header"/>.
  /// </returns>
  public bool Parse(RTF_Parser parser)
  {
      // Keep track of the block nesting depth before dispatching on the state.
      if (parser.Status == ParserStatus.OpenBlock)
      {
          recursion_counter++;
      }
      else if (parser.Status == ParserStatus.CloseBlock)
      {
          if (recursion_counter == 0)
              return false;
          recursion_counter--;
      }

      switch (tag_state)
      {
          case HeaderParserState.Starting:
              ExpectOpeningBlock(parser);
              break;

          case HeaderParserState.Signature:
              ExpectSignature(parser);
              break;

          case HeaderParserState.GlobalProperties:
              ParseGlobalProperty(parser);
              break;

          case HeaderParserState.SubitemFirstLevel:
              ParseFirstLevelSubitem(parser);
              break;

          case HeaderParserState.FontTable:
              ParseFontTable(parser);
              break;

          case HeaderParserState.ColorTable:
              ParseColorTable(parser);
              break;

          case HeaderParserState.StyleSheet:
              ParserStyleTable(parser);
              break;

          case HeaderParserState.DocInfoSection:
          case HeaderParserState.SkipFormatExtension:
              // Skip everything until the block that started the section is closed.
              if (parser.Status == ParserStatus.CloseBlock && recursion_counter == 1)
                  tag_state = HeaderParserState.SubitemFirstLevel;
              break;
      }

      return Header;
  }

  /// <summary>
  /// Handles the very first token, which has to be a block opening without text or control word.
  /// </summary>
  private void ExpectOpeningBlock(RTF_Parser parser)
  {
      bool bare_open = parser.Status == ParserStatus.OpenBlock
          && parser.Text.Length == 0
          && parser.Control.Length == 0;
      if (!bare_open)
          throw new DecoderFallbackException("Not a RichText format");

      tag_state = HeaderParserState.Signature;
      Header = true;
  }

  /// <summary>
  /// Verifies the <c>rtf</c> signature control word and its version number.
  /// </summary>
  private void ExpectSignature(RTF_Parser parser)
  {
      if (parser.Control != "rtf")
          throw new Exception("Document format error");
      if (parser.Number > 1)
          throw new Exception("Unsupported RTF format version");

      tag_state = HeaderParserState.GlobalProperties;
  }

  /// <summary>
  /// Handles a token in the global properties area of the header.
  /// </summary>
  private void ParseGlobalProperty(RTF_Parser parser)
  {
      string word = parser.Control;

      if (word == "ansicpg")
      {
          doc.codepage = parser.Number;
      }
      else if (word == "deff")
      {
          doc.default_font = parser.Number;
      }
      else if (word == "deflang")
      {
          doc.default_lang = parser.Number;
      }
      else if (word == "pard" || word == "cf" || word == "fs")
      {
          // Formatting control words end the header.
          Header = false;
      }
      // "ansi" and "uc" (seen in files created with "\abHTML to RTF .Net") are
      // recognized but carry nothing to record; any other word is ignored as well.

      if (parser.Status == ParserStatus.OpenBlock)
          tag_state = HeaderParserState.SubitemFirstLevel;
      else if (parser.Status == ParserStatus.CloseBlock)
          Header = false;
  }

  /// <summary>
  /// Selects the table parser according to the control word of a first-level block.
  /// </summary>
  private void ParseFirstLevelSubitem(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "fonttbl":
              // A font table that closes immediately is empty and changes nothing.
              if (parser.Status != ParserStatus.CloseBlock)
              {
                  tag_state = HeaderParserState.FontTable;
                  font_state = FontInfoState.FontID;
                  doc.font_list.Clear(); // drop the default font
              }
              break;

          case "colortbl":
              tag_state = HeaderParserState.ColorTable;
              parser.override_default_color = parser.Delimiter != ' ';
              break;

          case "noqfpromote":
              // Control word that precedes the info and stylesheet sections; ignored.
              break;

          case "info":
              tag_state = HeaderParserState.DocInfoSection;
              break;

          case "stylesheet":
              // An immediately closed style sheet leaves the state unchanged.
              if (parser.Status != ParserStatus.CloseBlock)
              {
                  tag_state = HeaderParserState.StyleSheet;
                  style = new Style();
                  parser.ResetParagraphFormat();
                  parser.ResetRunFormat();
                  style_state = StyleState.Parse;
              }
              break;

          case "mmathPr":
              tag_state = HeaderParserState.SkipFormatExtension;
              break;

          case "":
              // A '*' delimiter marks an extension block that is skipped.
              if (parser.Delimiter == '*')
                  tag_state = HeaderParserState.SkipFormatExtension;
              break;

          default:
              // Any other control word means the header is over.
              Header = false;
              break;
      }
  }
  /// <summary>
  /// Looks up the style with the given definition number; when none exists, a new
  /// style built from the parser's current formats is registered and returned.
  /// </summary>
  /// <param name="parser">Parser that supplies the current run and paragraph formats for a new style.</param>
  /// <param name="styledef">Style definition number to search for.</param>
  /// <returns>The matching style, or the newly created one.</returns>
  public Style FindStyle(RTF_Parser parser, int styledef)
  {
      foreach (Style candidate in doc.style_list)
      {
          if (candidate.styledef != styledef)
              continue;
          return candidate;
      }

      Trace.WriteLine("RTF style not found. Create default style");
      return RegisterAutogeneratedStyle(parser, styledef);
  }

  /// <summary>
  /// Creates a style named after <paramref name="styledef"/> and appends it to the document style list.
  /// </summary>
  private Style RegisterAutogeneratedStyle(RTF_Parser parser, int styledef)
  {
      Style created = new Style();
      created.styledef = styledef;
      created.stylename = "Autogenerated " + styledef.ToString();
      created.run_style = parser.current_run_format;
      created.paragraph_style = parser.current_paragraph_format;
      doc.style_list.Add(created);
      return created;
  }
  /// <summary>Style entry that is currently being collected from the style sheet.</summary>
  Style style;

  /// <summary>
  /// Handles one token while the state machine is inside the style sheet.
  /// </summary>
  /// <param name="parser">Parser positioned on the token to process.</param>
  private void ParserStyleTable(RTF_Parser parser)
  {
      if (style_state == StyleState.Wait)
      {
          // A new block starts the next style entry.
          if (parser.Status == ParserStatus.OpenBlock)
          {
              style = new Style();
              parser.ResetParagraphFormat();
              parser.ResetRunFormat();
              style_state = StyleState.Parse;
          }
          // Closing at depth 1 ends the style sheet itself.
          if (parser.Status == ParserStatus.CloseBlock && recursion_counter == 1)
              tag_state = HeaderParserState.SubitemFirstLevel;
          return;
      }

      if (style_state != StyleState.Parse)
          return;

      string word = parser.Control;
      if (word == "s")
      {
          style.styledef = (int)parser.Number;
      }
      else if (word != "sbasedon")
      {
          // Try paragraph formatting first, then run formatting; "sbasedon" is ignored.
          if (!RTF_DocumentParser.ParseParagraphFormat(parser))
              RTF_DocumentParser.ParseRunFormat(parser, this);
      }

      if (parser.Status != ParserStatus.CloseBlock)
          return;

      // The entry is complete: take its name from the pending text and store it.
      if (parser.Text.Length != 0)
          style.stylename = parser.Text;
      style.paragraph_style = parser.current_paragraph_format;
      style.run_style = parser.current_run_format;
      style_state = StyleState.Wait;
      doc.style_list.Add(style);

      if (recursion_counter == 1)
          tag_state = HeaderParserState.SubitemFirstLevel;
  }
  /// <summary>Red component collected for the color entry in progress.</summary>
  byte color_red;
  /// <summary>Green component collected for the color entry in progress.</summary>
  byte color_green;
  /// <summary>Blue component collected for the color entry in progress.</summary>
  byte color_blue;

  /// <summary>
  /// Handles one token while the state machine is inside the color table.
  /// </summary>
  /// <param name="parser">Parser positioned on the token to process.</param>
  /// <exception cref="Exception">The token's number is greater than 255.</exception>
  void ParseColorTable(RTF_Parser parser)
  {
      if (parser.Status == ParserStatus.CloseBlock)
      {
          // Closing at depth 1 ends the color table.
          if (recursion_counter == 1)
              tag_state = HeaderParserState.SubitemFirstLevel;
          return;
      }

      if (parser.Number > 255)
          throw new Exception("Color value out of range");
      byte component = (byte)parser.Number;

      string word = parser.Control;
      if (word == "red")
          color_red = component;
      else if (word == "green")
          color_green = component;
      else if (word == "blue")
          color_blue = component;

      // A backslash delimiter means another control word of the same entry follows.
      if (parser.Delimiter == '\\')
          return;

      Color entry = Color.FromArgb(color_red, color_green, color_blue);
      if (parser.override_default_color)
          doc.color_list[0] = entry;
      else
          doc.color_list.Add(entry);
      parser.override_default_color = false;
  }
  /// <summary>Sub-state used while the font table is parsed.</summary>
  enum FontInfoState
  {
      /// <summary>Waiting for the <c>f</c> control word that starts a font entry.</summary>
      FontID,
      /// <summary>Inside a font entry; attributes and the font name are collected.</summary>
      CheckThemAll,
  }

  /// <summary>Current state of the font table sub-parser.</summary>
  FontInfoState font_state;

  /// <summary>Font entry that is currently being collected.</summary>
  RFont font;

  /// <summary>
  /// Applies a font family or charset control word to the font entry in progress.
  /// </summary>
  /// <param name="parser">Parser positioned on the control word to apply.</param>
  /// <returns><c>true</c> when the control word was recognized; otherwise <c>false</c>.</returns>
  internal bool ParseFontAttributes(RTF_Parser parser)
  {
      switch (parser.Control)
      {
          case "fnil":
              font.family = RFont.Family.Nil;
              return true;
          case "froman":
              font.family = RFont.Family.Rroman;
              return true;
          case "fswiss":
              font.family = RFont.Family.Swiss;
              return true;
          case "fmodern":
              font.family = RFont.Family.Modern;
              return true;
          case "fscript":
              font.family = RFont.Family.Script;
              return true;
          case "fdecor":
              font.family = RFont.Family.Decor;
              return true;
          case "ftech":
              font.family = RFont.Family.Tech;
              return true;
          case "fbidi":
              font.family = RFont.Family.Bidi;
              return true;
          case "fcharset":
              font.charset = parser.Number;
              return true;
          default:
              // Many other font options exist; they are not handled here.
              return false;
      }
  }
  /// <summary>
  /// Handles one token while the state machine is inside the font table.
  /// </summary>
  /// <remarks>
  /// A font entry starts with the <c>f</c> control word and is stored when its block
  /// closes. The pending text is taken as the font name, with trailing ';' removed.
  /// </remarks>
  /// <param name="parser">Parser positioned on the token to process.</param>
  /// <exception cref="Exception">
  /// An unexpected control word appears where a font entry should start, or a font
  /// entry closes without any name text.
  /// </exception>
  void ParseFontTable(RTF_Parser parser)
  {
      // Closing at depth 1 ends the font table itself.
      if (recursion_counter == 1 && parser.Status == ParserStatus.CloseBlock)
      {
          tag_state = HeaderParserState.SubitemFirstLevel;
          return;
      }

      // Parse() has already adjusted recursion_counter for this token. Undo that so
      // the open and close tokens of a nested block both report the enclosing depth.
      int back = recursion_counter;
      if (parser.Status == ParserStatus.CloseBlock)
          ++back;
      else if (parser.Status == ParserStatus.OpenBlock)
          --back;

      switch (font_state)
      {
          case FontInfoState.FontID:
              switch (parser.Control)
              {
                  case "f":
                      long rtf_font_number = parser.Number;
                      font = new RFont();
                      font.font_id = (uint)doc.font_list.Count;
                      if (!font_ids.ContainsKey(rtf_font_number))
                          font_ids.Add(rtf_font_number, font.font_id);
                      else
                          // Report the number that was repeated, not the new list index.
                          Debug.WriteLine("Duplication of font_id: " + rtf_font_number.ToString());
                      font_state = FontInfoState.CheckThemAll;
                      break;
                  case "flomajor":
                  case "fhimajor":
                  case "fdbmajor":
                  case "fbimajor":
                  case "flominor":
                  case "fhiminor":
                  case "fdbminor":
                  case "fbiminor":
                      // Theme font extensions of modern MS Word versions are ignored.
                      break;
                  default:
                      if (parser.Status != ParserStatus.OpenBlock)
                          throw new Exception("RTF unknown font tag");
                      break;
              }
              break;

          case FontInfoState.CheckThemAll:
              // Only depth 3 is handled; presumably that is the font entry level
              // (document block, font table block, entry block). Deeper tokens are skipped.
              if (back == 3)
              {
                  if (parser.Status != ParserStatus.CloseBlock)
                  {
                      ParseFontAttributes(parser);
                  }
                  else
                  {
                      string name = parser.Text;
                      if (name.Length == 0)
                          throw new Exception("RTF malformed font name");

                      // Always store the name. TrimEnd leaves the text untouched when the
                      // terminating ';' is missing, so such fonts no longer lose their name.
                      font.FontName = name.TrimEnd(';');
                      doc.font_list.Add(font);
                      font_state = FontInfoState.FontID;
                  }
              }
              break;
      }
  }
  449. }
  450. }