using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Text;
namespace FastReport.RichTextParser
{
internal static class ExportUtils
{
public static void Write(Stream stream, string value)
{
byte[] buf = Encoding.UTF8.GetBytes(value);
stream.Write(buf, 0, buf.Length);
}
public static void WriteLn(Stream stream, string value)
{
Write(stream, value);
stream.WriteByte(13);
stream.WriteByte(10);
}
}
///
/// This class represents a RTF run.
///
internal class RTF_Run
{
private readonly Run run;
internal RTF_Run(RunFormat f, string text)
{
run = new Run(text, f);
}
}
internal abstract class RTF_CommonRichElement
{
static readonly int DpiX = 96;
protected static int Twips2Pixels(int twips)
{
return (int)(((double)twips) * (1.0 / 1440.0) * DpiX);
}
protected static int Twips2Pixels(long twips)
{
return (int)(((double)twips) * (1.0 / 1440.0) * DpiX);
}
internal abstract RichObject RichObject { get; }
internal abstract bool Parse(RTF_Parser parser, RTF_Header header);
}
internal class RTF_SequenceParser
{
private RichObjectSequence sequence;
private RTF_Picture picture_parser = null;
private RTF_Paragraph paragraph_parser = null;
private RTF_Column curr_column = new RTF_Column();
private RTF_Row current_row = null;
private List cells_queue;
RichObjectSequence cell_sequence;
private Table table;
private bool new_table = false;
public RichObjectSequence Sequence { get { return sequence; } }
public Table CurrentTable { get { return table; } }
internal RTF_SequenceParser()
{
sequence.objects = new List();
cells_queue = new List();
cell_sequence.objects = new List();
}
///
/// Insert paragraph into list of paragraphs
///
private void InsertParagraph(RTF_Parser parser)
{
paragraph_parser.Fix(parser);
if (!parser.insideTable)
{
sequence.objects.Add(paragraph_parser.RichObject);
sequence.size += paragraph_parser.RichObject.size;
}
else
{
cell_sequence.objects.Add(paragraph_parser.RichObject);
}
paragraph_parser = new RTF_Paragraph(parser);
}
private void InsertCell(RTF_Parser parser)
{
Fix(parser);
cells_queue.Add(cell_sequence);
cell_sequence = new RichObjectSequence();
cell_sequence.objects = new List();
paragraph_parser = new RTF_Paragraph(parser);
}
///
/// Insert row into list of paragraphs
///
private void InsertRow()
{
// Move parsed cells to current row
foreach (RichObjectSequence sequence in cells_queue)
{
current_row.AddCell(sequence);
}
cells_queue = new List();
table.rows.Add(current_row.Row);
current_row = new RTF_Row(this);
if (new_table)
{
InsertTable();
new_table = false;
}
}
private void InsertTable()
{
RichObject rich = new RichObject();
rich.type = RichObject.Type.Table;
rich.table = table;
sequence.objects.Add(rich);
sequence.size += rich.size;
}
private void CreateTable()
{
new_table = true;
table.columns = new List();
table.rows = new List();
current_row = new RTF_Row(this);
}
internal void Fix(RTF_Parser parser)
{
if (paragraph_parser != null)
{
paragraph_parser.Fix(parser);
if (parser.insideTable)
cell_sequence.objects.Add(paragraph_parser.RichObject);
/* Following code adds empty paragraph at end of page, so disable it now */
else if (paragraph_parser.RichObject.type == RichObject.Type.Paragraph)
{
if (paragraph_parser.RichObject.pargraph.runs.Count != 0)
{
sequence.objects.Add(paragraph_parser.RichObject);
sequence.size += paragraph_parser.RichObject.size;
}
}
}
if (new_table)
{
InsertTable();
new_table = false;
}
return;
}
internal bool Parse(RTF_Parser parser, RTF_Header header)
{
bool parsed = true;
if (picture_parser != null)
{
parsed = picture_parser.Parse(parser, header);
if (parsed)
return true;
// 20210211: check if picture within paragraph
if(paragraph_parser.Runs.Count > 0)
{
sequence.objects.Add(paragraph_parser.RichObject);
paragraph_parser = new RTF_Paragraph(parser);
}
if (parser.insideTable)
cell_sequence.objects.Add(picture_parser.RichObject);
else
{
sequence.objects.Add(picture_parser.RichObject);
sequence.size += picture_parser.RichObject.size;
}
picture_parser = null;
return true;
}
if (paragraph_parser == null)
paragraph_parser = new RTF_Paragraph(parser);
parsed = paragraph_parser.Parse(parser, header);
if (parsed)
return true;
parsed = curr_column.Parse(parser, header);
if (parsed)
return true;
if (current_row != null)
{
parsed = current_row.Parse(parser, header);
if (parsed)
return true;
}
parsed = true;
switch (parser.Control)
{
case "par":
InsertParagraph(parser);
parser.ListItem = false; // Disable list
break;
case "cellx":
uint w = (uint)parser.Number;
curr_column.SetWidth(w);
table.columns.Add(curr_column.Column);
curr_column = new RTF_Column();
break;
case "cell":
InsertCell(parser);
break;
case "row":
InsertRow();
break;
case "trowd":
CreateTable();
break;
case "pict":
picture_parser = new RTF_Picture();
break;
default:
////if(parser.Status == ParserStatus.CloseBlock)
////{
//// InsertParagraph(parser);
//// break;
////}
parsed = false;
break;
}
return parsed;
}
}
///
/// This class represents a RTF properies.
///
class RTF_PageParser
{
private Page page;
RichDocument document;
RTF_SequenceParser sequence_parser = new RTF_SequenceParser();
private RTF_SequenceParser page_header = null;
private RTF_SequenceParser page_footer = null;
public Page Page { get { return page; } }
public RTF_PageParser(bool soft, RichDocument document)
{
page.soft_break = soft;
page.margin_top = 0;
page.margin_left = 0;
page.margin_right = 0;
page.margin_bottom = 0;
page.sequence.objects = new List();
this.document = document;
}
internal void Fix(RTF_Parser parser)
{
sequence_parser.Fix(parser);
page.sequence = sequence_parser.Sequence;
page.size = page.sequence.size;
}
static int indirection_count = 0;
internal bool Parse(RTF_Parser parser, RTF_Header header)
{
bool parsed = false;
if (page_header != null)
{
parsed = page_header.Parse(parser, header);
if (!parsed)
{
//this.page_header.AddString(parser, "\\" + parser.Control);
}
if (parser.Status == ParserStatus.CloseBlock)
{
indirection_count--;
if (indirection_count < 0)
{
indirection_count = 0;
if (Page.header.objects == null)
this.page.header.objects = new List();
foreach (RichObject o in page_header.Sequence.objects)
{
this.Page.header.objects.Add(o);
}
page_header = null;
return false;
}
}
else if (parser.Status == ParserStatus.OpenBlock)
{
indirection_count++;
}
return true;
}
if (page_footer != null)
{
parsed = page_footer.Parse(parser, header);
if (!parsed)
{
//this.page_footer.AddString(parser, "\\" + parser.Control);
}
if (parser.Status == ParserStatus.CloseBlock)
{
indirection_count--;
if (indirection_count < 0)
{
indirection_count = 0;
if (Page.footer.objects == null)
this.page.footer.objects = new List();
foreach (RichObject o in page_footer.Sequence.objects)
{
this.Page.footer.objects.Add(o);
}
page_footer = null;
return false;
}
}
else if (parser.Status == ParserStatus.OpenBlock)
{
indirection_count++;
}
return true;
}
parsed = sequence_parser.Parse(parser, header);
if (!parsed)
{
parsed = true;
switch (parser.Control)
{
case "pgwsxn":
page.page_width = parser.Number;
break;
case "pghsxn":
page.page_heigh = parser.Number;
break;
case "marglsxn":
page.margin_left = parser.Number;
break;
case "margrsxn":
page.margin_right = parser.Number;
break;
case "margtsxn":
page.margin_top = parser.Number;
break;
case "margbsxn":
page.margin_bottom = parser.Number;
break;
case "headerr":
case "header":
page_header = new RTF_SequenceParser();
break;
case "footer":
case "footerr":
page_footer = new RTF_SequenceParser();
break;
default:
parsed = false;
break;
}
}
return parsed;
}
}
class RTF_BorderLine_Parser
{
internal BorderLine line;
internal void Clear()
{
line.style = BorderLine.Style.Thin;
line.width = 0;
line.color = System.Drawing.Color.Black;
}
internal bool Parse(RTF_Parser parser, RTF_Header header)
{
bool parsed = true;
switch (parser.Control)
{
case "brdrs":
line.style = BorderLine.Style.Thin;
break;
case "brdrth":
line.style = BorderLine.Style.Thick;
break;
case "brdrdb":
line.style = BorderLine.Style.Double;
break;
case "brdrdot":
line.style = BorderLine.Style.Dotted;
break;
case "brdrw":
line.width = (uint)parser.Number;
break;
case "brdrcf":
{
int cidx = (int)parser.Number;
line.color = header.Document.color_list[cidx];
break;
}
default:
parsed = false;
break;
}
return parsed;
}
}
///
/// This class parses an entiry RTF document.
///
public class RTF_DocumentParser : IDisposable
{
private RichDocument doc;
int nested_block_count;
bool skip_rtf_extension;
enum GlobalMode { Header, Document }
private RTF_PageParser curr_page;
private RTF_Header header_parser;
private Stack run_formats_stack;
private Stack parahraph_format_stack; // Do we need keep track of paragraphs format?
public RichDocument Document { get { return doc; } }
#if false
Dictionary format_hash;
internal RTF_RunFormat FindFormat(RTF_RunFormat key)
{
if (!format_hash.ContainsKey(key))
return null;
return format_hash[key];
}
#endif
internal static bool ParseParagraphFormat(RTF_Parser parser)
{
bool status = true;
switch (parser.Control)
{
case "clvertalt":
parser.current_paragraph_format.Valign = ParagraphFormat.VerticalAlign.Top;
break;
case "clvertalc":
parser.current_paragraph_format.Valign = ParagraphFormat.VerticalAlign.Center;
break;
case "clvertalb":
parser.current_paragraph_format.Valign = ParagraphFormat.VerticalAlign.Bottom;
break;
default:
parser.current_paragraph_format.Valign = ParagraphFormat.VerticalAlign.Top; // 20210722
break;
}
switch (parser.Control)
{
case "qc":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Centered;
break;
case "ql":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Left;
break;
case "qr":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Right;
break;
case "qj":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Justified;
break;
case "qd":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Distributed;
break;
case "qk":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Kashida;
break;
case "qt":
parser.current_paragraph_format.align = ParagraphFormat.HorizontalAlign.Thai;
break;
case "sl":
parser.current_paragraph_format.line_spacing = (int)parser.Number;
break;
case "sb":
parser.current_paragraph_format.space_before = (int)parser.Number;
break;
case "sa":
parser.current_paragraph_format.space_after = (int)parser.Number;
break;
case "li":
parser.current_paragraph_format.left_indent = (int)parser.Number;
break;
case "ri":
parser.current_paragraph_format.right_indent = (int)parser.Number;
break;
case "fi":
parser.current_paragraph_format.first_line_indent = (int)parser.Number;
break;
case "slmult":
parser.current_paragraph_format.lnspcmult = (ParagraphFormat.LnSpcMult)parser.Number;
break;
case "pntext":
parser.ResetRunFormat();
parser.current_paragraph_format.list_id = new List();
parser.current_paragraph_format.pnstart = 1; // No support of nested numbering in this version
parser.ListItem = true;
break;
case "ltrpar":
parser.current_paragraph_format.text_direction = ParagraphFormat.Direction.LeftToRight;
break;
case "rtlpar":
parser.current_paragraph_format.text_direction = ParagraphFormat.Direction.RighgToLeft;
break;
case "tx":
if (parser.current_paragraph_format.tab_positions == null)
parser.current_paragraph_format.tab_positions = new List();
parser.current_paragraph_format.tab_positions.Add((int)parser.Number);
break;
case "pntxta":
parser.ClearParsedText();
break;
case "pntxtb":
parser.ClearParsedText();
break;
default:
status = false;
break;
}
return status;
}
internal static bool ParseRunFormat(RTF_Parser parser, RTF_Header header)
{
bool accepted = true;
int cidx;
switch (parser.Control)
{
case "b":
parser.current_run_format.bold = parser.HasValue ? (parser.Number == 0 ? false : true) : true;
break;
case "i":
parser.current_run_format.italic = parser.HasValue ? (parser.Number == 0 ? false : true) : true;
break;
case "cf":
cidx = (int) parser.Number;
if(cidx > 0)
{
if (cidx > header.Document.color_list.Count - 1)
cidx = header.Document.color_list.Count - 1;
}
parser.current_run_format.color = header.Document.color_list[cidx];
break;
case "highlight":
cidx = (int)parser.Number;
if (cidx != 0)
parser.current_run_format.BColor = header.Document.color_list[cidx];
else
parser.current_run_format.BColor = Color.White;
break;
case "cbpat":
cidx = (int)parser.Number;
parser.current_run_format.FillColor = header.Document.color_list[cidx];
break;
case "ul":
parser.current_run_format.underline = parser.HasValue ? (parser.Number == 0 ? false : true) : true;
break;
case "f":
uint idx = header.GetFontID(parser.Number);
parser.current_run_format.font_idx = idx;
RFont rf = header.Document.font_list[(int)idx];
parser.font_charset = rf.charset;
if(rf.charset != 0)
{
parser.SelectCodepageByFontCharset(rf.charset);
}
break;
case "fs":
parser.current_run_format.font_size = (int)parser.Number;
break;
case "ulnone":
parser.current_run_format.underline = false;
break;
case "plain":
parser.ResetRunFormat();
break;
case "up":
if (parser.Number == 0)
parser.current_run_format.script_type = RunFormat.ScriptType.PlainText;
else
parser.current_run_format.script_type = RunFormat.ScriptType.Superscript;
break;
case "dn":
if (parser.Number == 0)
parser.current_run_format.script_type = RunFormat.ScriptType.PlainText;
else
parser.current_run_format.script_type = RunFormat.ScriptType.Subscript;
break;
case "super":
parser.current_run_format.script_type = RunFormat.ScriptType.Superscript;
break;
case "sub":
parser.current_run_format.script_type = RunFormat.ScriptType.Subscript;
break;
case "nosupersub":
parser.current_run_format.script_type = RunFormat.ScriptType.PlainText;
break;
case "strike":
parser.current_run_format.strike = parser.HasValue ? (parser.Number == 0 ? false : true) : true;
break;
default:
accepted = false;
break;
}
return accepted;
}
///
public void Load(byte[] bytes)
{
using (MemoryStream stream = new MemoryStream(bytes))
Load(stream);
}
///
public Color GetFillColor()
{
Color color = Color.White;
foreach(RichObject obj in curr_page.Page.sequence.objects)
{
if (obj.type == RichObject.Type.Paragraph )
{
if(obj.pargraph.runs.Count > 0)
{
color = obj.pargraph.runs[0].format.FillColor;
break;
}
}
}
return color;
}
///
public void Load(string rich_text)
{
// System.Diagnostics.Trace.WriteLine(rich_text);
MemoryStream stream = new MemoryStream();
StreamWriter writer = new StreamWriter(stream);
writer.Write(rich_text);
writer.Flush();
stream.Position = 0;
Load(stream);
}
///
public void Load(Stream stream)
{
GlobalMode mode;
ParserStatus status = ParserStatus.Collecting;
int ch;
RTF_Parser parser = new RTF_Parser();
int block_sychro = 0;
header_parser = new RTF_Header(doc);
mode = GlobalMode.Header;
while (true)
{
ch = stream.ReadByte();
if (ch == -1)
{
if (status == ParserStatus.Collecting && parser.EndOfFile)
{
header_parser.Header = false;
doc = header_parser.Document;
mode = GlobalMode.Document;
Parse(parser);
}
break;
}
status = parser.ParseByte((char)ch);
if (status == ParserStatus.Collecting)
continue;
if (status == ParserStatus.OpenBlock)
{
run_formats_stack.Push(parser.current_run_format);
++block_sychro;
}
//if(parser.Control == "tx")
//{
// System.Diagnostics.Trace.Write("tx");
//}
switch (mode)
{
case GlobalMode.Header:
if (header_parser.Parse(parser) == true)
break;
doc = header_parser.Document;
mode = GlobalMode.Document;
Parse(parser);
break;
case GlobalMode.Document:
if(parser.Control == "rtf")
{
mode = GlobalMode.Header;
header_parser.StartParseEmbeddedDocument();
break;
}
Parse(parser);
break;
}
if (status == ParserStatus.CloseBlock)
{
--block_sychro;
parser.ListItem = false; // Disable list
if (block_sychro < 0)
throw new Exception("Document structure error");
parser.current_run_format = run_formats_stack.Pop();
if (block_sychro == 0)
{
if (mode == GlobalMode.Header)
Parse(parser);
break;
}
}
}
AddPage(parser);
}
internal void Parse(RTF_Parser parser)
{
bool parsed = false;
#if false // debug
if (parser.Status == ParserStatus.OpenBlock)
{
string dbg = String.Format("{{{0}", formats_stack.Count);
System.Diagnostics.Trace.Write(dbg);
}
else if (parser.Status == ParserStatus.CloseBlock)
{
System.Diagnostics.Trace.Write(@"}");
}
//if (parser.Control == "cell")
// System.Diagnostics.Trace.WriteLine(@"Cell is not parsed yet");
#endif
if (skip_rtf_extension)
{
switch (parser.Control)
{
case "fldinst":
break;
}
if (parser.Status == ParserStatus.OpenBlock)
++nested_block_count;
else if (parser.Status == ParserStatus.CloseBlock)
{
if (nested_block_count == 0)
throw new Exception("Document structure error");
--nested_block_count;
if (nested_block_count == 0)
skip_rtf_extension = false;
}
return;
}
parsed = curr_page.Parse(parser, this.header_parser);
if (!parsed)
switch (parser.Control)
{
case "page":
AddPage(parser);
curr_page = new RTF_PageParser(false, doc);
break;
case "softpage":
doc.pages.Add(curr_page.Page);
curr_page = new RTF_PageParser(true, doc);
break;
case "paperw":
doc.paper_width = parser.Number;
break;
case "paperh":
doc.paper_height = parser.Number;
break;
case "margl":
doc.global_margin_left = parser.Number;
break;
case "margt":
doc.global_margin_top = parser.Number;
break;
case "margr":
doc.global_margin_right = parser.Number;
break;
case "margb":
doc.global_margin_bottom = parser.Number;
break;
case "deftab":
doc.default_tab_width = parser.Number;
break;
case "viewkind":
doc.view_kind = parser.Number;
break;
case "shp":
//// Shape must be parsed in another place
//skip_rtf_extension = true;
break;
case "ftnsep":
case "ftnsepc":
case "aftnsep":
case "aftnsepc":
skip_rtf_extension = true;
nested_block_count = 1; // Not sure
break;
case "":
if (parser.Delimiter == '*')
{
// Preivous version which ignore pictures in \*\shppict tag (20210211)
// Just ignore delimiter
}
break;
default:
;
break;
}
}
void AddPage(RTF_Parser parser)
{
curr_page.Fix(parser);
Page pg = curr_page.Page;
doc.pages.Add(pg);
long sz = pg.sequence.size;
doc.size += sz;
}
public void Dispose()
{
header_parser = null;
}
///
/// Get RTF structure based on range of elements
///
public RichDocument GetRange(int Start, int Length)
{
RichDocument ranged_doc = new RichDocument();
long position = 0;
long finish = Start + Length;
ranged_doc.pages = new List();
foreach (Page page in doc.pages)
{
if (Start > position)
{
position += page.size;
if (Start > position)
continue;
position -= page.size;
}
Page ranged_page = new Page();
ranged_page.sequence.objects = new List();
foreach (RichObject sequence in page.sequence.objects)
{
position += sequence.size;
if (Start > position)
continue;
position -= sequence.size;
RichObject ranged_object = new RichObject();
switch (sequence.type)
{
case RichObject.Type.Paragraph:
{
ranged_object.type = RichObject.Type.Paragraph;
ranged_object.pargraph = new Paragraph();
ranged_object.pargraph.runs = new List();
Paragraph par = sequence.pargraph;
ranged_object.pargraph.format = par.format;
foreach (Run run in par.runs)
{
position += run.text.Length;
if (Start > position)
continue;
position -= run.text.Length;
// Here is it
string run_text;
if (Start == position)
run_text = run.text;
else
{
int diff = (int)(Start - position);
run_text = run.text.Substring(diff);
}
if (Length < run_text.Length)
run_text = run_text.Substring(0, Length);
Run ranged_run = new Run(run_text, run.format);
ranged_object.pargraph.runs.Add(ranged_run);
Length -= run_text.Length;
if (Length == 0)
break;
}
}
break;
case RichObject.Type.Table:
{
position += sequence.size;
if (Start > position)
continue;
position -= sequence.size;
// TODO: split table
if (Length < sequence.size)
Length = 0; // and this too
}
break;
case RichObject.Type.Picture:
{
position += sequence.size;
if (Start > position)
continue;
position -= sequence.size;
// TODO: split picture?
if (Length < sequence.size)
Length = 0; // and this too
}
break;
}
ranged_page.sequence.objects.Add(ranged_object);
if (Length == 0)
break;
if (Length < 0)
throw new Exception("Negative length in RTF_DocumentParser::GetRange()");
}
ranged_doc.pages.Add(ranged_page);
if (Length == 0)
break;
if (Length < 0)
throw new Exception("Negative length in RTF_DocumentParser::GetRange()");
ranged_page = new Page();
ranged_page.sequence.objects = new List();
}
ranged_doc.font_list = doc.font_list;
ranged_doc.color_list = doc.color_list;
ranged_doc.style_list = doc.style_list;
ranged_doc.codepage = doc.codepage;
ranged_doc.default_font = doc.default_font;
ranged_doc.default_lang = doc.default_lang;
ranged_doc.paper_width = doc.paper_width;
ranged_doc.paper_height = doc.paper_height;
ranged_doc.global_margin_left = doc.global_margin_left;
ranged_doc.global_margin_top = doc.global_margin_top;
ranged_doc.global_margin_right = doc.global_margin_right;
ranged_doc.global_margin_bottom = doc.global_margin_bottom;
ranged_doc.default_tab_width = doc.default_tab_width;
ranged_doc.view_kind = doc.view_kind;
return ranged_doc;
}
/// RichText document object
///
public RTF_DocumentParser()
{
doc.paper_width = 12240;
doc.paper_height = 15840;
doc.global_margin_left = 1800;
doc.global_margin_top = 1440;
doc.global_margin_right = 1800;
doc.global_margin_bottom = 1440;
doc.default_tab_width = 720;
doc.pages = new List();
curr_page = new RTF_PageParser(false, doc);
skip_rtf_extension = false;
run_formats_stack = new Stack();
parahraph_format_stack = new Stack(); // Do we need keep track of paragraphs format?
}
}
}