Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 1452 | chris | 1 | using System; |
| 2 | using System.IO; |
||
| 3 | using System.Text.RegularExpressions; |
||
| 4 | using System.Xml; |
||
| 5 | using System.Xml.Xsl; |
||
| 6 | |||
namespace Bind
{
    /// <summary>
    /// Turns a documentation XML file into text: MathML markup is stripped from the
    /// raw file contents and the result is run through an XSL transform.
    /// </summary>
    class DocProcessor
    {
        // Matches a complete <mml:math ...> ... </mml:math> element, including newlines.
        static readonly Regex remove_mathml = new Regex(@"<(mml:math)[^>]*?>(?:.|\n)*?</\s*\1\s*>",
            RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

        // Delimiters of the "<!-- eqn: ... :-->" comment that carries the plain-text equation.
        const string EquationMarker = "eqn";
        const string EquationTerminator = ":-->";

        // Number of characters skipped from the start of the marker to the equation
        // text: the marker itself plus the character that follows it ("eqn:").
        const int EquationPrefixLength = 4;

        // Per-instance state, so that two processors built from different stylesheets
        // do not overwrite each other's transform.
        readonly XslCompiledTransform xslt = new XslCompiledTransform();
        readonly XmlReaderSettings settings = new XmlReaderSettings();

        /// <summary>
        /// Creates a processor that applies the given XSL stylesheet.
        /// </summary>
        /// <param name="transform_file">Path of the XSLT file to load.</param>
        public DocProcessor(string transform_file)
        {
            xslt.Load(transform_file);

            // Allow documents that carry a DTD, but never resolve external resources.
            // NOTE(review): ProhibitDtd is marked obsolete on .NET Framework 4.0+, where
            // DtdProcessing = DtdProcessing.Parse is the replacement; kept as-is because
            // the target framework of this file is not visible here - confirm before changing.
            settings.ProhibitDtd = false;
            settings.XmlResolver = null;
        }

        /// <summary>
        /// Strips MathML tags from the file and replaces each equation with the content
        /// found in its <c>&lt;!-- eqn: :--&gt;</c> comment, then applies the XSL transform.
        /// </summary>
        /// <remarks>
        /// Todo: Some simple MathML tags do not include comments, find a solution.
        /// Todo: Some files include more than 1 function - find a way to map these extra functions.
        /// </remarks>
        /// <param name="file">Path of the XML documentation file to process.</param>
        /// <returns>
        /// The transformed text with trailing newlines removed, or an empty string when
        /// the document is not well-formed XML (the error is written to the console).
        /// </returns>
        public string ProcessFile(string file)
        {
            // A single Replace pass visits every MathML element once instead of
            // re-scanning the whole text after each removal.
            string text = remove_mathml.Replace(File.ReadAllText(file), ReplaceMathML);

            try
            {
                // The pure XmlReader is ~20x faster than the XmlTextReader.
                using (XmlReader doc = XmlReader.Create(new StringReader(text), settings))
                using (StringWriter sw = new StringWriter())
                {
                    xslt.Transform(doc, null, sw);
                    return sw.ToString().TrimEnd('\n');
                }
            }
            catch (XmlException e)
            {
                Console.WriteLine(e.ToString());
                // Report which input failed; the reader object itself carries no useful text.
                Console.WriteLine(file);
                return String.Empty;
            }
        }

        // Produces the replacement for one MathML element: a CDATA section holding the
        // text of its "eqn" comment, or nothing when no complete comment is present.
        static string ReplaceMathML(Match match)
        {
            string removed = match.Value;

            int equation = removed.IndexOf(EquationMarker, StringComparison.Ordinal);
            if (equation <= 0)
            {
                return String.Empty;
            }

            int start = equation + EquationPrefixLength;

            // Search from the marker onwards so a terminator that appears earlier in the
            // element cannot yield a negative length.
            int end = removed.IndexOf(EquationTerminator, equation, StringComparison.Ordinal);
            if (end < start)
            {
                return String.Empty;
            }

            return "<![CDATA[" + removed.Substring(start, end - start) + "]]>";
        }
    }
}