Index: xword/ContentFiltering/ContentFiltering.csproj
===================================================================
--- xword/ContentFiltering/ContentFiltering.csproj (revision 21847)
+++ xword/ContentFiltering/ContentFiltering.csproj (working copy)
@@ -81,6 +81,7 @@
+
@@ -109,6 +110,7 @@
+
Index: xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs (revision 0)
@@ -0,0 +1,227 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using XWiki.Office.Word;
using System.Xml;
using System.Collections;
using System.Text.RegularExpressions;

namespace ContentFiltering.Office.Word.Filters
{
    /// <summary>
    /// Extracts the CSS inline styles, optimizes CSS and adds CSS classes in the head section for current page.
    /// </summary>
    public class LocalToWebStyleFilter : IDOMFilter
    {
        /// <summary>Numeric suffix used for the next generated "xoffice" class name.</summary>
        private int counter;

        /// <summary>Generated CSS rules: selector (".xoffice" + number) mapped to its declaration text.</summary>
        private readonly Hashtable cssClasses;

        /// <summary>Conversion manager handed to the filter at construction time.</summary>
        private readonly ConversionManager manager;

        /// <summary>
        /// Creates a filter with an empty set of generated CSS classes.
        /// </summary>
        /// <param name="manager">The conversion manager this filter belongs to.</param>
        public LocalToWebStyleFilter(ConversionManager manager)
        {
            this.manager = manager;
            this.cssClasses = new Hashtable();
            this.counter = 0;
        }

        #region IDOMFilter Members

        /// <summary>
        /// Extracts the CSS inline styles, optimizes CSS and adds CSS classes in the head section for current page.
        /// A document without a body element is left untouched.
        /// </summary>
        /// <param name="xmlDoc">A reference to the document to filter.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="xmlDoc"/> is null.</exception>
        public void Filter(ref System.Xml.XmlDocument xmlDoc)
        {
            if (xmlDoc == null)
            {
                throw new ArgumentNullException("xmlDoc");
            }

            // The node list indexer yields null when the element is missing.
            XmlNode body = xmlDoc.GetElementsByTagName("body")[0];
            if (body == null)
            {
                // Nothing to restyle, and no anchor to insert a head before.
                return;
            }

            XmlNode head = xmlDoc.GetElementsByTagName("head")[0];
            if (head == null)
            {
                // A document node reports an empty NamespaceURI, so the namespace is taken
                // from the body to keep the new element in the same namespace as its sibling.
                head = xmlDoc.CreateElement("head", body.NamespaceURI);
                body.ParentNode.InsertBefore(head, body);
            }

            ConvertCSSClassesToInlineStyles(ref head, ref body, ref xmlDoc);
            body = ConvertInlineStylesToCssClasses(body, ref xmlDoc);
            OptimizeCssClasses();
            InsertCssClassesInHeader(ref head, ref xmlDoc);
        }

#endregion IDOMFilter Members

        /// <summary>
        /// Placeholder for turning CSS classes into inline styles; it currently does nothing.
        /// </summary>
        /// <param name="head">A reference to the head node.</param>
        /// <param name="body">A reference to the body node.</param>
        /// <param name="xmlDoc">A reference to the XmlDocument.</param>
        private void ConvertCSSClassesToInlineStyles(ref XmlNode head, ref XmlNode body, ref XmlDocument xmlDoc)
        {
        }

        /// <summary>
        /// Extracts inline styles and replaces them with CSS classes, for the given node and,
        /// recursively, for its descendants. Nodes without children are skipped.
        /// </summary>
        /// <param name="xnode">Node to filter.</param>
        /// <param name="xmlDoc">A reference to the XmlDocument.</param>
        /// <returns>Filtered node: 'class' attribute instead of 'style'.</returns>
        private XmlNode ConvertInlineStylesToCssClasses(XmlNode xnode, ref XmlDocument xmlDoc)
        {
            XmlNode node = xnode;
            if (node == null || !node.HasChildNodes)
            {
                return node;
            }

            RemoveXOfficeCSSClasses(ref node);
            ExtractStyle(ref node, ref xmlDoc);

            // Only attributes are modified during the walk, so enumerating the live child list is safe.
            foreach (XmlNode childNode in node.ChildNodes)
            {
                ConvertInlineStylesToCssClasses(childNode, ref xmlDoc);
            }
            return node;
        }

        /// <summary>
        /// Removes previous XOffice CSS classes ("xoffice" followed by digits) from the node's
        /// 'class' attribute. The attribute is dropped entirely when no other class remains.
        /// </summary>
        /// <param name="node">A reference to an XmlNode to filter.</param>
        private void RemoveXOfficeCSSClasses(ref XmlNode node)
        {
            // Attributes is null for node types that cannot carry attributes.
            if (node == null || node.Attributes == null)
            {
                return;
            }

            XmlAttribute classAttribute = node.Attributes["class"];
            if (classAttribute == null)
            {
                return;
            }

            // Work on whole class names so that an unrelated class which merely contains
            // "xoffice<digits>" is not cut in half.
            string[] classNames = classAttribute.Value.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            List<string> keptClassNames = new List<string>();
            foreach (string name in classNames)
            {
                if (!Regex.IsMatch(name, "^xoffice[0-9]+$", RegexOptions.IgnoreCase))
                {
                    keptClassNames.Add(name);
                }
            }

            if (keptClassNames.Count == 0)
            {
                // Nothing (or only whitespace) would be left: remove the attribute.
                node.Attributes.Remove(classAttribute);
            }
            else if (keptClassNames.Count != classNames.Length)
            {
                classAttribute.Value = string.Join(" ", keptClassNames.ToArray());
            }
        }

        /// <summary>
        /// Moves the accepted properties of the node's inline style into a new generated CSS class
        /// and references that class from the node's 'class' attribute.
        /// </summary>
        /// <param name="node">A reference to the node to process.</param>
        /// <param name="xmlDoc">A reference to the XmlDocument, used to create the 'class' attribute.</param>
        private void ExtractStyle(ref XmlNode node, ref XmlDocument xmlDoc)
        {
            if (node == null || node.Attributes == null)
            {
                return;
            }

            XmlAttribute styleAttribute = node.Attributes["style"];
            if (styleAttribute == null)
            {
                return;
            }

            if (!node.HasChildNodes)
            {
                // An empty node, so delete its attributes.
                // This way the node could be safely removed by other DOM filters.
                node.Attributes.RemoveAll();
                return;
            }

            if (string.IsNullOrEmpty(styleAttribute.Value))
            {
                // The node has content: drop only the useless empty style and keep its other attributes.
                node.Attributes.Remove(styleAttribute);
                return;
            }

            string classValue = CleanCSSProperties(styleAttribute.Value);
            if (classValue.Length == 0)
            {
                // No accepted property: the style attribute is left as it is.
                return;
            }

            string className = "xoffice" + counter;
            cssClasses.Add("." + className, classValue);
            counter++;
            node.Attributes.Remove(styleAttribute);

            XmlAttribute classAttribute = node.Attributes["class"];
            if (classAttribute == null)
            {
                classAttribute = xmlDoc.CreateAttribute("class");
                classAttribute.Value = className;
                node.Attributes.Append(classAttribute);
            }
            else if (classAttribute.Value.Length == 0)
            {
                classAttribute.Value = className;
            }
            else
            {
                classAttribute.Value += " " + className;
            }
        }

        /// <summary>
        /// Cleans CSS properties by removing the ones specific to MS Office: only declarations whose
        /// property name is in the ValidCSSProperties list are kept. Names are compared ignoring case,
        /// and fragments that are not "name:value" declarations are dropped.
        /// </summary>
        /// <param name="style">The content of a 'style' attribute.</param>
        /// <returns>The accepted declarations, each followed by ';', or an empty string.</returns>
        private string CleanCSSProperties(string style)
        {
            if (string.IsNullOrEmpty(style))
            {
                return string.Empty;
            }

            StringBuilder acceptedProperties = new StringBuilder();
            List<string> validNames = ValidCSSProperties.GetList();
            string[] declarations = style.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string declaration in declarations)
            {
                int colonIndex = declaration.IndexOf(':');
                if (colonIndex <= 0)
                {
                    // Whitespace between two ';' or a declaration without a name.
                    continue;
                }

                string propertyName = declaration.Substring(0, colonIndex).Trim();
                if (validNames.Contains(propertyName, StringComparer.OrdinalIgnoreCase))
                {
                    acceptedProperties.Append(declaration);
                    acceptedProperties.Append(";");
                }
            }
            return acceptedProperties.ToString();
        }

        /// <summary>
        /// Placeholder for merging equivalent generated CSS classes; it currently does nothing.
        /// </summary>
        private void OptimizeCssClasses()
        {
        }

        /// <summary>
        /// Appends a 'style' element holding every generated CSS class to the head node.
        /// </summary>
        /// <param name="headNode">A reference to the head node.</param>
        /// <param name="xmlDoc">A reference to the XmlDocument, used to create the new nodes.</param>
        private void InsertCssClassesInHeader(ref XmlNode headNode, ref XmlDocument xmlDoc)
        {
            StringBuilder css = new StringBuilder();
            foreach (DictionaryEntry rule in cssClasses)
            {
                css.Append(Environment.NewLine);
                css.Append((string)rule.Key);
                css.Append("{");
                css.Append(rule.Value);
                css.Append("}");
            }

            // Created in the head's namespace so the element is a regular sibling of the head's other children.
            XmlElement styleNode = xmlDoc.CreateElement("style", headNode.NamespaceURI);
            styleNode.AppendChild(xmlDoc.CreateTextNode(css.ToString()));
            headNode.AppendChild(styleNode);
        }
    }


    /// <summary>
    /// Holds the names of the CSS properties accepted when inline styles are cleaned.
    /// </summary>
    class ValidCSSProperties
    {
        private static List<string> validCSSProperties = new List<string>()
        {
"accelerator", "azimuth", "background", "background-attachment", "background-color", "background-image", + "background-position", "background-position-x", "background-position-y", "background-repeat", "behavior", "border", + "border-bottom", "border-bottom-color", "border-bottom-style", "border-bottom-width", "border-collapse", "border-color", + "border-left", "border-left-color", "border-left-style", "border-left-width", "border-right", "border-right-color", + "border-right-style", "border-right-width", "border-spacing", "border-style", "border-top", "border-top-color", + "border-top-style", "border-top-width", "border-width", "bottom", "caption-side", "clear", + "clip", "color", "content", "counter-increment", "counter-reset", "cue", + "cue-after", "cue-before", "cursor", "direction", "display", "elevation", + "empty-cells", "filter", "float", "font", "font-family", "font-size", + "font-size-adjust", "font-stretch", "font-style", "font-variant", "font-weight", "height", + "ime-mode", "include-source", "layer-background-color", "layer-background-image", "layout-flow", "layout-grid", + "layout-grid-char", "layout-grid-char-spacing", "layout-grid-line", "layout-grid-mode", "layout-grid-type", "left", + "letter-spacing", "line-break", "line-height", "list-style", "list-style-image", "list-style-position", + "list-style-type", "margin", "margin-bottom", "margin-left", "margin-right", "margin-top", + "marker-offset", "marks", "max-height", "max-width", "min-height", "min-width", + "orphans", "outline", "outline-color", "outline-style", "outline-width", "overflow", + "overflow-X", "overflow-Y", "padding", "padding-bottom", "padding-left", "padding-right", + "padding-top", "page", "page-break-after", "page-break-before", "page-break-inside", "pause", + "pause-after", "pause-before", "pitch", "pitch-range", "play-during", "position", + "size", "table-layout", "text-align", "text-decoration", "text-indent", "text-transform", + "text-shadow", "top", "vertical-align", 
"visibility", "white-space", "width", + "word-break", "word-spacing", "z-index" + }; + + public static List GetList() + { + return validCSSProperties; + } + } +} Index: xword/ContentFiltering/Office/Word/LocalToWebHTML.cs =================================================================== --- xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (revision 21847) +++ xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (working copy) @@ -37,6 +37,7 @@ List contentFilters = new List() { + new LocalToWebStyleFilter(manager), new StyleRemoverFilter(manager), new GrammarAndSpellingErrorsFilter(manager), new LocalImageAdaptorFilter(manager), Index: xword/ContentFiltering/Test/Office/Word/Filters/LocalToWebStyleFilterTest.cs =================================================================== --- xword/ContentFiltering/Test/Office/Word/Filters/LocalToWebStyleFilterTest.cs (revision 0) +++ xword/ContentFiltering/Test/Office/Word/Filters/LocalToWebStyleFilterTest.cs (revision 0) @@ -0,0 +1,165 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using NUnit.Framework; +using System.Xml; +using ContentFiltering.Test.Util; +using ContentFiltering.Office.Word.Filters; +using XWiki.Office.Word; + +namespace ContentFiltering.Test.Office.Word.Filters +{ + /// + /// Test class for LocalToWebStyleFilter. + /// + [TestFixture] + public class LocalToWebStyleFilterTest + { + private ConversionManager manager; + private string initialHTML; + private string expectedHTML; + private XmlDocument initialXmlDoc; + + /// + /// Default constructor. + /// + public LocalToWebStyleFilterTest() + { + manager = ConversionManagerTestUtil.DummyConversionManager(); + initialHTML = ""; + expectedHTML = ""; + initialXmlDoc = new XmlDocument(); + } + + /// + /// Test setup. + /// + [TestFixtureSetUp] + public void TestSetup() + { + initialHTML = "" + + "

Some content

" + + "

some code

" + + "

More content

" + + "

more code

" + + ""; + + expectedHTML = "" + + "" + + "

Some content

" + + "

some code

" + + "

More content

" + + "

more code

" + + ""; + + initialXmlDoc.LoadXml(initialHTML); + } + + /// + /// Tests the LocalToWebStyle filter: + /// - No inline styles. + /// - Only 'xoffice[0-9]+' CSS classes. + /// - Exactly 4 'xoffice[0-9]+' CSS classes, grouped in 2 parts (optimized). + /// + [Test] + public void TestLocalToWebStyleFilter() + { + bool foundInlineStyles = false; + bool foundNonXOfficeClasses = false; + + new LocalToWebStyleFilter(manager).Filter(ref initialXmlDoc); + + XmlNodeList allNodes = initialXmlDoc.GetElementsByTagName("*"); + + foreach (XmlNode node in allNodes) + { + //searching for inline styles + if (node.Attributes["style"] != null) + { + if (("" + node.Attributes["style"].Value).Length > 0) + { + foundInlineStyles = true; + break; //no need to continue searching other problems + } + } + + //searching for non-XOffice CSS classes in nodes + if (node.Attributes["class"] != null) + { + if (("" + node.Attributes["class"].Value).Length > 0) + { + if (node.Attributes["class"].Value.ToLower().IndexOf("xoffice") < 0) + { + foundNonXOfficeClasses = true; + break; + } + } + } + } + + XmlNode styleNode = initialXmlDoc.GetElementsByTagName("style")[0]; + string cssContent = ExtractStyleContent(styleNode); + + Assert.IsFalse(foundInlineStyles); + Assert.IsFalse(foundNonXOfficeClasses); + Assert.IsNotNull(styleNode); + Assert.IsTrue(CountCSSClasses(cssContent) == 4); + Assert.IsTrue(OptimizedCSSClasses(cssContent)); + } + + private string ExtractStyleContent(XmlNode styleNode) + { + string cssContent = ""; + if (styleNode != null) + { + if (("" + styleNode.InnerText).Length > 0) + { + cssContent = styleNode.InnerText; + } + } + return cssContent; + } + + /// + /// Counts the CSS classes from a CSS content. + /// + /// The CSS content. + /// Number of CSS classes found. 
private static int CountCSSClasses(string cssContent)
{
    // Every generated rule is introduced by ".xoffice<number>", so counting that marker counts the classes.
    const string marker = ".xoffice";
    if (string.IsNullOrEmpty(cssContent))
    {
        return 0;
    }

    int count = 0;
    int position = cssContent.IndexOf(marker, StringComparison.Ordinal);
    while (position >= 0)
    {
        count++;
        // Resume after the current match; searching again from the match itself would find it forever.
        position = cssContent.IndexOf(marker, position + marker.Length, StringComparison.Ordinal);
    }
    return count;
}

/// <summary>
/// Verifies that the CSS content holds four classes grouped in 2 parts:
/// exactly two rule blocks, each of them naming two XOffice classes.
/// </summary>
/// <param name="cssContent">The CSS content.</param>
/// <returns>TRUE if CSS seems to be optimized.</returns>
private static bool OptimizedCSSClasses(string cssContent)
{
    if (string.IsNullOrEmpty(cssContent))
    {
        return false;
    }

    string[] fragments = cssContent.Split(new char[] { '}' }, StringSplitOptions.RemoveEmptyEntries);
    int groupCount = 0;
    foreach (string fragment in fragments)
    {
        if (fragment.Trim().Length == 0)
        {
            // Line breaks after the last rule are not a group.
            continue;
        }

        groupCount++;
        if (groupCount > 2 || CountCSSClasses(fragment) != 2)
        {
            return false;
        }
    }
    return groupCount == 2;
}
} // class LocalToWebStyleFilterTest
} // namespace ContentFiltering.Test.Office.Word.Filters