Index: xword/ContentFiltering/Html/CSSUtil.cs =================================================================== --- xword/ContentFiltering/Html/CSSUtil.cs (revision 22373) +++ xword/ContentFiltering/Html/CSSUtil.cs (working copy) @@ -4,6 +4,7 @@ using System.Text; using System.Xml; using System.Collections; +using System.Text.RegularExpressions; namespace ContentFiltering.Html { @@ -171,5 +172,154 @@ return optimizedCSSSelectors; } + + + /// + /// Extracts inline styles and replaces them with CSS classes. + /// + /// Node to filter + /// A reference to the XmlDocument. + /// Filtered node: 'class' attribute instead of 'style'. + public static XmlNode ConvertInlineStylesToCssClasses(XmlNode xnode, ref XmlDocument xmlDoc, ref int counter, ref Hashtable cssClasses) + { + XmlNode node = xnode; + if (node.ChildNodes.Count > 0) + { + RemoveXOfficeCSSClasses(ref node); + ExtractStyle(ref node, ref xmlDoc, ref counter, ref cssClasses); + for (int i = 0; i < node.ChildNodes.Count; i++) + { + XmlNode childNode = node.ChildNodes[i]; + childNode = ConvertInlineStylesToCssClasses(childNode, ref xmlDoc, ref counter, ref cssClasses); + } + } + return node; + } + + + /// + /// Extracts inline style from an XmlNode to a CSS class. + /// Adds new CSS class to 'class' property of the node. + /// + /// A reference to the node to be filtered. + /// A reference to the document containing the node. + private static void ExtractStyle(ref XmlNode node, ref XmlDocument xmlDoc, ref int counter, ref Hashtable cssClasses) + { + if (node.Attributes.Count > 0) + { + if (node.Attributes["style"] != null) + { + if (node.ChildNodes.Count > 0 && ("" + node.Attributes["style"].Value).Length > 0) + { + string className = "xoffice" + counter; + string classValue = CSSUtil.CleanCSSProperties(node.Attributes["style"].Value); + if (classValue.Length > 0) + { + + cssClasses.Add("." 
+ className, classValue); + node.Attributes.Remove(node.Attributes["style"]); + XmlAttribute classAttribute = node.Attributes["class"]; + if (classAttribute == null) + { + classAttribute = xmlDoc.CreateAttribute("class"); + } + else + { + classAttribute.Value += " "; + } + classAttribute.Value += className; + node.Attributes.Remove(node.Attributes["class"]); + node.Attributes.Append(classAttribute); + counter++; + + } + } + else + { + //An empty node, so delete its attributes + //This way the node could be safely removed by other DOM filters + node.Attributes.RemoveAll(); + } + } + } + } + + + /// + /// Removes previous XOffice CSS classes from an XmlNode. + /// + /// A reference to the XmlNode. + private static void RemoveXOfficeCSSClasses(ref XmlNode node) + { + XmlAttribute classAttribute = node.Attributes["class"]; + if (classAttribute != null) + { + if (classAttribute.Value.IndexOf("xoffice") >= 0) + { + classAttribute.Value = Regex.Replace(classAttribute.Value, "xoffice[0-9]+", "", RegexOptions.IgnoreCase | RegexOptions.Multiline); + } + node.Attributes.Remove(node.Attributes["class"]); + if (("" + classAttribute.Value).Length > 0) + { + node.Attributes.Append(classAttribute); + } + } + } + + + /// + /// Keeps only the declarations whose property name is listed in validCSSProperties; a declaration without ':' is dropped instead of throwing. + /// + /// Initial properties. + /// Cleaned properties. + private static string CleanCSSProperties(string style) + { + StringBuilder acceptedProperties = new StringBuilder(); + + string[] separator = new string[1]; + separator[0] = ";"; + string[] props = style.Split(separator, StringSplitOptions.RemoveEmptyEntries); + foreach (string property in props) + { + string propName = property.Split(':')[0]; + if (validCSSProperties.Contains(propName.ToLowerInvariant().Trim())) + { + acceptedProperties.Append(property); + acceptedProperties.Append(";"); + } + } + return acceptedProperties.ToString(); + } + + + /// + /// Common valid CSS properties. Entries must be lower-case: CleanCSSProperties lower-cases a name before looking it up here. 
+ /// + private static readonly List validCSSProperties = new List() + { + "accelerator", "azimuth", "background", "background-attachment", "background-color", "background-image", + "background-position", "background-position-x", "background-position-y", "background-repeat", "behavior", "border", + "border-bottom", "border-bottom-color", "border-bottom-style", "border-bottom-width", "border-collapse", "border-color", + "border-left", "border-left-color", "border-left-style", "border-left-width", "border-right", "border-right-color", + "border-right-style", "border-right-width", "border-spacing", "border-style", "border-top", "border-top-color", + "border-top-style", "border-top-width", "border-width", "bottom", "caption-side", "clear", + "clip", "color", "content", "counter-increment", "counter-reset", "cue", + "cue-after", "cue-before", "cursor", "direction", "display", "elevation", + "empty-cells", "filter", "float", "font", "font-family", "font-size", + "font-size-adjust", "font-stretch", "font-style", "font-variant", "font-weight", "height", + "ime-mode", "include-source", "layer-background-color", "layer-background-image", "layout-flow", "layout-grid", + "layout-grid-char", "layout-grid-char-spacing", "layout-grid-line", "layout-grid-mode", "layout-grid-type", "left", + "letter-spacing", "line-break", "line-height", "list-style", "list-style-image", "list-style-position", + "list-style-type", "margin", "margin-bottom", "margin-left", "margin-right", "margin-top", + "marker-offset", "marks", "max-height", "max-width", "min-height", "min-width", + "orphans", "outline", "outline-color", "outline-style", "outline-width", "overflow", + "overflow-x", "overflow-y", "padding", "padding-bottom", "padding-left", "padding-right", + "padding-top", "page", "page-break-after", "page-break-before", "page-break-inside", "pause", + "pause-after", "pause-before", "pitch", "pitch-range", "play-during", "position", + "size", "table-layout", "text-align", "text-decoration", 
"text-indent", "text-transform", + "text-shadow", "top", "vertical-align", "visibility", "white-space", "width", + "word-break", "word-spacing", "z-index" + }; + } } Index: xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs =================================================================== --- xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs (revision 22373) +++ xword/ContentFiltering/Office/Word/Filters/LocalToWebStyleFilter.cs (working copy) @@ -47,7 +47,7 @@ //step2: convert all inlined CSS to CSS classes //(including, but not limited to, those generated at step1) - body = ConvertInlineStylesToCssClasses(body, ref xmlDoc); + body = CSSUtil.ConvertInlineStylesToCssClasses(body, ref xmlDoc, ref counter, ref cssClasses); //step3: optimize CSS by grouping selectors with the same properties cssClasses = CSSUtil.GroupCSSSelectors(cssClasses); @@ -58,124 +58,12 @@ #endregion IDOMFilter Members - - /// - /// Extracts inline styles and replaces them with CSS classes. + /// Inserts the CSS from the cssClasses in the style node of the head section, + /// creating the style node if necessary. /// - /// Node to filter - /// A reference to the XmlDocument. - /// Filtered node: 'class' attribute instead of 'style'. - private XmlNode ConvertInlineStylesToCssClasses(XmlNode xnode, ref XmlDocument xmlDoc) - { - XmlNode node = xnode; - if (node.ChildNodes.Count > 0) - { - RemoveXOfficeCSSClasses(ref node); - ExtractStyle(ref node, ref xmlDoc); - for (int i = 0; i < node.ChildNodes.Count; i++) - { - XmlNode childNode = node.ChildNodes[i]; - childNode = ConvertInlineStylesToCssClasses(childNode, ref xmlDoc); - } - } - return node; - } - - /// - /// Removes previous XOffice CSS classes. - /// - /// A reference to an XmlNode to filter. 
- private void RemoveXOfficeCSSClasses(ref XmlNode node) - { - XmlAttribute classAttribute = node.Attributes["class"]; - if (classAttribute != null) - { - if (classAttribute.Value.IndexOf("xoffice") >= 0) - { - classAttribute.Value = Regex.Replace(classAttribute.Value, "xoffice[0-9]+", "", RegexOptions.IgnoreCase | RegexOptions.Multiline); - } - node.Attributes.Remove(node.Attributes["class"]); - if (("" + classAttribute.Value).Length > 0) - { - node.Attributes.Append(classAttribute); - } - } - } - - /// - /// Extracts inline style from an XmlNode to a CSS class. - /// Adds new CSS class to 'class' property of the node. - /// - /// A reference to the node to be filtered. - /// A reference to the document containing the node. - private void ExtractStyle(ref XmlNode node, ref XmlDocument xmlDoc) - { - if (node.Attributes.Count > 0) - { - if (node.Attributes["style"] != null) - { - if (node.ChildNodes.Count > 0 && ("" + node.Attributes["style"].Value).Length > 0) - { - string className = "xoffice" + counter; - string classValue = CleanCSSProperties(node.Attributes["style"].Value); - if (classValue.Length > 0) - { - - cssClasses.Add("." + className, classValue); - node.Attributes.Remove(node.Attributes["style"]); - XmlAttribute classAttribute = node.Attributes["class"]; - if (classAttribute == null) - { - classAttribute = xmlDoc.CreateAttribute("class"); - } - else - { - classAttribute.Value += " "; - } - classAttribute.Value += className; - node.Attributes.Remove(node.Attributes["class"]); - node.Attributes.Append(classAttribute); - counter++; - - } - } - else - { - //An empty node, so delete it's attributes - //This way the node could be safely removed by other DOM filters - node.Attributes.RemoveAll(); - } - } - } - } - - /// - /// Cleans CSS properties by removing the ones specific to MS Office. 
- /// - /// - /// - private string CleanCSSProperties(string style) - { - StringBuilder acceptedProperties = new StringBuilder(); - - string[] separator = new string[1]; - separator[0] = ";"; - string[] props = style.Split(separator, StringSplitOptions.RemoveEmptyEntries); - foreach (string property in props) - { - string propName = property.Substring(0, property.IndexOf(':')); - if (ValidCSSProperties.GetList().Contains(propName.ToLower().Trim())) - { - acceptedProperties.Append(property); - acceptedProperties.Append(";"); - } - } - return acceptedProperties.ToString(); - } - - - + /// A reference to the head node of the document. + /// A reference to the XmlDocument. private void InsertCssClassesInHeader(ref XmlNode headNode, ref XmlDocument xmlDoc) { XmlNode styleNode = xmlDoc.CreateNode(XmlNodeType.Element, "style", xmlDoc.NamespaceURI); @@ -199,45 +87,4 @@ } } - /// - /// Most common valid CSS properties. - /// - /// TODO: move to application settings mechanism? - class ValidCSSProperties - { - private static List validCSSProperties = new List() - { - "accelerator", "azimuth", "background", "background-attachment", "background-color", "background-image", - "background-position", "background-position-x", "background-position-y", "background-repeat", "behavior", "border", - "border-bottom", "border-bottom-color", "border-bottom-style", "border-bottom-width", "border-collapse", "border-color", - "border-left", "border-left-color", "border-left-style", "border-left-width", "border-right", "border-right-color", - "border-right-style", "border-right-width", "border-spacing", "border-style", "border-top", "border-top-color", - "border-top-style", "border-top-width", "border-width", "bottom", "caption-side", "clear", - "clip", "color", "content", "counter-increment", "counter-reset", "cue", - "cue-after", "cue-before", "cursor", "direction", "display", "elevation", - "empty-cells", "filter", "float", "font", "font-family", "font-size", - "font-size-adjust", 
"font-stretch", "font-style", "font-variant", "font-weight", "height", - "ime-mode", "include-source", "layer-background-color", "layer-background-image", "layout-flow", "layout-grid", - "layout-grid-char", "layout-grid-char-spacing", "layout-grid-line", "layout-grid-mode", "layout-grid-type", "left", - "letter-spacing", "line-break", "line-height", "list-style", "list-style-image", "list-style-position", - "list-style-type", "margin", "margin-bottom", "margin-left", "margin-right", "margin-top", - "marker-offset", "marks", "max-height", "max-width", "min-height", "min-width", - "orphans", "outline", "outline-color", "outline-style", "outline-width", "overflow", - "overflow-X", "overflow-Y", "padding", "padding-bottom", "padding-left", "padding-right", - "padding-top", "page", "page-break-after", "page-break-before", "page-break-inside", "pause", - "pause-after", "pause-before", "pitch", "pitch-range", "play-during", "position", - "size", "table-layout", "text-align", "text-decoration", "text-indent", "text-transform", - "text-shadow", "top", "vertical-align", "visibility", "white-space", "width", - "word-break", "word-spacing", "z-index" - }; - - /// - /// Gets the list of valid CSS properties. - /// - /// - public static List GetList() - { - return validCSSProperties; - } - } } Index: xword/ContentFiltering/Test/Html/CSSUtilTest.cs =================================================================== --- xword/ContentFiltering/Test/Html/CSSUtilTest.cs (revision 22373) +++ xword/ContentFiltering/Test/Html/CSSUtilTest.cs (working copy) @@ -230,5 +230,82 @@ } + + /// + /// Test for ConvertInlineStylesToCssClasses method. + /// + [Test] + public void TestConvertInlineStylesToCssClasses() + { + initialXmlDoc = new XmlDocument(); + expectedXmlDoc = new XmlDocument(); + + initialHTML = "TITLE" + + "" + + "
" + + "

Text 1

" + + "

Text 2

" + + "

" + + "Text 3" + + "Text 4" + + "Text 5" + + "

" + + "
" + + ""; + + initialXmlDoc.LoadXml(initialHTML); + + + expectedHTML="TITLE" + + "" + + "
" + + "

Text 1

" + + "

Text 2

" + + "

" + + "Text 3" + + "Text 4" + + "Text 5" + + "

" + + "
" + + ""; + + expectedXmlDoc.LoadXml(expectedHTML); + + int counter = 0; + Hashtable cssClasses = new Hashtable(); + XmlNode node = initialXmlDoc.GetElementsByTagName("div")[0]; + + CSSUtil.ConvertInlineStylesToCssClasses(node, ref initialXmlDoc, ref counter, ref cssClasses); + + string[] properties = + { + "border:1px red solid;padding:3px;margin:3px;", + "font-family:sans-serif;", + "color:orange;", + "color:black;" + }; + + //after conversion we should get the expected document + Assert.IsTrue(XmlDocComparator.AreIdentical(expectedXmlDoc, initialXmlDoc)); + + //6 key/value pairs in cssClasses hashtable + Assert.IsTrue(cssClasses.Count == 6); + + //6 CSS classes from .xoffice0 to .xoffice5 + for (int i = 0; i < 6; i++) + { + + Assert.IsTrue(cssClasses.ContainsKey(".xoffice" + i)); + } + + //all the properties extracted from inline styles should be found in the cssClasses hashtable + foreach (string cssProp in properties) + { + Assert.IsTrue(cssClasses.ContainsValue(cssProp)); + } + + } + + } }