Index: xword/ContentFiltering/Office/Word/LocalToWebHTML.cs
===================================================================
--- xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (revision 20809)
+++ xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (working copy)
@@ -54,12 +54,171 @@
AdaptLists(ref xmlDoc);
AdaptMacros(ref xmlDoc);
ClearOfficeAttributes(ref xmlDoc);
+ FixEmptyNestedElements(ref xmlDoc);
StringBuilder sb = new StringBuilder(xmlDoc.GetIndentedXml());
sb.Replace(" xmlns=\"\"","");
return sb.ToString();
}
/// <summary>
+        /// Cleans up the <code>XmlDocument</code> by removing nested and empty elements.
+        /// Redundant wrapper elements below the first <c>body</c> element are unwrapped first,
+        /// then empty elements are removed from the whole document.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the <code>XmlDocument</code>.</param>
+        private void FixEmptyNestedElements(ref XmlDocument xmlDoc)
+        {
+            // Key: parent tag. Value: child tags whose presence keeps that parent from being unwrapped.
+            Dictionary<string, List<string>> nestedNodesToProtect = new Dictionary<string, List<string>>();
+            nestedNodesToProtect.Add("b", new List<string>() { "u", "i", "span" });
+            nestedNodesToProtect.Add("u", new List<string>() { "b", "i", "span" });
+            nestedNodesToProtect.Add("i", new List<string>() { "b", "u", "span" });
+            nestedNodesToProtect.Add("span", new List<string>() { "b", "u", "i" });
+            nestedNodesToProtect.Add("p", new List<string>() { "span", "b", "u", "i" });
+            nestedNodesToProtect.Add("h1", new List<string>() { "span", "p", "div", "b", "u", "i" });
+            nestedNodesToProtect.Add("h2", new List<string>() { "span", "p", "div", "b", "u", "i" });
+            nestedNodesToProtect.Add("h3", new List<string>() { "span", "p", "div", "b", "u", "i" });
+            nestedNodesToProtect.Add("ol", new List<string>() { "li", "ol", "ul" });
+            nestedNodesToProtect.Add("ul", new List<string>() { "li", "ol", "ul" });
+            nestedNodesToProtect.Add("table", new List<string>() { "tr", "td", "thead", "tbody" });
+            nestedNodesToProtect.Add("tr", new List<string>() { "td" });
+            nestedNodesToProtect.Add("td", new List<string>() { "p", "span", "div", "b", "u", "i", "ol", "ul" });
+
+            // The indexer returns null when the document has no body element.
+            XmlNode body = xmlDoc.GetElementsByTagName("body")[0];
+            if (body != null)
+            {
+                // ChildNodes is a live list and RemoveNestedElements may detach the node it is
+                // given; enumerating the live list would then stop early and skip the remaining
+                // siblings. Iterate over a snapshot instead.
+                List<XmlNode> topLevelNodes = new List<XmlNode>();
+                foreach (XmlNode node in body.ChildNodes)
+                {
+                    topLevelNodes.Add(node);
+                }
+
+                foreach (XmlNode node in topLevelNodes)
+                {
+                    RemoveNestedElements(ref xmlDoc, node, nestedNodesToProtect);
+                }
+            }
+
+            // Elements that are legitimately empty and must survive the empty-element cleanup.
+            List<string> emptyNodesToProtect = new List<string>() { "br", "img", "hr" };
+            RemoveEmptyElements(ref xmlDoc, emptyNodesToProtect);
+        }
+
+
+        /// <summary>
+        /// Unwraps redundant nested elements in the specified node and its children, unless the
+        /// node and one of its children match a rule from the <code>nodesToProtect</code> dictionary.
+        /// A node is unwrapped (replaced in its parent by its own children, in order) when its
+        /// first child is an element and no rule protects it. Children are processed before
+        /// the node itself.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to an <code>XmlDocument</code>.</param>
+        /// <param name="nodeToProcess">Node to process.</param>
+        /// <param name="nodesToProtect">Nodes not to alter: parent tag name mapped to the child
+        /// tag names that keep that parent in place.</param>
+        private void RemoveNestedElements(ref XmlDocument xmlDoc, XmlNode nodeToProcess, Dictionary<string, List<string>> nodesToProtect)
+        {
+            if (nodeToProcess.NodeType != XmlNodeType.Element || !nodeToProcess.HasChildNodes)
+            {
+                return;
+            }
+
+            // ChildNodes is live: a recursive call may detach the child it was given, which would
+            // end an enumeration of the live list early. Recurse over a snapshot so every
+            // original child is visited.
+            List<XmlNode> originalChildren = new List<XmlNode>();
+            foreach (XmlNode child in nodeToProcess.ChildNodes)
+            {
+                originalChildren.Add(child);
+            }
+
+            foreach (XmlNode child in originalChildren)
+            {
+                RemoveNestedElements(ref xmlDoc, child, nodesToProtect);
+            }
+
+            // Only a node whose first child is an element is a candidate for unwrapping.
+            XmlNode firstChild = nodeToProcess.FirstChild;
+            if (firstChild == null || firstChild.NodeType != XmlNodeType.Element)
+            {
+                return;
+            }
+
+            XmlNode parentNode = nodeToProcess.ParentNode;
+            if (parentNode == null)
+            {
+                // A detached node has nowhere to hoist its children to.
+                return;
+            }
+
+            // Keep the node if any rule for its tag lists one of its current children.
+            foreach (KeyValuePair<string, List<string>> rule in nodesToProtect)
+            {
+                if (nodeToProcess.Name != rule.Key.Trim().ToLowerInvariant())
+                {
+                    continue;
+                }
+
+                foreach (XmlNode child in nodeToProcess.ChildNodes)
+                {
+                    foreach (string protectedChild in rule.Value)
+                    {
+                        if (protectedChild.Trim().ToLowerInvariant() == child.Name)
+                        {
+                            return;
+                        }
+                    }
+                }
+            }
+
+            // Snapshot again: the child list may have changed during the recursion above.
+            List<XmlNode> nodesToMove = new List<XmlNode>();
+            foreach (XmlNode child in nodeToProcess.ChildNodes)
+            {
+                nodesToMove.Add(child);
+            }
+
+            // Hoist the children right after the wrapper, preserving their order, then drop it.
+            XmlNode afterNode = nodeToProcess;
+            foreach (XmlNode nodeToMove in nodesToMove)
+            {
+                afterNode = parentNode.InsertAfter(nodeToProcess.RemoveChild(nodeToMove), afterNode);
+            }
+
+            parentNode.RemoveChild(nodeToProcess);
+        }
+
+        /// <summary>
+        /// Removes empty elements if not in the <code>nodesToProtect</code> list.
+        /// An element is considered to be empty if it has no child nodes (text included).
+        /// Empty paragraphs are transformed to breaks, whether or not <c>p</c> is protected.
+        /// The document is scanned once: an element that only becomes empty because its
+        /// empty children were removed is left in place.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the <code>XmlDocument</code>.</param>
+        /// <param name="nodesToProtect">A list of strings containing nodes to protect from erasing (like br, img, hr).</param>
+        private void RemoveEmptyElements(ref XmlDocument xmlDoc, List<string> nodesToProtect)
+        {
+            List<XmlNode> nodesToDelete = new List<XmlNode>();
+            List<XmlNode> paragraphsToAlter = new List<XmlNode>();
+
+            // GetElementsByTagName returns a live list, so only collect here and mutate afterwards.
+            foreach (XmlNode node in xmlDoc.GetElementsByTagName("*"))
+            {
+                if (node.HasChildNodes)
+                {
+                    continue;
+                }
+
+                if (node.Name == "p")
+                {
+                    paragraphsToAlter.Add(node);
+                    continue;
+                }
+
+                bool preventDelete = false;
+                foreach (string nodeName in nodesToProtect)
+                {
+                    // Invariant lowering keeps tag matching independent of the current culture.
+                    if (nodeName.ToLowerInvariant().Trim() == node.Name)
+                    {
+                        preventDelete = true;
+                        break;
+                    }
+                }
+
+                if (!preventDelete)
+                {
+                    nodesToDelete.Add(node);
+                }
+            }
+
+            foreach (XmlNode node in nodesToDelete)
+            {
+                node.ParentNode.RemoveChild(node);
+            }
+
+            foreach (XmlNode node in paragraphsToAlter)
+            {
+                node.ParentNode.ReplaceChild(xmlDoc.CreateElement("br"), node);
+            }
+        }
+
+ ///
/// Removes 'class' attribute from text marked as containing grammar or spelling errors.
/// (when values are 'gramE' or 'spellE'). Removes 'lang' attribute. Adds a space character
/// (' ') to the affected text, to make sure words marked as errors are separated.
@@ -126,7 +285,9 @@
xIterator = navigator.Select(expression);
foreach (XPathNavigator nav in xIterator)
{
- if (nav.Value == "MsoNormal" || nav.Value == "MsoNormalTable" || nav.Value == "MsoTableGrid")
+ if (nav.Value == "MsoNormal" || nav.Value == "MsoNormalTable"
+ || nav.Value == "MsoTableGrid" || nav.Value=="MsoSubtitle"
+ || nav.Value == "MsoTitle")
{
nav.DeleteSelf();
}