Index: xword/ContentFiltering/ContentFiltering.csproj
===================================================================
--- xword/ContentFiltering/ContentFiltering.csproj (revision 21243)
+++ xword/ContentFiltering/ContentFiltering.csproj (working copy)
@@ -54,8 +54,18 @@
+
+
+
+
+
+
+
+
+
+
Index: xword/ContentFiltering/Office/Word/Filters/GrammarAndSpellingErrorsFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/GrammarAndSpellingErrorsFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/GrammarAndSpellingErrorsFilter.cs (revision 0)
@@ -0,0 +1,71 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class GrammarAndSpellingErrorsFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+        public GrammarAndSpellingErrorsFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Removes 'class' attribute from text marked as containing grammar or spelling errors
+        /// (when values are 'gramE' or 'spellE'). Removes 'lang' attribute. Adds a space character
+        /// (' ') to the affected text, to make sure words marked as errors are separated.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to an xml document.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNodeList nodes = xmlDoc.GetElementsByTagName("span");
+            bool insertASpace = false;
+            XmlNode tempNode = null;
+            foreach (XmlNode node in nodes)
+            {
+                insertASpace = false;
+                XmlAttribute classAttribute = node.Attributes["class"];
+                if (classAttribute != null)
+                {
+                    if (classAttribute.Value.IndexOf("gramE", StringComparison.OrdinalIgnoreCase) >= 0
+                        ||
+                        classAttribute.Value.IndexOf("spellE", StringComparison.OrdinalIgnoreCase) >= 0)
+                    {
+                        node.Attributes.Remove(classAttribute);
+                        insertASpace = true;
+                    }
+                }
+                XmlAttribute langAttribute = node.Attributes["lang"];
+                if (langAttribute != null)
+                {
+                    node.Attributes.Remove(langAttribute);
+                    insertASpace = true;
+                }
+                //The space goes to the first child; an empty span has none, hence the null check below.
+                if (insertASpace)
+                {
+                    if (node.NodeType == XmlNodeType.Element)
+                    {
+                        tempNode = node.ChildNodes[0];
+                    }
+                    else
+                    {
+                        tempNode = node;
+                    }
+                    if (tempNode != null && tempNode.Value != null)
+                    {
+                        tempNode.Value += " ";
+                    }
+                }
+            }
+        }
+
+        #endregion
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/IDOMFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/IDOMFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/IDOMFilter.cs (revision 0)
@@ -0,0 +1,21 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    /// <summary>
+    /// Interface for XML cleaning.
+    /// </summary>
+    public interface IDOMFilter
+    {
+        /// <summary>
+        /// The Filter() method implemented by each filter.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the XmlDocument</param>
+        void Filter(ref XmlDocument xmlDoc);
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/LocalImageAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/LocalImageAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/LocalImageAdaptorFilter.cs (revision 0)
@@ -0,0 +1,173 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+using System.IO;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class LocalImageAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public LocalImageAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Adapts the images from the local (file:///) to the xwiki format; 'img' elements without a 'src' are skipped.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNodeList images = xmlDoc.GetElementsByTagName("img");
+            List<String> adaptedSrcs = new List<String>();
+            foreach (XmlNode node in images)
+            {
+                if (node.NodeType == XmlNodeType.Element && node.Attributes["src"] != null)
+                {
+                    String imagePath = node.Attributes["src"].Value;
+                    if (!adaptedSrcs.Contains(imagePath))
+                    {
+                        String newPath = "";
+                        List<Guid> imgIds = GetMatchingImages(node);
+                        if (imgIds.Count != 0)
+                        {
+                            ImageInfo imageInfo = manager.States.Images[imgIds[0]];
+                            newPath = imageInfo.imgWebSrc;
+                        }
+                        else
+                        {
+                            //set src and upload
+                            String attachmentName = Path.GetFileName(imagePath);
+                            manager.States.LocalFolder = manager.States.LocalFolder.Replace("\\\\", "\\");
+                            if (!Path.IsPathRooted(imagePath))
+                            {
+                                imagePath = Path.Combine(manager.States.LocalFolder, imagePath);
+                            }
+                            bool sucess = manager.XWikiClient.AddAttachment(manager.States.PageFullName, imagePath);
+                            //TODO report if the attachment cannot be loaded.
+                            newPath = manager.XWikiClient.GetAttachmentURL(manager.States.PageFullName, attachmentName);
+                        }
+                        node.Attributes["src"].Value = newPath;
+                        adaptedSrcs.Add(newPath);
+                    }
+                }
+            }
+            BorderImages(ref xmlDoc);
+        }
+
+        #endregion
+
+        /// <summary>
+        /// Adds comments before and after image tags (an image without 'src' gets an empty name).
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the filtered XmlDocument instance.</param>
+        private void BorderImages(ref XmlDocument xmlDoc)
+        {
+            foreach (XmlNode node in xmlDoc.GetElementsByTagName("img"))
+            {
+                XmlAttribute srcAttr = node.Attributes["src"];
+                String imageName = (srcAttr == null) ? "" : Path.GetFileName(srcAttr.Value);
+                XmlNode startComment = xmlDoc.CreateComment("startimage:" + imageName);
+                XmlNode endComment = xmlDoc.CreateComment("stopimage");
+                XmlNode parent = node.ParentNode;
+                parent.InsertBefore(startComment, node);
+                parent.InsertAfter(endComment, node);
+            }
+        }
+
+        /// <summary>
+        /// Gets a list with the GUIDs assigned to the image across the html source.
+        /// </summary>
+        /// <param name="node">The 'img' node to look up.</param>
+        /// <returns>The ids of the known images whose local path contains the node's 'src'; empty when there is no 'src'.</returns>
+        private List<Guid> GetMatchingImages(XmlNode node)
+        {
+            List<Guid> imgIds = new List<Guid>();
+            XmlAttribute srcAttr = node.Attributes["src"];
+            if (srcAttr == null)
+            {
+                return imgIds;
+            }
+            foreach (KeyValuePair<Guid, ImageInfo> pair in manager.States.Images)
+            {
+                //NOTE(review): imgLocalSrc is presumably null for images whose download failed - confirm.
+                String firstLocalPath = pair.Value.imgLocalSrc;
+                String currentLocalPath = srcAttr.Value.Replace("\\", "/");
+                if (firstLocalPath != null && firstLocalPath.Replace("\\", "/").Contains(currentLocalPath))
+                {
+                    imgIds.Add(pair.Key);
+                }
+            }
+            return imgIds;
+        }
+
+        /// <summary>
+        /// Specifies if an image is new or has been modified.
+        /// </summary>
+        /// <param name="node">The XML node (element) that contains the image tag.</param>
+        /// <returns>Returns true if the image is new or has been modified. Otherwise returns false.</returns>
+        private bool IsImageDirty(XmlNode node)
+        {
+            //Tests if the image was added in Word
+            if (HasXWordId(node))
+            {
+                return true;
+            }
+            else
+            {
+                //Gets the unique identifier for the image
+                String imgId = node.Attributes[ImageInfo.XWORD_IMG_ATTRIBUTE].Value;
+                Guid imgGuid;
+                try
+                {
+                    imgGuid = new Guid(imgId);
+                }
+                catch (Exception)
+                {
+                    return true;
+                }
+                ImageInfo imageInfo = manager.States.Images[imgGuid];
+                //Verifies if the image was modified.
+                String src = node.Attributes["src"].Value;
+                if (!Path.IsPathRooted(src))
+                {
+                    src = Path.Combine(manager.States.LocalFolder, src);
+                }
+                FileInfo fileInfo = new FileInfo(src);
+                if ((fileInfo.CreationTime == imageInfo.fileCreationDate) && (fileInfo.FullName == imageInfo.filePath))
+                {
+                    return false;
+                }
+                else
+                {
+                    return true;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Specifies if an image lacks the XWord id attribute. NOTE(review): the result is inverted with respect to the method name.
+        /// </summary>
+        /// <param name="node">The currently processed node.</param>
+        /// <returns>True if the node has no xword attribute, false if it has one.</returns>
+        private bool HasXWordId(XmlNode node)
+        {
+            XmlAttribute idAttr = node.Attributes[ImageInfo.XWORD_IMG_ATTRIBUTE];
+            if (idAttr == null)
+            {
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/LocalListsAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/LocalListsAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/LocalListsAdaptorFilter.cs (revision 0)
@@ -0,0 +1,192 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class LocalListsAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public LocalListsAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+        /// <summary>
+        /// Adapts the lists to a less styled format.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document instance.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
+            //Remove the extra paragraph from list items.
+            foreach (XmlNode node in listItems)
+            {
+                if (node.FirstChild != null && node.FirstChild.NodeType == XmlNodeType.Element)
+                {
+                    if (node.FirstChild.Name == "p")
+                    {
+                        node.InnerXml = node.FirstChild.InnerXml;
+                    }
+                }
+            }
+            bool foundExtraLists = false;
+            do
+            {
+                foundExtraLists = RemoveExtraLists(ref xmlDoc);
+            } while (foundExtraLists);
+            //Remove attributes from list declarations.
+            XmlNodeList lists = xmlDoc.GetElementsByTagName("ul");
+            foreach (XmlNode node in lists)
+            {
+                node.Attributes.RemoveAll();
+            }
+            lists = xmlDoc.GetElementsByTagName("ol");
+            foreach (XmlNode node in lists)
+            {
+                node.Attributes.RemoveAll();
+            }
+            RemoveDivFromLists(ref xmlDoc, "ul");
+            RemoveDivFromLists(ref xmlDoc, "ol");
+            MoveChildListToTheirParent(ref xmlDoc);
+        }
+
+        #endregion
+
+
+        /// <summary>
+        /// Moves the children of every 'li' that starts with a 'ul' or 'ol' (the extra lists Word creates for sublists) to the previous sibling.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        /// <returns>True if the children of at least one 'li' were moved; otherwise false.</returns>
+        private bool RemoveExtraLists(ref XmlDocument xmlDoc)
+        {
+            bool foundExtraLists = false;
+            XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
+            foreach (XmlNode node in listItems)
+            {
+                //A 'li' that starts with a 'ul' or 'ol' hands its children over; the emptied 'li' itself is kept.
+                XmlNode prevListItem = node.PreviousSibling;
+                if (node.FirstChild != null && prevListItem != null && (node.FirstChild.Name == "ul" || node.FirstChild.Name == "ol"))
+                {
+                    //Snapshot the children: moving a node while enumerating ChildNodes ends the enumeration early.
+                    List<XmlNode> children = new List<XmlNode>();
+                    foreach (XmlNode child in node.ChildNodes)
+                    {
+                        children.Add(child);
+                    }
+                    foreach (XmlNode child in children)
+                    {
+                        prevListItem.AppendChild(child);
+                    }
+                    foundExtraLists = true;
+                }
+            }
+            return foundExtraLists;
+        }
+
+        /// <summary>
+        /// Remove 'div' elements used for alignment and move their children in the correct position.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        /// <param name="listName">List type/name (like "ol", "ul").</param>
+        private void RemoveDivFromLists(ref XmlDocument xmlDoc, string listName)
+        {
+            XmlNodeList lists = xmlDoc.GetElementsByTagName(listName);
+            Dictionary<XmlNode, List<XmlNode>> childrenToMoveUp = new Dictionary<XmlNode, List<XmlNode>>();
+            foreach (XmlNode list in lists)
+            {
+                //The lists themselves are never named 'div', so the wrappers are looked up among their children.
+                foreach (XmlNode node in list.ChildNodes)
+                {
+                    XmlNode prevListItem = node.PreviousSibling;
+                    if (node.Name.ToLower().Trim() == "div" && prevListItem != null)
+                    {
+                        List<XmlNode> value;
+                        if (!childrenToMoveUp.TryGetValue(prevListItem, out value))
+                        {
+                            value = new List<XmlNode>();
+                            childrenToMoveUp[prevListItem] = value;
+                        }
+                        value.Add(node);
+                    }
+                }
+            }
+
+            foreach (XmlNode key in childrenToMoveUp.Keys)
+            {
+                foreach (XmlNode node in childrenToMoveUp[key])
+                {
+                    //The parent is read from the div itself: its previous sibling may be
+                    //another div that has already been removed from the document.
+                    XmlNode parent = node.ParentNode;
+                    //take all the children from this div and put them, in order, right before it
+                    while (node.FirstChild != null)
+                    {
+                        parent.InsertBefore(node.FirstChild, node);
+                    }
+                    //remove the node
+                    parent.RemoveChild(node);
+                }
+            }
+
+        }
+
+        /// <summary>
+        /// Move orphan 'ul' and 'ol' elements to their corresponding 'li' parent.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        private void MoveChildListToTheirParent(ref XmlDocument xmlDoc)
+        {
+            MoveElementsUp(ref xmlDoc, "ul");
+            MoveElementsUp(ref xmlDoc, "ol");
+        }
+
+        /// <summary>
+        /// Moves every element with the given name that directly follows a 'li' into that 'li' (as its last child).
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        /// <param name="element">Element type/name (like "ol", "ul").</param>
+        private void MoveElementsUp(ref XmlDocument xmlDoc, string element)
+        {
+            Dictionary<XmlNode, List<XmlNode>> childrenToMoveUp = new Dictionary<XmlNode, List<XmlNode>>();
+
+            XmlNodeList items = xmlDoc.GetElementsByTagName(element.ToLower().Trim());
+            foreach (XmlNode node in items)
+            {
+                XmlNode prevListItem = node.PreviousSibling;
+                //The moves are only recorded here; the document is modified after the enumeration.
+                List<XmlNode> value = new List<XmlNode>();
+                if (prevListItem != null)
+                {
+                    if (prevListItem.Name.ToLower().Trim() == "li")
+                    {
+                        if (childrenToMoveUp.ContainsKey(prevListItem))
+                        {
+                            value = childrenToMoveUp[prevListItem];
+                        }
+                        value.Add(node);
+                        childrenToMoveUp[prevListItem] = value;
+                    }
+                }
+            }
+            foreach (XmlNode key in childrenToMoveUp.Keys)
+            {
+                XmlNode liParent = (XmlNode)key;
+                XmlNode parent = liParent.ParentNode;
+                List<XmlNode> nodes = childrenToMoveUp[key];
+                foreach (XmlNode node in nodes)
+                {
+                    parent.RemoveChild(node);
+                    liParent.AppendChild(node);
+                }
+            }
+        }
+
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/LocalMacrosAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/LocalMacrosAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/LocalMacrosAdaptorFilter.cs (revision 0)
@@ -0,0 +1,62 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+using XWiki;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class LocalMacrosAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public LocalMacrosAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Replaces the read-only Word content controls with XWiki macro markup.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document instance.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNodeList macroNodes = xmlDoc.GetElementsByTagName("Sdt", "urn:schemas-microsoft-com:office:word");
+            XmlDocumentFragment docFrag = xmlDoc.CreateDocumentFragment();
+            Dictionary<String, String> macros = this.manager.States.Macros;
+            //We use a new list because the XmlNodeList will break when operating on its elements.
+            List<XmlNode> nodeList = new List<XmlNode>();
+            foreach (XmlNode node in macroNodes)
+            {
+                nodeList.Add(node);
+            }
+            foreach (XmlNode node in nodeList)
+            {
+                try
+                {
+                    XmlAttribute idAttribute = node.Attributes["ID"];
+                    if (idAttribute == null)
+                    {
+                        continue; //A content control without an ID cannot be mapped back to a macro.
+                    }
+                    String content;
+                    if (macros.TryGetValue(idAttribute.Value, out content))
+                    {
+                        docFrag.InnerXml = content;
+                        node.ParentNode.ReplaceChild(docFrag, node);
+                    }
+                }
+                catch (Exception ex) //Best effort: log and go on with the remaining content controls.
+                {
+                    Log.Exception(ex);
+                }
+            }
+        }
+
+        #endregion
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/OfficeAttributesRemoverFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/OfficeAttributesRemoverFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/OfficeAttributesRemoverFilter.cs (revision 0)
@@ -0,0 +1,46 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using System.Xml.XPath;
+using XWiki.Office.Word;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class OfficeAttributesRemoverFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public OfficeAttributesRemoverFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Deletes all office specific attributes (the ones in the 'v', 'o', 'w' and 'm' namespaces registered below).
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XPathNavigator navigator = xmlDoc.CreateNavigator();
+            XmlNamespaceManager nsMr = new XmlNamespaceManager(xmlDoc.NameTable);
+            nsMr.AddNamespace(String.Empty, "http://www.w3.org/1999/xhtml");
+            nsMr.AddNamespace("v", "urn:schemas-microsoft-com:vml");
+            nsMr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
+            nsMr.AddNamespace("w", "urn:schemas-microsoft-com:office:word");
+            nsMr.AddNamespace("m", "http://schemas.microsoft.com/office/2004/12/omml");
+            //Snapshot the matches first: deleting a node while iterating loses the iterator position.
+            List<XPathNavigator> officeAttributes = new List<XPathNavigator>();
+            foreach (XPathNavigator nav in navigator.Select("//@v:* | //@o:* | //@w:* | //@m:*", nsMr))
+            {
+                officeAttributes.Add(nav.Clone());
+            }
+            officeAttributes.ForEach(attribute => attribute.DeleteSelf());
+        }
+
+        #endregion
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/StyleRemoverFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/StyleRemoverFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/StyleRemoverFilter.cs (revision 0)
@@ -0,0 +1,70 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using System.Xml.XPath;
+using XWiki.Office.Word;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class StyleRemoverFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public StyleRemoverFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+        /// <summary>
+        /// Deletes the style attributes and the default Word 'class' values; 'td' elements keep only 'colspan' and 'rowspan'.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XPathNavigator navigator = xmlDoc.CreateNavigator();
+            //Snapshot the matches first: deleting a node while iterating loses the iterator position.
+            List<XPathNavigator> attributesToDelete = new List<XPathNavigator>();
+            foreach (XPathNavigator nav in navigator.Select("//@style"))
+            {
+                attributesToDelete.Add(nav.Clone());
+            }
+            foreach (XPathNavigator nav in navigator.Select("//@class"))
+            {
+                if (nav.Value == "MsoNormal" || nav.Value == "MsoNormalTable" || nav.Value == "MsoTableGrid")
+                {
+                    attributesToDelete.Add(nav.Clone());
+                }
+            }
+            foreach (XPathNavigator attribute in attributesToDelete)
+            {
+                attribute.DeleteSelf();
+            }
+
+            //Table cells keep only their 'colspan' and 'rowspan' attributes.
+            XmlNodeList nodes = xmlDoc.GetElementsByTagName("td");
+            XmlAttribute colspanAttribute, rowspanAttribute;
+            foreach (XmlNode node in nodes)
+            {
+                //get colspan and rowspan values
+                colspanAttribute = node.Attributes["colspan"];
+                rowspanAttribute = node.Attributes["rowspan"];
+                //remove all valid and invalid attributes
+                node.Attributes.RemoveAll();
+                //put back the colspan and rowspan attributes
+                if (colspanAttribute != null)
+                {
+                    node.Attributes.Append(colspanAttribute);
+                }
+                if (rowspanAttribute != null)
+                {
+                    node.Attributes.Append(rowspanAttribute);
+                }
+            }
+        }
+
+        #endregion
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/WebImageAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/WebImageAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/WebImageAdaptorFilter.cs (revision 0)
@@ -0,0 +1,113 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using XWiki.Office.Word;
+using System.Xml;
+using System.Threading;
+using System.IO;
+using System.Net;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class WebImageAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+        private string serverURL;
+        private string localFolder;
+        private string localFilename;
+
+        public WebImageAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+            serverURL = manager.States.ServerURL;
+            localFolder = manager.States.LocalFolder;
+            localFilename = manager.States.LocalFileName;
+        }
+
+        #region IDOMFilter Members
+        /// <summary>
+        /// Adapts the html source returned by the XWiki server and makes it usable by Word using a local html file.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml dom.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNodeList images = xmlDoc.GetElementsByTagName("img");
+            foreach (XmlNode node in images)
+            {
+                if (node.NodeType == XmlNodeType.Element && node.Attributes["src"] != null)
+                {
+                    XmlAttribute vshapesAttr = node.Attributes["v:shapes"];
+                    if (vshapesAttr != null)
+                    {
+                        node.Attributes.Remove(vshapesAttr);
+                    }
+                    //Creating an additional attribute to help identifying the image in the html.
+                    String src = node.Attributes["src"].Value;
+                    XmlAttribute attr = xmlDoc.CreateAttribute(ImageInfo.XWORD_IMG_ATTRIBUTE);
+                    //Adding the attribute to the xhtml code.
+                    Guid imgId = Guid.NewGuid();
+                    attr.Value = imgId.ToString();
+                    node.Attributes.Append(attr);
+                    //Adding the image to the current image list.
+                    ImageInfo imgInfo = new ImageInfo();
+                    imgInfo.imgWebSrc = src;
+                    if (node.Attributes["alt"] != null)
+                    {
+                        imgInfo.altText = node.Attributes["alt"].Value;
+                    }
+                    manager.States.Images.Add(imgId, imgInfo);
+                    //Downloading image
+                    String imgURL = node.Attributes["src"].Value;
+                    if (imgURL == "") continue;
+                    if (imgURL[0] == '/')
+                    {
+                        imgURL = serverURL + imgURL;
+                    }
+                    //The download runs synchronously on the calling thread.
+                    String folder = localFolder + "\\" + localFilename + "_Files";
+                    Object param = new ImageDownloadInfo(imgURL, folder, imgInfo);
+                    DownloadImage(param);
+                    imgURL = folder + "\\" + Path.GetFileName(imgURL);
+                    imgURL = "file:///" + imgURL.Replace("\\", "/");
+                    node.Attributes["src"].Value = imgURL;
+                }
+            }
+        }
+
+        #endregion
+
+        /// <summary>
+        /// Downloads the image from the server and saves it to a local file.
+        /// </summary>
+        /// <param name="obj">The image data. Instance of ImageDownloadInfo.</param>
+        private void DownloadImage(Object obj)
+        {
+            try
+            {
+                ImageDownloadInfo idi = (ImageDownloadInfo)obj;
+                String targetFolder = idi.DownloadFolder;
+                String URI = idi.URI;
+                //CreateDirectory does nothing when the folder already exists.
+                Directory.CreateDirectory(targetFolder);
+                String path = targetFolder + "\\" + Path.GetFileName(URI);
+                FileInfo fileInfo = new FileInfo(path);
+                using (WebClient webClient = new WebClient())
+                {
+                    File.WriteAllBytes(fileInfo.FullName, webClient.DownloadData(URI));
+                }
+                //Set the image element properties in the converters imageList.
+                idi.ImageInfo.filePath = fileInfo.FullName;
+                idi.ImageInfo.imgLocalSrc = "file:///" + fileInfo.FullName.Replace("\\", "/");
+                idi.ImageInfo.fileURI = URI;
+                idi.ImageInfo.fileSize = fileInfo.Length;
+                idi.ImageInfo.fileCreationDate = fileInfo.CreationTime;
+            }
+            catch (InvalidCastException) { } //obj was not an ImageDownloadInfo: nothing to download.
+            catch (WebException)
+            {
+                //Best effort: the ImageInfo is left without local file data when the download fails.
+            }
+        }
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/WebListsAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/WebListsAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/WebListsAdaptorFilter.cs (revision 0)
@@ -0,0 +1,70 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using XWiki.Office.Word;
+using System.Xml;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class WebListsAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+
+        public WebListsAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Adapts the HTML lists to lists known by MS Word, because Word doesn't like
+        /// 'ul' inside 'li' elements with innerText: such sub-lists are moved right after their 'li'.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to an xml document.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            Dictionary<XmlNode, List<XmlNode>> itemsToMoveUp = new Dictionary<XmlNode, List<XmlNode>>();
+            XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
+
+            //identify elements with children
+            foreach (XmlNode node in listItems)
+            {
+                //only nodes with both text and sub-lists; the Value of an element is always null,
+                //so the text is looked up in the children that are not sub-lists
+                bool hasOwnText = false;
+                List<XmlNode> subLists = new List<XmlNode>();
+                foreach (XmlNode child in node.ChildNodes)
+                {
+                    if (child.Name.ToLower().Trim() == "ul" || child.Name.ToLower().Trim() == "ol")
+                    {
+                        subLists.Add(child);
+                    }
+                    else if (child.NodeType != XmlNodeType.Comment && child.InnerText.Trim().Length > 0)
+                    {
+                        hasOwnText = true;
+                    }
+                }
+                if (hasOwnText && subLists.Count > 0)
+                {
+                    itemsToMoveUp[node] = subLists;
+                }
+            }
+
+            //move the sub-lists one level up, right after their 'li', keeping their order
+            foreach (XmlNode node in itemsToMoveUp.Keys)
+            {
+                XmlNode insertAfter = node;
+                foreach (XmlNode child in itemsToMoveUp[node])
+                {
+                    XmlNode n = node.RemoveChild(child);
+                    node.ParentNode.InsertAfter(n, insertAfter);
+                    insertAfter = n;
+                }
+            }
+        }
+
+        #endregion
+    }
+}
Index: xword/ContentFiltering/Office/Word/Filters/WebMacrosAdaptorFilter.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Filters/WebMacrosAdaptorFilter.cs (revision 0)
+++ xword/ContentFiltering/Office/Word/Filters/WebMacrosAdaptorFilter.cs (revision 0)
@@ -0,0 +1,143 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Xml;
+using XWiki.Office.Word;
+using XWiki;
+
+namespace ContentFiltering.Office.Word.Filters
+{
+    public class WebMacrosAdaptorFilter : IDOMFilter
+    {
+        private ConversionManager manager;
+        private Random random = new Random();
+
+        public WebMacrosAdaptorFilter(ConversionManager manager)
+        {
+            this.manager = manager;
+        }
+
+        #region IDOMFilter Members
+
+        /// <summary>
+        /// Adapts the html source to convert XWiki macros to Word Content Controls.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml dom containing the source.</param>
+        public void Filter(ref XmlDocument xmlDoc)
+        {
+            XmlNode node = xmlDoc;
+            ReplaceMacros(ref node, ref xmlDoc);
+        }
+
+        #endregion
+
+
+        /// <summary>
+        /// Replaces the macros in a xml node with a Word content control tag and recurses into the nodes outside macros.
+        /// </summary>
+        /// <param name="node">The xml node to be adapted.</param>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        private void ReplaceMacros(ref XmlNode node, ref XmlDocument xmlDoc)
+        {
+            int context = 0; //0 - outside macros, 1- inside macro.
+            List<List<XmlNode>> macroNodes = new List<List<XmlNode>>();
+            List<XmlNode> currentMacroNodes = new List<XmlNode>();
+            List<XmlNode> regularNodes = new List<XmlNode>();
+            foreach (XmlNode childNode in node.ChildNodes)
+            {
+                if (childNode.NodeType == XmlNodeType.Comment)
+                {
+                    if (childNode.InnerText.StartsWith("startmacro"))
+                    {
+                        context = 1;
+                        currentMacroNodes = new List<XmlNode>();
+                        currentMacroNodes.Add(childNode);
+                        macroNodes.Add(currentMacroNodes);
+                    }
+                    else if (childNode.InnerText.StartsWith("stopmacro"))
+                    {
+                        context = 0;
+                        currentMacroNodes.Add(childNode);
+                    }
+                }
+                else if (childNode.NodeType != XmlNodeType.Document && childNode.NodeType != XmlNodeType.DocumentType)
+                {
+                    if (context == 0)
+                    {
+                        regularNodes.Add(childNode);
+                    }
+                    else
+                    {
+                        currentMacroNodes.Add(childNode);
+                    }
+                }
+            }
+            foreach (List<XmlNode> macroElements in macroNodes)
+            {
+                if (macroElements.Count > 0)
+                {
+                    try
+                    {
+                        String macroContent = "";
+                        XmlNode element = GenerateContentControlNode(ref xmlDoc);
+                        String id = element.Attributes["ID"].Value;
+                        XmlNode parent = macroElements[0].ParentNode;
+                        parent.InsertBefore(element, macroElements[0]);
+                        foreach (XmlNode n in macroElements)
+                        {
+                            String s = n.OuterXml;
+                            if (n.NamespaceURI.Length > 0)
+                            {
+                                //Removing inline namespace declaration
+                                String ns = " xmlns=\"" + n.NamespaceURI + "\"";
+                                s = s.Replace(ns, "");
+                            }
+                            macroContent += s;
+                            parent.RemoveChild(n);
+                            element.AppendChild(n);
+                        }
+                        this.manager.States.Macros.Add(id, macroContent);
+                    }
+                    catch (XmlException ex)
+                    {
+                        Log.Exception(ex);
+                    }
+                }
+            }
+            foreach (XmlNode n in regularNodes)
+            {
+
+                XmlNode clone = n.Clone();
+                n.ParentNode.ReplaceChild(clone, n);
+                ReplaceMacros(ref clone, ref xmlDoc);
+            }
+        }
+
+
+
+        /// <summary>
+        /// Generates a new node instance for the Word Content Control.
+        /// </summary>
+        /// <param name="xmlDoc">A reference to the xml document.</param>
+        /// <returns>The instance of the new node; its 'ID' is not yet used as a key of the stored macros.</returns>
+        private XmlNode GenerateContentControlNode(ref XmlDocument xmlDoc)
+        {
+            //Initialize the node of the content control.
+            XmlElement element = xmlDoc.CreateElement("w:Sdt", "urn:schemas-microsoft-com:office:word");
+            element.SetAttribute("SdtLocked", "t");
+            element.SetAttribute("ContentLocked", "t");
+            String docPart = "DefaultPlaceholder_" + random.Next(9000000, 9999999).ToString();
+            element.SetAttribute("DocPart", docPart);
+            //The ID is the key under which the macro content is stored, so draw again
+            //until it is not used by another macro (Dictionary.Add throws on a duplicate key).
+            String macroId;
+            do
+            {
+                macroId = random.Next(9000000, 9999999).ToString();
+            } while (this.manager.States.Macros.ContainsKey(macroId));
+            element.SetAttribute("ID", macroId);
+            return element;
+        }
+    }
+}
Index: xword/ContentFiltering/Office/Word/LocalToWebHTML.cs
===================================================================
--- xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (revision 21243)
+++ xword/ContentFiltering/Office/Word/LocalToWebHTML.cs (working copy)
@@ -7,6 +7,8 @@
using System.Xml.XPath;
using XWiki.Xml;
using System.Collections;
+using ContentFiltering.Office.Word;
+using ContentFiltering.Office.Word.Filters;
namespace XWiki.Office.Word
{
@@ -48,488 +50,28 @@
content = content.Replace("", "");
content = content.Replace(" ", " ");
xmlDoc.LoadXml(content);
- ClearStyles(ref xmlDoc);
- FilterGrammarAndSpellingErrors(ref xmlDoc);
- AdaptImages(ref xmlDoc);
- AdaptLists(ref xmlDoc);
- AdaptMacros(ref xmlDoc);
- ClearOfficeAttributes(ref xmlDoc);
- StringBuilder sb = new StringBuilder(xmlDoc.GetIndentedXml());
- sb.Replace(" xmlns=\"\"","");
- return sb.ToString();
- }
- ///
- /// Removes 'class' attribute from text marked as containing grammar or spelling errors.
- /// (when values are 'gramE' or 'spellE'). Removes 'lang' attribute. Adds a space character
- /// (' ') to the affected text, to make sure words marked as errors are separated.
- ///
- /// A refrence to the xml document.
- private void FilterGrammarAndSpellingErrors(ref XmlDocument xmlDoc)
- {
- XmlNodeList nodes = xmlDoc.GetElementsByTagName("span");
- bool insertASpace = false;
- XmlNode tempNode = null;
- foreach (XmlNode node in nodes)
+ List<IDOMFilter> contentFilters = new List<IDOMFilter>()
{
- insertASpace = false;
- XmlAttribute classAttribute = node.Attributes["class"];
- if (classAttribute != null)
- {
- if (classAttribute.Value.ToLower().Trim().IndexOf("grame") >= 0
- ||
- classAttribute.Value.ToLower().Trim().IndexOf("spelle") >= 0)
- {
- node.Attributes.Remove(classAttribute);
- insertASpace = true;
- }
- }
- XmlAttribute langAttribute = node.Attributes["lang"];
- if (langAttribute != null)
- {
- node.Attributes.Remove(langAttribute);
- insertASpace = true;
- }
-
- if (insertASpace)
- {
- if (node.NodeType == XmlNodeType.Element)
- {
- tempNode = node.ChildNodes[0];
- }
- else
- {
- tempNode = node;
- }
- if (tempNode.Value != null)
- {
- tempNode.Value += " ";
- }
- }
- }
- }
-
- ///
- /// Deletes the style attributes from the Word generated content
- ///
- /// A refrence to the xml document.
- private void ClearStyles(ref XmlDocument xmlDoc)
- {
- XPathNavigator navigator = xmlDoc.CreateNavigator();
- XPathExpression expression = navigator.Compile("//@style");
- XPathNodeIterator xIterator = navigator.Select(expression);
- foreach (XPathNavigator nav in xIterator)
+ new StyleRemoverFilter(manager),
+ new GrammarAndSpellingErrorsFilter(manager),
+ new LocalImageAdaptorFilter(manager),
+ new LocalListsAdaptorFilter(manager),
+ new LocalMacrosAdaptorFilter(manager),
+ new OfficeAttributesRemoverFilter(manager)
+ };
+
+
+ foreach(IDOMFilter contentFilter in contentFilters)
{
- nav.DeleteSelf();
+ contentFilter.Filter(ref xmlDoc);
}
- expression = navigator.Compile("//@class");
- xIterator = navigator.Select(expression);
- foreach (XPathNavigator nav in xIterator)
- {
- if (nav.Value == "MsoNormal" || nav.Value == "MsoNormalTable" || nav.Value == "MsoTableGrid")
- {
- nav.DeleteSelf();
- }
- }
- expression = navigator.Compile("//td[@valign]");
- XmlNodeList nodes = xmlDoc.GetElementsByTagName("td");
- XmlAttribute colspanAttribute, rowspanAttribute;
- foreach (XmlNode node in nodes)
- {
- //XmlAttribute valign = node.Attributes["valign"];
- //node.Attributes.Remove(valign);
-
- //get colspan and rowspan values
- colspanAttribute = node.Attributes["colspan"];
- rowspanAttribute = node.Attributes["rowspan"];
- //remove all valid and invalid attributes
- node.Attributes.RemoveAll();
- //put back the colspan and rowspan attributes
- if (colspanAttribute != null)
- {
- node.Attributes.Append(colspanAttribute);
- }
- if (rowspanAttribute != null)
- {
- node.Attributes.Append(rowspanAttribute);
- }
- }
+
+
+ StringBuilder sb = new StringBuilder(xmlDoc.GetIndentedXml());
+ sb.Replace(" xmlns=\"\"","");
+ return sb.ToString();
}
-
- ///
- /// Deletes all office specific attributes
- ///
- /// A reference to the xml document.
- private void ClearOfficeAttributes(ref XmlDocument xmlDoc)
- {
- XPathNavigator navigator = xmlDoc.CreateNavigator();
- XmlNamespaceManager nsMr = new XmlNamespaceManager(xmlDoc.NameTable);
- nsMr.AddNamespace(String.Empty, "http://www.w3.org/1999/xhtml");
- nsMr.AddNamespace("v", "urn:schemas-microsoft-com:vml");
- nsMr.AddNamespace("o", "urn:schemas-microsoft-com:office:office");
- nsMr.AddNamespace("w", "urn:schemas-microsoft-com:office:word");
- nsMr.AddNamespace("m", "http://schemas.microsoft.com/office/2004/12/omml");
-
- XPathExpression expression = navigator.Compile("//@v:* | //@o:* | //@w:* | //@m:*");
- XPathNodeIterator xIterator = navigator.Select(expression.Expression, nsMr);
- foreach (XPathNavigator nav in xIterator)
- {
- nav.DeleteSelf();
- }
- }
-
- ///
- /// Adapts the images from the local(file:///) to the xwiki format.
- ///
- /// A reference to the xml document.
- private void AdaptImages(ref XmlDocument xmlDoc)
- {
- XmlNodeList images = xmlDoc.GetElementsByTagName("img");
- List adaptedSrcs = new List();
- foreach (XmlNode node in images)
- {
- if (node.NodeType == XmlNodeType.Element)
- {
- String imagePath = node.Attributes["src"].Value;
- if (!adaptedSrcs.Contains(imagePath))
- {
- String newPath = "";
- List imgIds = GetMatchingImages(node);
- if (imgIds.Count != 0)
- {
- ImageInfo imageInfo = manager.States.Images[imgIds[0]];
- newPath = imageInfo.imgWebSrc;
- }
- else
- {
- //set src and upload
- String attachmentName = Path.GetFileName(imagePath);
- manager.States.LocalFolder = manager.States.LocalFolder.Replace("\\\\", "\\");
- if (!Path.IsPathRooted(imagePath))
- {
- imagePath = Path.Combine(manager.States.LocalFolder, imagePath);
- }
- bool sucess = manager.XWikiClient.AddAttachment(manager.States.PageFullName, imagePath);
- //TODO report if the attachment cannot be loaded.
- newPath = manager.XWikiClient.GetAttachmentURL(manager.States.PageFullName, attachmentName);
- }
- node.Attributes["src"].Value = newPath;
- adaptedSrcs.Add(newPath);
- }
- }
- }
- BorderImages(ref xmlDoc);
- }
-
- ///
- /// Replaces the read-only Word content controls with XWiki macro markup.
- ///
- /// A reference to the xml document instance.
- private void AdaptMacros(ref XmlDocument xmlDoc)
- {
- XmlNodeList macroNodes = xmlDoc.GetElementsByTagName("Sdt", "urn:schemas-microsoft-com:office:word");
- XmlDocumentFragment docFrag = xmlDoc.CreateDocumentFragment();
- Dictionary macros = this.manager.States.Macros;
- //We use a new list because the XmlNodeList will break when operationg with its' elements.
- List nodeList = new List();
- foreach (XmlNode node in macroNodes)
- {
- nodeList.Add(node);
- }
- foreach (XmlNode node in nodeList)
- {
- try
- {
- String id = node.Attributes["ID"].Value;
- if (macros.ContainsKey(id))
- {
- String content = macros[id];
- docFrag.InnerXml = content;
- node.ParentNode.ReplaceChild(docFrag, node);
- }
- }
- catch (NullReferenceException nre)
- {
- Log.Exception(nre);
- }
- catch (Exception ex)
- {
- Log.Exception(ex);
- }
- }
- }
-
- ///
- /// Adapts to the lists to a less styled format.
- ///
- /// A reference to the xml document instance.
- private void AdaptLists(ref XmlDocument xmlDoc)
- {
- XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
- //Remove the extra paragraph from list items.
- foreach (XmlNode node in listItems)
- {
- if (node.NodeType == XmlNodeType.Element && node.FirstChild.NodeType == XmlNodeType.Element)
- {
- if (node.FirstChild.Name == "p")
- {
- node.InnerXml = node.FirstChild.InnerXml;
- }
- }
- }
- bool foundExtraLists = false;
- do
- {
- foundExtraLists = RemoveExtraLists(ref xmlDoc);
- } while (foundExtraLists);
- //Remove attributes from list declarations.
- XmlNodeList lists = xmlDoc.GetElementsByTagName("ul");
- foreach (XmlNode node in lists)
- {
- node.Attributes.RemoveAll();
- }
- lists = xmlDoc.GetElementsByTagName("ol");
- foreach (XmlNode node in lists)
- {
- node.Attributes.RemoveAll();
- }
- RemoveDivFromLists(ref xmlDoc, "ul");
- RemoveDivFromLists(ref xmlDoc, "ol");
- MoveChildListToTheirParent(ref xmlDoc);
- }
-
- ///
- /// Removes the extra lists Word creates for sublists.
- /// The child 'ul' is moved to the previous sibling.
- ///
- /// A reference to the xml document.
- private bool RemoveExtraLists(ref XmlDocument xmlDoc)
- {
- bool foundExtraLists = false;
- XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
- foreach (XmlNode node in listItems)
- {
- //A 'li' with no innerText but with 'ul' or 'ol' children should be moved up
- if (node.NodeType == XmlNodeType.Element && (""+node.Value).Length<1)
- {
- if (node.ChildNodes[0].Name == "ul" || node.ChildNodes[0].Name == "ol")
- {
- XmlNode prevListItem = node.PreviousSibling;
- //XmlNode subList = node.RemoveChild(node.FirstChild);
- XmlNodeList children = node.ChildNodes;
- foreach (XmlNode child in children)
- {
- prevListItem.AppendChild(child);
- }
- //prevListItem.AppendChild(subList);
- foundExtraLists = true;
- }
- }
- }
- return foundExtraLists;
- }
-
- ///
- /// Remove 'div' elements used for alignment and move their children in the correct position.
- ///
- /// A reference to the xml document.
- /// List type/name (like "ol", "ul").
- private void RemoveDivFromLists(ref XmlDocument xmlDoc,string listName)
- {
- XmlNodeList lists = xmlDoc.GetElementsByTagName(listName);
- Dictionary> childrenToMoveUp = new Dictionary>();
- foreach (XmlNode node in lists)
- {
- if (node.Name.ToLower().Trim() == "div")
- {
- XmlNode prevListItem = node.PreviousSibling;
- XmlNode parent = node.ParentNode;
- List value = new List();
- if (prevListItem != null)
- {
- if (childrenToMoveUp.ContainsKey(prevListItem))
- {
- value = childrenToMoveUp[prevListItem];
- }
- value.Add(node);
- childrenToMoveUp.Add(prevListItem, value);
- }
- }
- }
-
- foreach (XmlNode key in childrenToMoveUp.Keys)
- {
- XmlNode prevListItem = (XmlNode)key;
- XmlNode parent = prevListItem.ParentNode;
- List nodes = childrenToMoveUp[key];
- foreach (XmlNode node in nodes)
- {
- //take all the children from this div and put them in the right position
- foreach (XmlNode child in node.ChildNodes)
- {
- parent.InsertAfter(child, prevListItem);
- }
- //remove the node
- parent.RemoveChild(node);
- }
- }
-
- }
-
- ///
- /// Move orphan 'ul' and 'ol' elements to their coresponding 'li' parent.
- ///
- /// A reference to the xml document.
- private void MoveChildListToTheirParent(ref XmlDocument xmlDoc)
- {
- MoveElementsUp(ref xmlDoc, "ul");
- MoveElementsUp(ref xmlDoc, "ol");
- }
-
- ///
- /// Move an element from an inner list to it's correct position.
- ///
- /// A reference to the xml document.
- /// Element type/name (like "ol", "ul").
- private void MoveElementsUp(ref XmlDocument xmlDoc, string element)
- {
- Dictionary> childrenToMoveUp = new Dictionary>();
-
- XmlNodeList items = xmlDoc.GetElementsByTagName(element.ToLower().Trim());
- foreach (XmlNode node in items)
- {
- XmlNode prevListItem = node.PreviousSibling;
- XmlNode parent = node.ParentNode;
- List value = new List();
- if (prevListItem != null)
- {
- if (prevListItem.Name.ToLower().Trim() == "li")
- {
- if (childrenToMoveUp.ContainsKey(prevListItem))
- {
- value = childrenToMoveUp[prevListItem];
- }
- value.Add(node);
- childrenToMoveUp.Add(prevListItem, value);
- }
- }
- }
- foreach (XmlNode key in childrenToMoveUp.Keys)
- {
- XmlNode liParent = (XmlNode)key;
- XmlNode parent = liParent.ParentNode;
- List nodes = childrenToMoveUp[key];
- foreach (XmlNode node in nodes)
- {
- parent.RemoveChild(node);
- liParent.AppendChild(node);
- }
- }
- }
-
- ///
- /// Specifies if an image is new or has been modified.
- ///
- /// The XML node(element) that contains the image tag
- /// Returns true is the image is new or has been modified. Otherwise returns false.
- private bool IsImageDirty(XmlNode node)
- {
- //Tests if the image was added in Word
- if (HasXWordId(node))
- {
- return true;
- }
- else
- {
- //Gets the unique identifier for the image
- String imgId = node.Attributes[ImageInfo.XWORD_IMG_ATTRIBUTE].Value;
- Guid imgGuid;
- try
- {
- imgGuid = new Guid(imgId);
- }
- catch (Exception)
- {
- return true;
- }
- ImageInfo imageInfo = manager.States.Images[imgGuid];
- //Verifies if the image was modified.
- String src = node.Attributes["src"].Value;
- if (!Path.IsPathRooted(src))
- {
- src = Path.Combine(manager.States.LocalFolder, src);
- }
- FileInfo fileInfo = new FileInfo(src);
- if ((fileInfo.CreationTime == imageInfo.fileCreationDate) && (fileInfo.FullName == imageInfo.filePath))
- {
- return false;
- }
- else
- {
- return true;
- }
- }
- }
-
- ///
- /// Specifies if an image has an XWordId
- ///
- /// The currently processed
- /// True if the note has a xword attribute, false if not.
- private bool HasXWordId(XmlNode node)
- {
- XmlAttribute idAttr = node.Attributes[ImageInfo.XWORD_IMG_ATTRIBUTE];
- if (idAttr == null)
- {
- return true;
- }
- else
- {
- return false;
- }
- }
-
- ///
- /// Gets a list with the GUIDs assigned assigned to image across the html source.
- ///
- /// Teh
- ///
- private List GetMatchingImages(XmlNode node)
- {
- List imgIds = new List();
- XmlAttribute srcAttr = node.Attributes["src"];
- if (srcAttr == null)
- {
- return imgIds;
- }
- foreach (KeyValuePair pair in manager.States.Images)
- {
- String firstLocalPath = pair.Value.imgLocalSrc.Replace("\\", "/");
- String currentLocalPath = srcAttr.Value.Replace("\\", "/");
- if (firstLocalPath.Contains(currentLocalPath)) //
- {
- imgIds.Add(pair.Key);
- }
- }
- return imgIds;
- }
-
- ///
- /// Adds comments before and after image tags.
- ///
- /// A reference to the filtered XmlDocument instance.
- private void BorderImages(ref XmlDocument xmlDoc)
- {
- foreach(XmlNode node in xmlDoc.GetElementsByTagName("img"))
- {
- String imageName = node.Attributes["src"].Value;
- imageName = Path.GetFileName(imageName);
- XmlNode startComment = xmlDoc.CreateComment("startimage:" + imageName);
- XmlNode endComment = xmlDoc.CreateComment("stopimage");
- XmlNode parent = node.ParentNode;
- parent.InsertBefore(startComment, node);
- parent.InsertAfter(endComment, node);
- }
- }
+
}
}
Index: xword/ContentFiltering/Office/Word/WebToLocalHTML.cs
===================================================================
--- xword/ContentFiltering/Office/Word/WebToLocalHTML.cs (revision 21243)
+++ xword/ContentFiltering/Office/Word/WebToLocalHTML.cs (working copy)
@@ -11,6 +11,7 @@
using XWiki.Html;
using XWiki.Xml;
using System.Collections;
+using ContentFiltering.Office.Word.Filters;
namespace XWiki.Office.Word
{
@@ -109,257 +110,21 @@
Log.Exception(ex);
return "Sorry, a problem appeared when loading the page";
}
- AdaptMacros(ref xmlDoc);
- AdaptImages(ref xmlDoc);
- AdaptLists(ref xmlDoc);
- return xmlDoc.GetIndentedXml();
- }
-
- ///
- /// Adapts the HTML lists to lists known by MS Word, because Word doesn't like
- /// 'ul' inside 'li' elements with innerText.
- ///
- ///
- private void AdaptLists(ref XmlDocument xmlDoc)
- {
- Dictionary> itemsToMoveUp = new Dictionary>();
- XmlNodeList listItems = xmlDoc.GetElementsByTagName("li");
-
- //itentify - elements with
children
- foreach (XmlNode node in listItems)
+
+ List<IDOMFilter> webToLocalFilters = new List<IDOMFilter>()
{
- XmlNodeList children=node.ChildNodes;
- //only nodes with both text and other xml elements
- if ((""+node.Value).Length < 1)
- {
- continue;
- }
+ new WebMacrosAdaptorFilter(manager),
+ new WebImageAdaptorFilter(manager),
+ new WebListsAdaptorFilter(manager)
+ };
- foreach (XmlNode child in children)
- {
- if (child.Name.ToLower().Trim() == "ul" || child.Name.ToLower().Trim() == "ol")
- {
- List value=new List();
-
- if (itemsToMoveUp.ContainsKey(node))
- {
- value = itemsToMoveUp[node];
- }
- value.Add(child);
- itemsToMoveUp.Add(node,value);
- }
- }
- }
-
- //move elements one level up if they are inside - elements with no innerText
- foreach (XmlNode node in itemsToMoveUp.Keys)
+ foreach (IDOMFilter webToLocalFilter in webToLocalFilters)
{
- foreach(XmlNode child in itemsToMoveUp[node])
- {
- XmlNode n=node.RemoveChild(child);
- node.ParentNode.InsertAfter(n,node);
- }
+ webToLocalFilter.Filter(ref xmlDoc);
}
- }
- ///
- /// Adapts the html source to convert XWiki macros to Word Content Controls.
- ///
- /// A reference to the xml dom containing the source.
- private void AdaptMacros(ref XmlDocument xmlDoc)
- {
- XmlNode node = xmlDoc;
- ReplaceMacros(ref node, ref xmlDoc);
+ return xmlDoc.GetIndentedXml();
}
- ///
- /// Replaces the macros in a xml node with a Word content control tag.
- ///
- /// The xml node to be adapted.
- /// A refrence to the xml document.
- private void ReplaceMacros(ref XmlNode node, ref XmlDocument xmlDoc)
- {
- int context = 0; //0 - outside macros, 1- inside macro.
- List
> macroNodes = new List>();
- List currentMacroNodes = new List();
- List regularNodes = new List();
- foreach (XmlNode childNode in node.ChildNodes)
- {
- if (childNode.NodeType == XmlNodeType.Comment)
- {
- if (childNode.InnerText.StartsWith("startmacro"))
- {
- context = 1;
- currentMacroNodes = new List();
- currentMacroNodes.Add(childNode);
- macroNodes.Add(currentMacroNodes);
- }
- else if (childNode.InnerText.StartsWith("stopmacro"))
- {
- context = 0;
- currentMacroNodes.Add(childNode);
- }
- }
- else if(childNode.NodeType != XmlNodeType.Document && childNode.NodeType != XmlNodeType.DocumentType )
- {
- if (context == 0)
- {
- regularNodes.Add(childNode);
- }
- else
- {
- currentMacroNodes.Add(childNode);
- }
- }
- }
- foreach (List macroElements in macroNodes)
- {
- if (macroElements.Count > 0)
- {
- try
- {
- String macroContent = "";
- XmlNode element = GenerateContentControlNode(ref xmlDoc);
- String id = element.Attributes["ID"].Value;
- XmlNode parent = macroElements[0].ParentNode;
- parent.InsertBefore(element, macroElements[0]);
- foreach (XmlNode n in macroElements)
- {
- String s = n.OuterXml;
- if (n.NamespaceURI.Length > 0)
- {
- //Removing inline namespace declaration
- String ns = " xmlns=\"" + n.NamespaceURI + "\"";
- s = s.Replace(ns, "");
- }
- macroContent += s;
- parent.RemoveChild(n);
- element.AppendChild(n);
- }
- this.manager.States.Macros.Add(id, macroContent);
- }
- catch (XmlException ex)
- {
- Log.Exception(ex);
- }
- }
- }
- foreach (XmlNode n in regularNodes)
- {
-
- XmlNode clone = n.Clone();
- n.ParentNode.ReplaceChild(clone, n);
- ReplaceMacros(ref clone, ref xmlDoc);
- }
- }
-
- private Random random = new Random();
-
- ///
- /// Generates a new node instance for the Word Content Control.
- ///
- /// A refence to the xml document.
- /// The instance of the new node.
- private XmlNode GenerateContentControlNode(ref XmlDocument xmlDoc)
- {
- //Initialize the node of the content control.
- XmlElement element = xmlDoc.CreateElement("w:Sdt", "urn:schemas-microsoft-com:office:word");
- XmlAttribute sdtLocked = xmlDoc.CreateAttribute("SdtLocked");
- sdtLocked.Value = "t";
- XmlAttribute contentLocked = xmlDoc.CreateAttribute("ContentLocked");
- contentLocked.Value = "t";
- XmlAttribute docPart = xmlDoc.CreateAttribute("DocPart");
- docPart.Value = "DefaultPlaceholder_" + random.Next(9000000, 9999999).ToString();
- XmlAttribute id = xmlDoc.CreateAttribute("ID");
- id.Value = random.Next(9000000, 9999999).ToString();
- element.Attributes.Append(sdtLocked);
- element.Attributes.Append(contentLocked);
- element.Attributes.Append(docPart);
- element.Attributes.Append(id);
- return element;
- }
- ///
- /// Adapts the html source returned by the XWiki server and makes it usable by Word using a local html file.
- ///
- /// A reference to the xml dom.
- private void AdaptImages(ref XmlDocument xmlDoc)
- {
- XmlNodeList images = xmlDoc.GetElementsByTagName("img");
- foreach (XmlNode node in images)
- {
- if (node.NodeType == XmlNodeType.Element)
- {
- XmlAttribute vshapesAttr = node.Attributes["v:shapes"];
- if (vshapesAttr != null)
- {
- node.Attributes.Remove(vshapesAttr);
- }
- //Creating an additional attribute to help identifing the image in the html.
- String src = node.Attributes["src"].Value;
- XmlAttribute attr = xmlDoc.CreateAttribute(ImageInfo.XWORD_IMG_ATTRIBUTE);
- //Adding the attribute to the xhtml code.
- Guid imgId = Guid.NewGuid();
- attr.Value = imgId.ToString();
- node.Attributes.Append(attr);
- //Adding the image to the current image list.
- ImageInfo imgInfo = new ImageInfo();
- imgInfo.imgWebSrc = src;
- if (node.Attributes["alt"] != null)
- {
- imgInfo.altText = node.Attributes["alt"].Value;
- }
- manager.States.Images.Add(imgId, imgInfo);
- //Downloading image
- String imgURL = node.Attributes["src"].Value;
- if (imgURL == "") continue;
- if (imgURL[0] == '/')
- {
- imgURL = ServerURL + imgURL;
- }
- ParameterizedThreadStart pts = new ParameterizedThreadStart(DownloadImage);
- String folder = LocalFolder + "\\" + LocalFilename + "_Files";
- Object param = new ImageDownloadInfo(imgURL, folder, imgInfo);
- pts.Invoke(param);
- imgURL = folder + "\\" + Path.GetFileName(imgURL);
- imgURL = "file:///" + imgURL.Replace("\\", "/");
- node.Attributes["src"].Value = imgURL;
- }
- }
- }
-
-
-
- ///
- /// Downloads the image from the server and saves it to a local file.
- ///
- /// The image data. Instance of ImageDownloadInfo used in cross thread data sharing.
- private void DownloadImage(Object obj)
- {
- try
- {
- ImageDownloadInfo idi = (ImageDownloadInfo)obj;
- String targetFolder = idi.DownloadFolder;
- String URI = idi.URI;
- WebClient webClient = new WebClient();
- if (!Directory.Exists(targetFolder))
- {
- Directory.CreateDirectory(targetFolder);
- }
- String path = targetFolder + "\\" + Path.GetFileName(URI);
- FileInfo fileInfo = new FileInfo(path);
- byte[] binaryContent = webClient.DownloadData(URI);
- FileStream fileStream = fileInfo.Create();
- fileStream.Write(binaryContent, 0, binaryContent.Length);
- fileStream.Close();
- //Set the image element properties in the converters imageList.
- idi.ImageInfo.filePath = fileInfo.FullName;
- idi.ImageInfo.imgLocalSrc = "file:///" + fileInfo.FullName.Replace("\\","/");
- idi.ImageInfo.fileURI = URI;
- idi.ImageInfo.fileSize = fileInfo.Length;
- idi.ImageInfo.fileCreationDate = fileInfo.CreationTime;
- }
- catch (InvalidCastException) { }
- catch (WebException) { };
- }
}
}
\ No newline at end of file