Index: xword/ContentFiltering/ContentFiltering.csproj
===================================================================
--- xword/ContentFiltering/ContentFiltering.csproj	(revision 21692)
+++ xword/ContentFiltering/ContentFiltering.csproj	(working copy)
@@ -60,10 +60,24 @@
     <Compile Include="Html\HtmlUtil.cs" />
     <Compile Include="Office\Word\AbstractConverter.cs" />
     <Compile Include="Office\Word\BidirectionalConversionStates.cs" />
+    <Compile Include="Office\Word\Cleaners\BodyContentExtractor.cs" />
+    <Compile Include="Office\Word\Cleaners\CommentsRemover.cs" />
+    <Compile Include="Office\Word\Cleaners\CorrectAttributesCleaner.cs" />
+    <Compile Include="Office\Word\Cleaners\CorrectTagsClosingCleaner.cs" />
+    <Compile Include="Office\Word\Cleaners\DoctypeRemover.cs" />
+    <Compile Include="Office\Word\Cleaners\EmptyParagraphsCleaner.cs" />
+    <Compile Include="Office\Word\Cleaners\HeadSectionRemover.cs" />
+    <Compile Include="Office\Word\Cleaners\ListCharsCleaner.cs" />
+    <Compile Include="Office\Word\Cleaners\NbspBetweenTagsRemover.cs" />
+    <Compile Include="Office\Word\Cleaners\NbspReplacer.cs" />
+    <Compile Include="Office\Word\Cleaners\OfficeNameSpacesTagsRemover.cs" />
+    <Compile Include="Office\Word\Cleaners\TidyHTMLCleaner.cs" />
+    <Compile Include="Office\Word\Cleaners\XmlNamespaceDefinitionsReplacer.cs" />
     <Compile Include="Office\Word\ConversionManager.cs" />
     <Compile Include="Office\Word\ConverterActionState.cs" />
     <Compile Include="Office\Word\Filters\GrammarAndSpellingErrorsFilter.cs" />
     <Compile Include="Office\Word\Filters\IDOMFilter.cs" />
+    <Compile Include="Office\Word\Cleaners\IHTMLCleaner.cs" />
     <Compile Include="Office\Word\Filters\LocalImageAdaptorFilter.cs" />
     <Compile Include="Office\Word\Filters\WebImageAdaptorFilter.cs" />
     <Compile Include="Office\Word\Filters\WebListsAdaptorFilter.cs" />
@@ -76,6 +90,18 @@
     <Compile Include="Office\Word\Filters\StyleRemoverFilter.cs" />
     <Compile Include="Office\Word\WebToLocalHTML.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\BodyContentExtractorTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\CommentsRemoverTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\CorrectAttributesCleanerTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\CorrectTagsClosingCleanerTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\DoctypeRemoverTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\EmptyParagraphsCleanerTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\HeadSectionRemoverTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\ListCharsCleanerTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\NbspBetweenTagsRemoverTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\NbspReplacerTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\OfficeNameSpacesTagsRemoverTest.cs" />
+    <Compile Include="Test\Office\Word\Cleaners\XmlNamespaceDefinitionsReplacerTest.cs" />
     <Compile Include="Test\Office\Word\Filters\GrammarAndSpellingErrorsFilterTest.cs" />
     <Compile Include="Test\Office\Word\Filters\LocalImageAdaptorFilterTest.cs" />
     <Compile Include="Test\Office\Word\Filters\LocalMacrosAdaptorFilterTest.cs" />
Index: xword/ContentFiltering/Html/HtmlUtil.cs
===================================================================
--- xword/ContentFiltering/Html/HtmlUtil.cs	(revision 21692)
+++ xword/ContentFiltering/Html/HtmlUtil.cs	(working copy)
@@ -6,6 +6,7 @@
 using System.Text.RegularExpressions;
 using TidyNet;
 using TidyNet.Dom;
+using ContentFiltering.Office.Word.Cleaners;
 
 namespace XWiki.Html
 {
@@ -21,7 +22,7 @@
         /// <returns>The cleaned html code.</returns>
         public String HtmlToXhtml(String htmlSource)
         {
-            return CleanHTML(htmlSource, false);            
+            return new TidyHTMLCleaner(false).Clean(htmlSource);       
         }
 
         /// <summary>
@@ -31,191 +32,11 @@
         /// <returns>The cleaned html.</returns>
         public String WordHtmlToXhtml(String htmlSource)
         {
-            return CleanHTML(htmlSource, true);            
+            return new TidyHTMLCleaner(true).Clean(htmlSource);
         }
 
-        /// <summary>
-        /// Uses Tidy.Net to clean a html source.
-        /// </summary>
-        /// <param name="htmlSource">The original html source.</param>
-        /// <param name="isWordHtml">Specifies if the source is an output from Microsoft Word</param>
-        /// <returns>The cleaned Html.</returns>
-        public String CleanHTML(String htmlSource,bool isWordHtml)
-        {
-            Tidy tidy = new Tidy();
-            //Options required dor xhtml conversion.
-            tidy.Options.DocType = DocType.Strict;
-            tidy.Options.DropFontTags = true;
-            tidy.Options.LogicalEmphasis = true;
-            tidy.Options.Xhtml = true;
-            tidy.Options.XmlOut = true;
-            tidy.Options.MakeClean = true;
-            tidy.Options.TidyMark = false;
-            tidy.Options.DropEmptyParas = true;
-            tidy.Options.IndentContent = true;
-            tidy.Options.SmartIndent = true;
-            tidy.Options.Word2000 = isWordHtml;
-            tidy.Options.EncloseBlockText = true;
-            
-            tidy.Options.XmlTags = true;
-            tidy.Options.FixComments = true;
-            TidyMessageCollection tmc = new TidyMessageCollection();
-            MemoryStream input = new MemoryStream();
-            MemoryStream output = new MemoryStream();
-            
-            byte[] byteArray = Encoding.UTF8.GetBytes(htmlSource);
-            input.Write(byteArray, 0, byteArray.Length);
-            input.Position = 0;
-            try
-            {
-                tidy.Parse(input, output, tmc);
-            }
-            catch (FormatException ex)
-            {
-                Log.Exception(ex);
-                return htmlSource;
-            }
-            string cleanContent = Encoding.UTF8.GetString(output.ToArray());
-            return cleanContent;
-        }
 
         /// <summary>
-        /// Gets a list with all the tags that contain attributes.
-        /// </summary>
-        /// <param name="htmlSource">The html source.</param>
-        /// <returns>A of strings with the tags containing attributes.</returns>
-        public List<String> GetTagsWithAttributes(String htmlSource)
-        {
-            List<String> tags = new List<String>();
-            int startIndex = 0;
-            int endIndex = 0;
-            do
-            {
-                startIndex = htmlSource.IndexOf('<', endIndex);
-                if (startIndex >= 0)
-                {
-                    endIndex = htmlSource.IndexOf('>', startIndex);
-                    if (endIndex >= 0)
-                    {
-                        String tag = htmlSource.Substring(startIndex, endIndex - startIndex + 1);
-                        if (tag.Contains('='))
-                        {
-                            tags.Add(tag);
-                        }
-                    }
-                }
-
-            } while (startIndex < (htmlSource.Length - 1) && endIndex < (htmlSource.Length - 1) && (startIndex >= 0) && (endIndex >= 0));
-            return tags;
-        }
-        
-        /// <summary>
-        /// Corrects the img and br tags generated by Word.
-        /// </summary>
-        /// <param name="htmlSource">The html source to be corrected.</param>
-        /// <param name="tagName">The name if the tag. Eg: "img", "br".</param>
-        /// <returns>The corrected html coe.</returns>
-        public String CorrectTagsClosing(String htmlSource, String tagName)
-        {
-            //The string builder will be appendend when more then 1000 corrupted tags are found.
-            int slack = 1000;
-            string correctionString = " /";
-            string searchedString = "<" + tagName;
-            StringBuilder sb = new StringBuilder(htmlSource.Length + slack);
-            sb.Insert(0, htmlSource);
-            int startIndex = 0;
-            int endIndex = 0;
-            int nonValidTags = 0;
-            do
-            {
-                startIndex = htmlSource.IndexOf(searchedString, endIndex);
-                if (startIndex >= 0)
-                {
-                    endIndex = htmlSource.IndexOf('>', startIndex);
-                    if (endIndex > 0)
-                    {
-                        //The tag is missing  the '/' before the '>' character
-                        if (!(htmlSource[endIndex - 1].CompareTo('/') == 0))
-                        {
-                            sb.Insert(endIndex + nonValidTags * correctionString.Length, correctionString);
-                            nonValidTags++;
-                        }
-                    }
-                }
-            } while (startIndex < (htmlSource.Length - 1) && endIndex < (htmlSource.Length - 1) && (startIndex >= 0) && (endIndex >= 0));
-            return sb.ToString();
-        }
-
-        /// <summary>
-        /// Corrects the attributes that miss ' or ".
-        /// </summary>
-        /// <param name="htmlSource">The original html source code.</param>
-        /// <returns>The source with corrected attributes.</returns>
-        public String CorrectAttributes(String htmlSource)
-        {
-            StringBuilder sb = new StringBuilder(htmlSource);
-            List<String> tags = GetTagsWithAttributes(htmlSource);
-            foreach(String initialValue in tags)
-            {
-                String value = initialValue;
-                char[] separators = {' ','>','/','\r'};
-                bool hasChanged = false;
-                foreach (String s in initialValue.Split(separators))
-                {
-                    String[] attribute = s.Split('=');
-                    if(attribute.Length == 2)
-                    {
-                        try
-                        {
-                            String newValue = attribute[1];
-                            if (attribute[1][0] != '\'' && attribute[1][0] != '\"')
-                            {
-                                newValue = attribute[0] + "=\"" + attribute[1] + "\"";
-                                value = value.Replace(s, newValue);
-                                hasChanged = true;
-                            }
-                        }
-                        catch (IndexOutOfRangeException) { };
-                    }
-                }
-                if (hasChanged)
-                {
-                    sb = sb.Replace(initialValue, value);
-                }
-            }
-            return sb.ToString();
-        }
-
-        /// <summary>
-        /// Removes the tags that are in the office namespaces.
-        /// </summary>
-        /// <param name="content">The original content.</param>
-        /// <returns>The cleaned content.</returns>
-        public String RemoveOfficeNameSpacesTags(String content)
-        {
-            bool foundTags = false;
-            int startIndex = 0;
-            int endIndex = 0;
-            do
-            {
-                foundTags = false;
-                startIndex = content.IndexOf("<o:", startIndex);
-                if (startIndex >= 0)
-                {
-                    endIndex = content.IndexOf("</o:", startIndex);
-                    if(endIndex >= 0)
-                    {
-                        endIndex = content.IndexOf(">",endIndex + 1);
-                        content = content.Remove(startIndex, endIndex - startIndex + 1);                       
-                    }
-                    foundTags = true;
-                    startIndex = endIndex - (endIndex - startIndex + 1);
-                }                
-            } while (foundTags);
-            return content;
-        }
-
-        /// <summary>
         /// Removes a char sequence that starts and ends with the given valaues.
         /// </summary>
         /// <param name="content">The initial content.</param>
@@ -245,25 +66,8 @@
             return content;
         }
 
+        
         /// <summary>
-        /// Gets the content between the opening and closing html tags.
-        /// </summary>
-        /// <param name="htmlCode">The html source to be </param>
-        /// <returns>the inner html of the body.</returns>
-        public String GetBodyContent(String htmlCode)
-        {
-            //Delete header & footer
-            int startIndex, endIndex;
-            startIndex = htmlCode.IndexOf("<body");
-            endIndex = htmlCode.IndexOf(">", startIndex);
-            htmlCode = htmlCode.Remove(0, endIndex + 1);
-            startIndex = htmlCode.IndexOf("</body");
-            if (startIndex >= 0)
-                htmlCode = htmlCode.Remove(startIndex);
-            return htmlCode;
-        }
-
-        /// <summary>
         /// Indents the given html source.
         /// </summary>
         /// <param name="htmlSource">The html source.</param>
@@ -285,67 +89,8 @@
             return htmlSource;
         }
 
-        /// <summary>
-        /// Removes the doctype declaration from an given html code.
-        /// </summary>
-        /// <param name="htmlCode">The original html code.</param>
-        /// <returns>The modified html code.</returns>
-        public String RemoveDoctype(String htmlCode)
-        {
-            int startIndex, endIndex;
-            startIndex = htmlCode.IndexOf("<!DOCTYPE");
-            endIndex = htmlCode.IndexOf(">", startIndex);
-            return htmlCode.Remove(startIndex, endIndex - startIndex);
-        }
 
         /// <summary>
-        /// Gets a string representing the opening html tag with the XML namespace definitions, if any.
-        /// </summary>
-        /// <param name="htmlCode">The html source to be processed</param>
-        /// <returns>a string representing the opening html tag.</returns>
-        public String GetXmlNamespaceDefinitions(String htmlCode)
-        {
-            int startIndex, endIndex;
-            startIndex = htmlCode.IndexOf("<html");
-
-            if (startIndex < 0)
-            {
-                return null;
-            }
-            else
-            {
-                endIndex = htmlCode.IndexOf(">", startIndex);
-                return htmlCode.Substring(startIndex, endIndex - startIndex + 1);
-            }
-        }
-
-        /// <summary>
-        /// Replaces the opening html tag with a given one.
-        /// </summary>
-        /// <param name="htmlCode">The html source.</param>
-        /// <param name="newHtmlTag">The new html tag.</param>
-        /// <returns></returns>
-        public String ReplaceXmlNamespaceDefinitions(String htmlCode, String newHtmlTag)
-        {
-            String oldHtmlTag = GetXmlNamespaceDefinitions(htmlCode);
-            if (oldHtmlTag == null)
-            {
-                if (!htmlCode.Contains("<body"))
-                {
-                    htmlCode = htmlCode.Insert(0, "<body>");
-                    htmlCode = htmlCode.Insert(htmlCode.Length, "</body>");
-                }
-                htmlCode = htmlCode.Insert(0, newHtmlTag);
-                htmlCode = htmlCode.Insert(htmlCode.Length, "</html>");
-            }
-            else
-            {
-                htmlCode = htmlCode.Replace(oldHtmlTag, newHtmlTag);            
-            }
-            return htmlCode;
-        }
-
-        /// <summary>
         /// Replaces the body tag with a new given one.
         /// </summary>
         /// <param name="initialContent">The initial html code.</param>
Index: xword/ContentFiltering/Office/Word/Cleaners/BodyContentExtractor.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/BodyContentExtractor.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/BodyContentExtractor.cs	(revision 0)
@@ -0,0 +1,33 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class BodyContentExtractor : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Gets the content between the opening and closing body tags.
+        /// </summary>
+        /// <param name="htmlCode">The html source to extract the body content from.</param>
+        /// <returns>The inner html of the body; input without an opening body tag keeps its start.</returns>
+        public string Clean(string htmlCode)
+        {
+            //Delete the header, i.e. everything up to the end of the opening body tag.
+            //A missing tag must be skipped: IndexOf('>', -1) throws ArgumentOutOfRangeException.
+            int bodyStart = htmlCode.IndexOf("<body", StringComparison.Ordinal);
+            if (bodyStart >= 0)
+            {
+                htmlCode = htmlCode.Remove(0, htmlCode.IndexOf('>', bodyStart) + 1);
+            }
+            //Delete the footer: the closing body tag and everything after it.
+            int bodyEnd = htmlCode.IndexOf("</body", StringComparison.Ordinal);
+            return bodyEnd >= 0 ? htmlCode.Remove(bodyEnd) : htmlCode;
+        }
+
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/CommentsRemover.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/CommentsRemover.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/CommentsRemover.cs	(revision 0)
@@ -0,0 +1,32 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using XWiki.Html;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class CommentsRemover : IHTMLCleaner
+    {
+        private HtmlUtil htmlUtil;
+
+        public CommentsRemover()
+        {
+            this.htmlUtil = new HtmlUtil();
+        }
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Strips '&lt;!-- ... --&gt;' sequences first and '&lt;![ ... ]&gt;' sequences second from an html source.
+        /// </summary>
+        /// <param name="htmlSource">The HTML source to clean.</param>
+        /// <returns>The HTML source after both removal passes.</returns>
+        public string Clean(string htmlSource)
+        {
+            string withoutComments = this.htmlUtil.RemoveSpecificTagContent(htmlSource, "<!--", "-->");
+            string withoutMarkedSections = this.htmlUtil.RemoveSpecificTagContent(withoutComments, "<![", "]>");
+            return withoutMarkedSections;
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/CorrectAttributesCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/CorrectAttributesCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/CorrectAttributesCleaner.cs	(revision 0)
@@ -0,0 +1,84 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class CorrectAttributesCleaner : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Corrects the attributes that miss ' or " by wrapping their values in double quotes.
+        /// </summary>
+        /// <param name="htmlSource">The original html source code.</param>
+        /// <returns>The source with corrected attributes.</returns>
+        public string Clean(string htmlSource)
+        {
+            StringBuilder sb = new StringBuilder(htmlSource);
+            //Tags are cut at these characters, so a quoted value that contains one of
+            //them (e.g. a space) is examined in pieces.
+            char[] separators = { ' ', '>', '/', '\r' };
+            foreach (String initialValue in GetTagsWithAttributes(htmlSource))
+            {
+                String value = initialValue;
+                bool hasChanged = false;
+                foreach (String s in initialValue.Split(separators))
+                {
+                    String[] attribute = s.Split('=');
+                    //Only name=value pieces qualify. An empty value ("name=") is skipped with an
+                    //explicit check instead of catching the IndexOutOfRangeException it caused.
+                    if (attribute.Length != 2 || attribute[1].Length == 0)
+                    {
+                        continue;
+                    }
+                    char firstChar = attribute[1][0];
+                    if (firstChar != '\'' && firstChar != '\"')
+                    {
+                        value = value.Replace(s, attribute[0] + "=\"" + attribute[1] + "\"");
+                        hasChanged = true;
+                    }
+                }
+                if (hasChanged)
+                {
+                    //Every occurrence of the original tag text in the document is replaced.
+                    sb = sb.Replace(initialValue, value);
+                }
+            }
+            return sb.ToString();
+        }
+
+        #endregion IHTMLCleaner Members
+
+        /// <summary>
+        /// Gets a list with all the tags ('&lt;' ... '&gt;' spans) that contain an '=' character.
+        /// </summary>
+        /// <param name="htmlSource">The html source.</param>
+        /// <returns>A list of strings with the tags containing attributes, in document order.</returns>
+        public List<String> GetTagsWithAttributes(String htmlSource)
+        {
+            List<String> tags = new List<String>();
+            int startIndex = 0;
+            int endIndex = 0;
+            do
+            {
+                startIndex = htmlSource.IndexOf('<', endIndex);
+                if (startIndex >= 0)
+                {
+                    endIndex = htmlSource.IndexOf('>', startIndex);
+                    if (endIndex >= 0)
+                    {
+                        String tag = htmlSource.Substring(startIndex, endIndex - startIndex + 1);
+                        if (tag.Contains('='))
+                        {
+                            tags.Add(tag);
+                        }
+                    }
+                }
+
+            } while (startIndex < (htmlSource.Length - 1) && endIndex < (htmlSource.Length - 1) && (startIndex >= 0) && (endIndex >= 0));
+            return tags;
+        }
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/CorrectTagsClosingCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/CorrectTagsClosingCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/CorrectTagsClosingCleaner.cs	(revision 0)
@@ -0,0 +1,60 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class CorrectTagsClosingCleaner : IHTMLCleaner
+    {
+        private readonly string tagName;
+
+        /// <summary>
+        /// Cleaner constructor.
+        /// </summary>
+        /// <param name="tagName">The name of the tag. Eg: "img", "br".</param>
+        public CorrectTagsClosingCleaner(string tagName)
+        {
+            this.tagName = tagName;
+        }
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Inserts " /" before the closing '&gt;' of every configured tag (e.g. the img and br tags generated by Word) that lacks it.
+        /// </summary>
+        /// <param name="htmlSource">The html source to be corrected.</param>
+        /// <returns>The corrected html code.</returns>
+        public string Clean(string htmlSource)
+        {
+            //Room for 500 two-character corrections is reserved up front; beyond that the builder simply grows.
+            int slack = 1000;
+            string correctionString = " /";
+            string searchedString = "<" + tagName;
+            StringBuilder sb = new StringBuilder(htmlSource.Length + slack);
+            sb.Insert(0, htmlSource);
+            int startIndex = 0;
+            int endIndex = 0;
+            int nonValidTags = 0;
+            do
+            {
+                startIndex = htmlSource.IndexOf(searchedString, endIndex, StringComparison.Ordinal);
+                if (startIndex >= 0)
+                {
+                    endIndex = htmlSource.IndexOf('>', startIndex);
+                    if (endIndex > 0)
+                    {
+                        //The tag is missing the '/' before the '>' character. Earlier insertions shifted sb, hence the offset.
+                        if (htmlSource[endIndex - 1] != '/')
+                        {
+                            sb.Insert(endIndex + nonValidTags * correctionString.Length, correctionString);
+                            nonValidTags++;
+                        }
+                    }
+                }
+            } while (startIndex < (htmlSource.Length - 1) && endIndex < (htmlSource.Length - 1) && (startIndex >= 0) && (endIndex >= 0));
+            return sb.ToString();
+        }
+
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/DoctypeRemover.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/DoctypeRemover.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/DoctypeRemover.cs	(revision 0)
@@ -0,0 +1,28 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class DoctypeRemover : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Removes the doctype declaration from a given html code.
+        /// </summary>
+        /// <param name="htmlCode">The original html code.</param>
+        /// <returns>The modified html code; the input itself when it holds no complete doctype.</returns>
+        public string Clean(string htmlCode)
+        {
+            int startIndex = htmlCode.IndexOf("<!DOCTYPE", StringComparison.Ordinal);
+            int endIndex = startIndex < 0 ? -1 : htmlCode.IndexOf('>', startIndex);
+            //Without a doctype (or its closing '>') the unguarded calls throw ArgumentOutOfRangeException.
+            if (endIndex < 0) { return htmlCode; }
+            //NOTE(review): as before, the closing '>' itself is left in place - confirm that is intended.
+            return htmlCode.Remove(startIndex, endIndex - startIndex);
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/EmptyParagraphsCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/EmptyParagraphsCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/EmptyParagraphsCleaner.cs	(revision 0)
@@ -0,0 +1,26 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class EmptyParagraphsCleaner : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Turns the literal sequences '&lt;o:p&gt;&lt;/o:p&gt;' and '&lt;p&gt;&amp;nbsp;&lt;/p&gt;' into '&lt;br /&gt;'.
+        /// </summary>
+        /// <param name="htmlSource">Initial HTML source.</param>
+        /// <returns>The HTML source with both kinds of empty paragraph replaced by line breaks.</returns>
+        public string Clean(string htmlSource)
+        {
+            const string lineBreak = "<br />";
+            string withoutOfficeParagraphs = htmlSource.Replace("<o:p></o:p>", lineBreak);
+            return withoutOfficeParagraphs.Replace("<p>&nbsp;</p>", lineBreak);
+        }
+
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/HeadSectionRemover.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/HeadSectionRemover.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/HeadSectionRemover.cs	(revision 0)
@@ -0,0 +1,31 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using XWiki.Html;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class HeadSectionRemover : IHTMLCleaner
+    {
+        private HtmlUtil htmlUtil;
+
+        public HeadSectionRemover()
+        {
+            this.htmlUtil = new HtmlUtil();
+        }
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Strips the section delimited by '&lt;head&gt;' and '&lt;/head&gt;' from an html source.
+        /// </summary>
+        /// <param name="htmlSource">The HTML source.</param>
+        /// <returns>The HTML source as returned by the removal helper for those two delimiters.</returns>
+        public string Clean(string htmlSource)
+        {
+            string withoutHead = this.htmlUtil.RemoveSpecificTagContent(htmlSource, "<head>", "</head>");
+            return withoutHead;
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/IHTMLCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/IHTMLCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/IHTMLCleaner.cs	(revision 0)
@@ -0,0 +1,15 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Interface for HTML cleaners (pre-DOM filters): one <c>Clean</c> step that takes an HTML source string and returns a string.
+    /// </summary>
+    public interface IHTMLCleaner
+    {
+        string Clean(string htmlSource);
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/ListCharsCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/ListCharsCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/ListCharsCleaner.cs	(revision 0)
@@ -0,0 +1,27 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class ListCharsCleaner : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Substitutes the letter 'o' for every middle-dot and every section-sign character,
+        /// the characters MS Word uses for bullet lists (like <code>&amp;middot;</code>).
+        /// </summary>
+        /// <param name="htmlSource">Initial HTML source.</param>
+        /// <returns>The HTML source with both characters replaced wherever they occur.</returns>
+        public string Clean(string htmlSource)
+        {
+            const char bullet = 'o';
+            string withoutMiddleDots = htmlSource.Replace('·', bullet);
+            return withoutMiddleDots.Replace('§', bullet);
+        }
+
+        #endregion  IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/NbspBetweenTagsRemover.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/NbspBetweenTagsRemover.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/NbspBetweenTagsRemover.cs	(revision 0)
@@ -0,0 +1,24 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class NbspBetweenTagsRemover : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Drops a single &amp;nbsp; entity that sits directly between the end of one tag and the start of the next.
+        /// </summary>
+        /// <param name="htmlSource">Initial HTML source.</param>
+        /// <returns>The HTML with every '&gt;&amp;nbsp;&lt;' collapsed to '&gt;&lt;'.</returns>
+        public string Clean(string htmlSource)
+        {
+            const string adjacentTags = "><";
+            return htmlSource.Replace(">&nbsp;<", adjacentTags);
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/NbspReplacer.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/NbspReplacer.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/NbspReplacer.cs	(revision 0)
@@ -0,0 +1,24 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class NbspReplacer : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+        /// <summary>
+        /// Swaps every <code>&amp;nbsp;</code> entity in the source for an ordinary space.
+        /// </summary>
+        /// <param name="htmlSource">Initial HTML source.</param>
+        /// <returns>Cleaned HTML.</returns>
+        public string Clean(string htmlSource)
+        {
+            const string nbspEntity = "&nbsp;";
+            string withPlainSpaces = htmlSource.Replace(nbspEntity, " ");
+            return withPlainSpaces;
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/OfficeNameSpacesTagsRemover.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/OfficeNameSpacesTagsRemover.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/OfficeNameSpacesTagsRemover.cs	(revision 0)
@@ -0,0 +1,43 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class OfficeNameSpacesTagsRemover : IHTMLCleaner
+    {
+        #region IHTMLCleaner Members
+
+        /// <summary>
+        /// Removes every span that starts at an opening <c>&lt;o:</c> tag and runs through the end of
+        /// the next <c>&lt;/o:...&gt;</c> closing tag, together with everything in between.
+        /// </summary>
+        /// <param name="htmlSource">The original content.</param>
+        /// <returns>The cleaned content; <paramref name="htmlSource"/> itself when it is null or empty.</returns>
+        public string Clean(string htmlSource)
+        {
+            if (string.IsNullOrEmpty(htmlSource))
+            {
+                return htmlSource;
+            }
+            int searchFrom = 0;
+            while (true)
+            {
+                int openIndex = htmlSource.IndexOf("<o:", searchFrom, StringComparison.Ordinal);
+                int closeIndex = openIndex < 0 ? -1 : htmlSource.IndexOf("</o:", openIndex, StringComparison.Ordinal);
+                int closeEnd = closeIndex < 0 ? -1 : htmlSource.IndexOf(">", closeIndex + 1, StringComparison.Ordinal);
+                if (closeEnd < 0)
+                {
+                    // No opening tag left, or it is never closed: stop rather than rescan the same
+                    // spot forever or hand a negative length to Remove.
+                    return htmlSource;
+                }
+                htmlSource = htmlSource.Remove(openIndex, closeEnd - openIndex + 1);
+                // "<o:" is 3 chars, so a match created by the splice starts at most 2 chars earlier.
+                searchFrom = Math.Max(0, openIndex - 2);
+            }
+        }
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/TidyHTMLCleaner.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/TidyHTMLCleaner.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/TidyHTMLCleaner.cs	(revision 0)
@@ -0,0 +1,67 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using TidyNet;
+using System.IO;
+using XWiki;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class TidyHTMLCleaner : IHTMLCleaner
+    {
+        // True when the source is Microsoft Word output; forwarded to Tidy's Word2000 option.
+        private bool isWordHtml;
+
+        public TidyHTMLCleaner(bool isWordHtml)
+        {
+            this.isWordHtml = isWordHtml;
+        }
+        #region IHTMLCleaner Members
+        /// <summary>
+        /// Uses Tidy.Net to clean a html source.
+        /// </summary>
+        /// <param name="htmlSource">The original html source.</param>
+        /// <returns>The cleaned Html, or the unchanged source when Tidy throws a FormatException.</returns>
+        public string Clean(string htmlSource)
+        {
+            Tidy tidy = new Tidy();
+            //Options required for xhtml conversion.
+            tidy.Options.DocType = DocType.Strict;
+            tidy.Options.DropFontTags = true;
+            tidy.Options.LogicalEmphasis = true;
+            tidy.Options.Xhtml = true;
+            tidy.Options.XmlOut = true;
+            tidy.Options.MakeClean = true;
+            tidy.Options.TidyMark = false;
+            tidy.Options.DropEmptyParas = true;
+            tidy.Options.IndentContent = true;
+            tidy.Options.SmartIndent = true;
+            tidy.Options.Word2000 = isWordHtml;
+            tidy.Options.EncloseBlockText = true;
+
+            tidy.Options.XmlTags = true;
+            tidy.Options.FixComments = true;
+            TidyMessageCollection tmc = new TidyMessageCollection();
+            using (MemoryStream input = new MemoryStream())
+            using (MemoryStream output = new MemoryStream())
+            {
+                byte[] byteArray = Encoding.UTF8.GetBytes(htmlSource);
+                input.Write(byteArray, 0, byteArray.Length);
+                input.Position = 0;
+                try
+                {
+                    tidy.Parse(input, output, tmc);
+                }
+                catch (FormatException ex)
+                {
+                    Log.Exception(ex);
+                    return htmlSource;
+                }
+                return Encoding.UTF8.GetString(output.ToArray());
+            }
+        }
+
+        #endregion IHTMLCleaner Members
+    }
+}
Index: xword/ContentFiltering/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacer.cs
===================================================================
--- xword/ContentFiltering/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacer.cs	(revision 0)
+++ xword/ContentFiltering/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacer.cs	(revision 0)
@@ -0,0 +1,68 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace ContentFiltering.Office.Word.Cleaners
+{
+    public class XmlNamespaceDefinitionsReplacer : IHTMLCleaner
+    {
+        // Opening html tag that Clean writes into the document.
+        private string newHtmlTag;
+
+        public XmlNamespaceDefinitionsReplacer(string newHtmlTag)
+        {
+            this.newHtmlTag = newHtmlTag;
+        }
+
+        #region IHTMLCleaner Members
+        /// <summary>
+        /// Replaces the opening html tag with the one given to the constructor. When the source has
+        /// no complete opening html tag, the source is wrapped in the new tag and a closing
+        /// <c>&lt;/html&gt;</c>, adding a <c>&lt;body&gt;</c> element first if none is present.
+        /// </summary>
+        /// <param name="htmlCode">The html source.</param>
+        /// <returns>Cleaned HTML source.</returns>
+        public string Clean(string htmlCode)
+        {
+            String oldHtmlTag = GetXmlNamespaceDefinitions(htmlCode);
+            if (oldHtmlTag != null)
+            {
+                return htmlCode.Replace(oldHtmlTag, newHtmlTag);
+            }
+            if (!htmlCode.Contains("<body"))
+            {
+                htmlCode = "<body>" + htmlCode + "</body>";
+            }
+            return newHtmlTag + htmlCode + "</html>";
+        }
+
+        #endregion IHTMLCleaner Members
+
+        /// <summary>
+        /// Gets a string representing the opening html tag with the XML namespace definitions, if any.
+        /// </summary>
+        /// <param name="htmlCode">The html source to be processed</param>
+        /// <returns>
+        /// The text from <c>&lt;html</c> through the first following <c>&gt;</c>, or null when the
+        /// source has no <c>&lt;html</c> or the tag is never closed.
+        /// </returns>
+        public String GetXmlNamespaceDefinitions(String htmlCode)
+        {
+            int startIndex = htmlCode.IndexOf("<html", StringComparison.Ordinal);
+            if (startIndex < 0)
+            {
+                return null;
+            }
+
+            int endIndex = htmlCode.IndexOf(">", startIndex, StringComparison.Ordinal);
+            if (endIndex < 0)
+            {
+                // Unterminated tag: Substring would be handed a negative length, so report "no tag".
+                return null;
+            }
+            return htmlCode.Substring(startIndex, endIndex - startIndex + 1);
+        }
+
+    }
+}
Index: xword/ContentFiltering/Office/Word/LocalToWebHTML.cs
===================================================================
--- xword/ContentFiltering/Office/Word/LocalToWebHTML.cs	(revision 21692)
+++ xword/ContentFiltering/Office/Word/LocalToWebHTML.cs	(working copy)
@@ -9,6 +9,7 @@
 using System.Collections;
 using ContentFiltering.Office.Word;
 using ContentFiltering.Office.Word.Filters;
+using ContentFiltering.Office.Word.Cleaners;
 
 namespace XWiki.Office.Word
 {
@@ -29,26 +30,24 @@
         {
             XmlDocument xmlDoc = new XmlDocument();
             //xmlDoc.XmlResolver = null;
-            String uncleanedContent = htmlUtil.CorrectAttributes(content);
-            uncleanedContent = htmlUtil.CorrectTagsClosing(uncleanedContent, "img");
-            uncleanedContent = htmlUtil.CorrectTagsClosing(uncleanedContent, "br");
-            content = htmlUtil.CleanHTML(uncleanedContent, true);
+
+            String uncleanedContent = new CorrectAttributesCleaner().Clean(content);
+            uncleanedContent = new CorrectTagsClosingCleaner("img").Clean(uncleanedContent);
+            uncleanedContent = new CorrectTagsClosingCleaner("br").Clean(uncleanedContent);
+            content = new TidyHTMLCleaner(true).Clean(uncleanedContent);
+
             if (content.Length == 0)
             {
                 content = uncleanedContent;
             }
-            //content = htmlUtil.RemoveOfficeNameSpacesTags(content);
-            //content = htmlUtil.ReplaceBody(content, "<body>");
-            content = htmlUtil.ReplaceXmlNamespaceDefinitions(content, HTML_OPENING_TAG);
-            content = content.Replace('·','o');
-            content = content.Replace('§', 'o');//"·"; "o"; "§";
-            //Removing &nbsp; from Word and Tidy output
-            content = content.Replace("<o:p></o:p>", "<br />");
-            content = content.Replace("<p>&nbsp;</p>", "<br />");
-            content = content.Replace(">&nbsp;<", "><");
-            content = content.Replace("<o:p>", "");
-            content = content.Replace("</o:p>", "");
-            content = content.Replace("&nbsp;", " ");
+            
+            content = new XmlNamespaceDefinitionsReplacer(HTML_OPENING_TAG).Clean(content);
+            content = new ListCharsCleaner().Clean(content);
+            content = new EmptyParagraphsCleaner().Clean(content);
+            content = new NbspBetweenTagsRemover().Clean(content);
+            content = new OfficeNameSpacesTagsRemover().Clean(content);
+            content = new NbspReplacer().Clean(content);
+
             xmlDoc.LoadXml(content);
 
             List<IDOMFilter> contentFilters = new List<IDOMFilter>()
Index: xword/ContentFiltering/Office/Word/WebToLocalHTML.cs
===================================================================
--- xword/ContentFiltering/Office/Word/WebToLocalHTML.cs	(revision 21692)
+++ xword/ContentFiltering/Office/Word/WebToLocalHTML.cs	(working copy)
@@ -12,6 +12,7 @@
 using XWiki.Xml;
 using System.Collections;
 using ContentFiltering.Office.Word.Filters;
+using ContentFiltering.Office.Word.Cleaners;
 
 namespace XWiki.Office.Word
 {
@@ -92,14 +93,13 @@
         public String AdaptSource(String content)
         {
             XmlDocument xmlDoc = new XmlDocument();
-            content = htmlUtil.RemoveOfficeNameSpacesTags(content);
-            //String namespaces = htmlUtil.GetXmlNamespaceDefinitions(content);
-            content = htmlUtil.CleanHTML(content, false);
-            content = htmlUtil.ReplaceXmlNamespaceDefinitions(content, HTML_OPENING_TAG);
-            content = content.Replace("<o:p></o:p>", "<br />");
-            content = content.Replace("<p>&nbsp;</p>", "<br />");
-            content = content.Replace(">&nbsp;<", "><");
-            content = content.Replace("&nbsp;", " ");
+            content = new OfficeNameSpacesTagsRemover().Clean(content);
+            
+            content = new TidyHTMLCleaner(false).Clean(content);
+            content = new XmlNamespaceDefinitionsReplacer(HTML_OPENING_TAG).Clean(content);
+            content = new EmptyParagraphsCleaner().Clean(content);
+            content = new NbspBetweenTagsRemover().Clean(content);
+            content = new NbspReplacer().Clean(content);
             //content = content.Insert(0, DOCTYPE);
             try
             {
@@ -111,6 +111,7 @@
                 return "Sorry, a problem appeared when loading the page";
             }
             
+
             List<IDOMFilter> webToLocalFilters = new List<IDOMFilter>()
             {
                 new WebMacrosAdaptorFilter(manager),
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/BodyContentExtractorTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/BodyContentExtractorTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/BodyContentExtractorTest.cs	(revision 0)
@@ -0,0 +1,43 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>BodyContentExtractor</code>.
+    /// </summary>
+    [TestFixture]
+    public class BodyContentExtractorTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public BodyContentExtractorTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><head><body style=\"color:blue\"><h1>Header 1</h1><p>Body Content goes here</p></body></html>";
+            expectedHTML = "<h1>Header 1</h1><p>Body Content goes here</p>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            IHTMLCleaner bodyContentExtractor = new BodyContentExtractor();
+            string actualHTML = bodyContentExtractor.Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/CommentsRemoverTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/CommentsRemoverTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/CommentsRemoverTest.cs	(revision 0)
@@ -0,0 +1,68 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>CommentsRemover</code>.
+    /// </summary>
+    [TestFixture]
+    public class CommentsRemoverTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        public CommentsRemoverTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                + " xmlns=\"http://www.w3.org/TR/REC-html40\"> "
+                + "<head>"
+                + "<!--[if gte mso 9]><xml>"
+                + " <o:OfficeDocumentSettings>"
+                + "   <o:AllowPNG/>"
+                + "   <o:PixelsPerInch>120</o:PixelsPerInch>"
+                + "   <o:TargetScreenSize>1024x768</o:TargetScreenSize>"
+                + "  </o:OfficeDocumentSettings>"
+                + "</xml><![endif]-->"
+                + "</head>"
+                + "<body>"
+                + "<!-- the comment -->"
+                + "<p>the paragraph</p>"
+                + "</body>"
+                + "</html>";
+
+            expectedHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                + " xmlns=\"http://www.w3.org/TR/REC-html40\"> "
+                + "<head>"
+                + "</head>"
+                + "<body>"
+                + "<p>the paragraph</p>"
+                + "</body>"
+                + "</html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            string actualHTML = new CommentsRemover().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectAttributesCleanerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectAttributesCleanerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectAttributesCleanerTest.cs	(revision 0)
@@ -0,0 +1,51 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test for <code>CorrectAttributesCleaner</code> pre-DOM filter.
+    /// </summary>
+    [TestFixture]
+    public class CorrectAttributesCleanerTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public CorrectAttributesCleanerTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void GlobalSetup()
+        {
+            initialHTML = "<html><head><title>Title</title></head><body>"
+                + "<p id=p1>text</p>"
+                + "<p style='font-color:red;'>text</p>"
+                + "<p id=p2 class=copyright>copyright notes</p>"
+                + "<font color=\"red\">red text</font>"
+                + "</body></html>";
+            expectedHTML = "<html><head><title>Title</title></head><body>"
+                + "<p id=\"p1\">text</p>"
+                + "<p style='font-color:red;'>text</p>"
+                + "<p id=\"p2\" class=\"copyright\">copyright notes</p>"
+                + "<font color=\"red\">red text</font>"
+                + "</body></html>";
+        }
+        [Test]
+        public void TestCleaner()
+        {
+            string actualHTML = new CorrectAttributesCleaner().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectTagsClosingCleanerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectTagsClosingCleanerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/CorrectTagsClosingCleanerTest.cs	(revision 0)
@@ -0,0 +1,55 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>CorrectTagsClosingCleaner</code> pre-DOM filter.
+    /// </summary>
+    [TestFixture]
+    public class CorrectTagsClosingCleanerTest
+    {
+        private string initialHTML1;
+        private string initialHTML2;
+        private string expectedHTML1;
+        private string expectedHTML2;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public CorrectTagsClosingCleanerTest()
+        {
+            initialHTML1 = initialHTML2 = "";
+            expectedHTML1 = expectedHTML2 = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML1 = "<html><body><img src=\"img1.jpg\"></body></html>";
+            initialHTML2 = "<html><body><p>Text<br>Text</p></body></html>";
+
+            expectedHTML1 = "<html><body><img src=\"img1.jpg\" /></body></html>";
+            expectedHTML2 = "<html><body><p>Text<br />Text</p></body></html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            IHTMLCleaner imgClosingCleaner = new CorrectTagsClosingCleaner("img");
+            string actualHTML1 = imgClosingCleaner.Clean(initialHTML1);
+
+            IHTMLCleaner brClosingCleaner = new CorrectTagsClosingCleaner("br");
+            string actualHTML2 = brClosingCleaner.Clean(initialHTML2);
+
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure
+            // output. Results go to locals so the fixture inputs are not overwritten.
+            Assert.AreEqual(expectedHTML1, actualHTML1);
+            Assert.AreEqual(expectedHTML2, actualHTML2);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/DoctypeRemoverTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/DoctypeRemoverTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/DoctypeRemoverTest.cs	(revision 0)
@@ -0,0 +1,49 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>DoctypeRemover</code>.
+    /// </summary>
+    [TestFixture]
+    public class DoctypeRemoverTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public DoctypeRemoverTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"> "
+                + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en-US\" lang=\"en-US\"> "
+                + "<body>Content</body>"
+                + "</html>";
+
+            expectedHTML = "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en-US\" lang=\"en-US\"> "
+                + "<body>Content</body>"
+                + "</html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            IHTMLCleaner doctypeRemover = new DoctypeRemover();
+            string actualHTML = doctypeRemover.Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/EmptyParagraphsCleanerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/EmptyParagraphsCleanerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/EmptyParagraphsCleanerTest.cs	(revision 0)
@@ -0,0 +1,53 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>EmptyParagraphsCleaner</code>.
+    /// </summary>
+    [TestFixture]
+    public class EmptyParagraphsCleanerTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public EmptyParagraphsCleanerTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                + " xmlns=\"http://www.w3.org/TR/REC-html40\"> "
+                + "<body><p>Text</p><p>&nbsp;</p><o:p></o:p></body></html>";
+
+            expectedHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                + " xmlns=\"http://www.w3.org/TR/REC-html40\"> "
+                + "<body><p>Text</p><br /><br /></body></html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            string actualHTML = new EmptyParagraphsCleaner().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/HeadSectionRemoverTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/HeadSectionRemoverTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/HeadSectionRemoverTest.cs	(revision 0)
@@ -0,0 +1,44 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>HeadSectionRemover</code>.
+    /// </summary>
+    [TestFixture]
+    public class HeadSectionRemoverTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public HeadSectionRemoverTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><head><title>Title</title><link rel=\"stylesheet\" href=\"style.css\"/></head>"
+                + "<body><p>Content</p></body></html>";
+
+            expectedHTML = "<html><body><p>Content</p></body></html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            string actualHTML = new HeadSectionRemover().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/ListCharsCleanerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/ListCharsCleanerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/ListCharsCleanerTest.cs	(revision 0)
@@ -0,0 +1,42 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>ListCharsCleaner</code>.
+    /// </summary>
+    [TestFixture]
+    public class ListCharsCleanerTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public ListCharsCleanerTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><body><p>·Item1<br/>·Item2</p><p>§Another item</p></body></html>";
+            expectedHTML = "<html><body><p>oItem1<br/>oItem2</p><p>oAnother item</p></body></html>";
+        }
+
+        [Test]
+        public void TestCleaner()
+        {
+            string actualHTML = new ListCharsCleaner().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/NbspBetweenTagsRemoverTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/NbspBetweenTagsRemoverTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/NbspBetweenTagsRemoverTest.cs	(revision 0)
@@ -0,0 +1,42 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>NbspBetweenTagsRemover</code>.
+    /// </summary>
+    [TestFixture]
+    public class NbspBetweenTagsRemoverTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public NbspBetweenTagsRemoverTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><body><p>&nbsp;<span>text</span></p><p>&nbsp;</p></body></html>";
+            expectedHTML = "<html><body><p><span>text</span></p><p></p></body></html>";
+        }
+
+        [Test]
+        public void Test()
+        {
+            string actualHTML = new NbspBetweenTagsRemover().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/NbspReplacerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/NbspReplacerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/NbspReplacerTest.cs	(revision 0)
@@ -0,0 +1,42 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>NbspReplacer</code>.
+    /// </summary>
+    [TestFixture]
+    public class NbspReplacerTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor.
+        /// </summary>
+        public NbspReplacerTest()
+        {
+            initialHTML = expectedHTML = "";
+        }
+
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><body><p>Some&nbsp;content</p><p>More content&nbsp;here</p></body></html>";
+            expectedHTML = "<html><body><p>Some content</p><p>More content here</p></body></html>";
+        }
+
+        [Test]
+        public void Test()
+        {
+            string actualHTML = new NbspReplacer().Clean(initialHTML);
+            // NUnit takes (expected, actual); a swapped order mislabels both values in failure output.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/OfficeNameSpacesTagsRemoverTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/OfficeNameSpacesTagsRemoverTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/OfficeNameSpacesTagsRemoverTest.cs	(revision 0)
@@ -0,0 +1,65 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>OfficeNameSpacesTagsRemover</code>.
+    /// </summary>
+    [TestFixture]
+    public class OfficeNameSpacesTagsRemoverTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+
+        /// <summary>
+        /// Default constructor. Initializes the fixture strings to empty values;
+        /// the real test data is assigned in <code>TestSetup</code>.
+        /// </summary>
+        public OfficeNameSpacesTagsRemoverTest()
+        {
+            initialHTML = "";
+            expectedHTML = "";
+        }
+
+        /// <summary>
+        /// Prepares the test data: the input contains an <code>o:p</code> element,
+        /// the expected output is the same document without that element.
+        /// </summary>
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+            + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+            + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+            + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+            + " xmlns=\"http://www.w3.org/TR/REC-html40\">"
+            + "<body><p>Text</p><o:p>&nbsp;</o:p></body></html>";
+
+            expectedHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+            + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+            + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+            + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+            + " xmlns=\"http://www.w3.org/TR/REC-html40\">"
+            + "<body><p>Text</p></body></html>";
+        }
+
+        /// <summary>
+        /// Runs the cleaner through the <code>IHTMLCleaner</code> interface and
+        /// compares the result with the expected HTML.
+        /// </summary>
+        [Test]
+        public void TestCleaner()
+        {
+            IHTMLCleaner officeNameSpaceCleaner = new OfficeNameSpacesTagsRemover();
+            // Keep the result in a local so the fixture input is not overwritten.
+            string actualHTML = officeNameSpaceCleaner.Clean(initialHTML);
+            // NUnit's AreEqual takes the expected value first, then the actual one.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/ContentFiltering/Test/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacerTest.cs
===================================================================
--- xword/ContentFiltering/Test/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacerTest.cs	(revision 0)
+++ xword/ContentFiltering/Test/Office/Word/Cleaners/XmlNamespaceDefinitionsReplacerTest.cs	(revision 0)
@@ -0,0 +1,65 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using NUnit.Framework;
+using ContentFiltering.Office.Word.Cleaners;
+
+namespace ContentFiltering.Test.Office.Word.Cleaners
+{
+    /// <summary>
+    /// Test class for <code>XmlNamespaceDefinitionsReplacer</code>.
+    /// </summary>
+    [TestFixture]
+    public class XmlNamespaceDefinitionsReplacerTest
+    {
+        private string initialHTML;
+        private string expectedHTML;
+        private string newTag;
+
+        /// <summary>
+        /// Default constructor. Initializes the fixture strings to empty values;
+        /// the real test data is assigned in <code>TestSetup</code>.
+        /// </summary>
+        public XmlNamespaceDefinitionsReplacerTest()
+        {
+            initialHTML = "";
+            expectedHTML = "";
+            newTag = "";
+        }
+
+        /// <summary>
+        /// Prepares the test data: a document with a bare html tag, the opening tag
+        /// carrying the namespace definitions, and the document expected back.
+        /// </summary>
+        [TestFixtureSetUp]
+        public void TestSetup()
+        {
+            initialHTML = "<html><body><p>Content</p></body></html>";
+            expectedHTML = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                        + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                        + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                        + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                        + " xmlns=\"http://www.w3.org/1999/xhtml\">"
+                        + "<body><p>Content</p></body></html>";
+            newTag = "<html xmlns:v=\"urn:schemas-microsoft-com:vml\""
+                        + " xmlns:o=\"urn:schemas-microsoft-com:office:office\""
+                        + " xmlns:w=\"urn:schemas-microsoft-com:office:word\""
+                        + " xmlns:m=\"http://schemas.microsoft.com/office/2004/12/omml\""
+                        + " xmlns=\"http://www.w3.org/1999/xhtml\">";
+        }
+
+        /// <summary>
+        /// Runs a cleaner constructed with <code>newTag</code> on the initial HTML and
+        /// compares the result with the expected HTML.
+        /// </summary>
+        [Test]
+        public void TestCleaner()
+        {
+            // Keep the result in a local so the fixture input is not overwritten.
+            string actualHTML = new XmlNamespaceDefinitionsReplacer(newTag).Clean(initialHTML);
+            // NUnit's AreEqual takes the expected value first, then the actual one.
+            Assert.AreEqual(expectedHTML, actualHTML);
+        }
+    }
+}
Index: xword/XWord/AddinActions.cs
===================================================================
--- xword/XWord/AddinActions.cs	(revision 21692)
+++ xword/XWord/AddinActions.cs	(working copy)
@@ -17,6 +17,7 @@
 using Microsoft.Office.Core;
 using XWord.VstoExtensions;
 using XWiki.Logging;
+using ContentFiltering.Office.Word.Cleaners;
 
 namespace XWord
 {
@@ -32,7 +33,7 @@
         HtmlUtil htmlUtil = new HtmlUtil();
         //A dictionary, storing the converter instances for each opened page.
         Dictionary<String, ConversionManager> pageConverters = new Dictionary<string, ConversionManager>();
-        
+
         private const string DOWNLOAD_FOLDER = "XWord"; //"MyDocuments\XWord"
         private string TEMP_UPLOAD_FILES_FOLDER = Environment.SpecialFolder.ApplicationData.ToString() + @"\XWordTempData\UploadedFiles";
         private string TEMP_FILES_FOLDER = Environment.SpecialFolder.ApplicationData.ToString() + @"\XWordTempData\Pages";
@@ -50,8 +51,8 @@
         bool checkGrammarWithSpelling = false;
         bool checkSpellingAsYouType = false;
         bool contextualSpeller = false;
-        
 
+
         /// <summary>
         /// Generic webclient used for conneting to xwiki.
         /// </summary>        
@@ -124,7 +125,7 @@
             {
                 return false;
             }
-           
+
         }
 
         /// <summary>
@@ -228,7 +229,7 @@
         /// <param name="pageFullName">The full name of the wiki page that is being opened for editing.</param>
         public void EditPage(String pageFullName)
         {
-            if(IsOpened(pageFullName))
+            if (IsOpened(pageFullName))
             {
                 UserNotifier.Message("You are already editing this page.");
                 return;
@@ -237,7 +238,7 @@
             {
                 Client.Login(addin.username, addin.password);
             }
-            if(IsProtectedPage(pageFullName, addin.ProtectedPages))
+            if (IsProtectedPage(pageFullName, addin.ProtectedPages))
             {
                 String message = "You cannot edit this page." + Environment.NewLine;
                 message += "This page contains scrips that provide functionality to the wiki.";
@@ -266,10 +267,10 @@
                 String pageFullName = (String)_pageFullName;
                 //Read from server
                 String content = Client.GetRenderedPageContent(pageFullName);
-                
+
                 String localFileName = pageFullName.Replace(".", "-");
                 String folder = addin.PagesRepository + "TempPages";
-                ConvertToNormalFolder(folder);               
+                ConvertToNormalFolder(folder);
                 //content = new WebToLocalHTML(addin.serverURL, folder, localFileName).AdaptSource(content);
                 ConversionManager pageConverter;
                 if (pageConverters.ContainsKey(pageFullName))
@@ -303,7 +304,7 @@
                 addin.EditedPages.Add(localFileName, pageFullName);
                 addin.currentPageFullName = pageFullName;
                 //Open the file with Word
-                Word.Document doc =  OpenHTMLDocument(localFileName);
+                Word.Document doc = OpenHTMLDocument(localFileName);
                 #endregion//Open local document
 
                 //Mark just-opened document as saved. This prevents a silly confirmation box that
@@ -312,8 +313,8 @@
             }
             catch (IOException ex)
             {
-               UserNotifier.Error(ex.Message);
-            }            
+                UserNotifier.Error(ex.Message);
+            }
         }
 
         /// <summary>
@@ -427,10 +428,10 @@
         {
             if (addin.currentPageFullName == "" || addin.currentPageFullName == null)
             {
-                UserNotifier.Exclamation("You are not currently editing a wiki page") ;
+                UserNotifier.Exclamation("You are not currently editing a wiki page");
                 return;
             }
-            
+
             LoadingDialog loadingDialog = new LoadingDialog("Saving to wiki...");
             ThreadPool.QueueUserWorkItem(new WaitCallback(loadingDialog.ShowSyncDialog));
             SaveToXwiki();
@@ -471,12 +472,10 @@
                 sr.Close();
                 File.Delete(contentFilePath);
                 String cleanHTML = "";
-                cleanHTML = htmlUtil.RemoveSpecificTagContent(fileContent, "<!--", "-->");
-                cleanHTML = htmlUtil.RemoveSpecificTagContent(cleanHTML, "<![", "]>");
-                //cleanHTML = htmlUtil.RemoveSpecificTagContent(cleanHTML, "<meta", ">");
-                cleanHTML = htmlUtil.RemoveSpecificTagContent(cleanHTML, "<head>", "</head>");
-                //cleanHTML = htmlUtil.CleanHTML(cleanHTML, true);
-                //cleanHTML = htmlUtil.GetBodyContent(cleanHTML);
+                //NOTE(review): the replaced htmlUtil calls also stripped "<![" ... "]>" blocks; confirm CommentsRemover still covers them.
+                cleanHTML = new CommentsRemover().Clean(fileContent);
+                cleanHTML = new HeadSectionRemover().Clean(cleanHTML);
+
                 ConversionManager pageConverter;
                 if (pageConverters.ContainsKey(addin.currentPageFullName))
                 {
@@ -488,7 +487,8 @@
                                                           addin.currentPageFullName, Path.GetFileName(contentFilePath), addin.Client);
                 }
                 cleanHTML = pageConverter.ConvertFromWordToWeb(cleanHTML);
-                cleanHTML = htmlUtil.GetBodyContent(cleanHTML);
+                cleanHTML = new BodyContentExtractor().Clean(cleanHTML);
+
                 //openHTMLDocument(addin.currentLocalFilePath);
                 if (addin.AddinStatus.Syntax == null)
                 {
@@ -589,7 +589,7 @@
 
                 //If it's a new space, add it to the wiki structure and mark it as unpublished
                 List<Space> spaces = Globals.XWikiAddIn.wiki.spaces;
-                Space space=null;
+                Space space = null;
                 foreach (Space sp in spaces)
                 {
                     if (sp.name == spaceName)
@@ -607,7 +607,7 @@
                     }
                 }
 
-                if (space==null)
+                if (space == null)
                 {
                     space = new Space();
                     space.name = spaceName;
@@ -620,7 +620,7 @@
                     xwdoc.published = false;
                     xwdoc.space = spaceName;
                     space.documents.Add(xwdoc);
-                }                
+                }
             }
             catch (IOException ex)
             {
@@ -670,27 +670,27 @@
                 UserNotifier.Error("There was an error on the server. The pages in MSOffice space don't have programming rights");
                 hasErrors = true;
             }
-            else if(content.Contains(HTTPResponses.WRONG_REQUEST))
+            else if (content.Contains(HTTPResponses.WRONG_REQUEST))
             {
                 Log.Error("Server " + addin.serverURL + " wrong request");
                 UserNotifier.Error("Server error: Wrong request");
                 hasErrors = true;
             }
-            else if(content.Contains(HTTPResponses.NO_EDIT_RIGHTS))
+            else if (content.Contains(HTTPResponses.NO_EDIT_RIGHTS))
             {
                 Log.Information("User tried to edit a page on " + addin.serverURL + " whithout edit rights");
                 UserNotifier.Error("You dont have the right to edit this page");
                 hasErrors = true;
             }
-            else if(content.Contains(HTTPResponses.NO_GROOVY_RIGHTS))
+            else if (content.Contains(HTTPResponses.NO_GROOVY_RIGHTS))
             {
                 Log.Error("Server " + addin.serverURL + " error on parsing groovy - no groovy rights");
                 String message = "There was an error on the server." + Environment.NewLine;
-                message +=  "Please contact your server adminitrator. Error on executing groovy page in MSOffice space";
+                message += "Please contact your server adminitrator. Error on executing groovy page in MSOffice space";
                 UserNotifier.Error(message);
                 hasErrors = true;
             }
-            else if(content.Contains(HTTPResponses.INSUFFICIENT_MEMMORY))
+            else if (content.Contains(HTTPResponses.INSUFFICIENT_MEMMORY))
             {
                 Log.Error("Server " + addin.serverURL + " reported OutOfMemmoryException");
                 String message = "There was an error on the server." + Environment.NewLine;
@@ -698,7 +698,7 @@
                 UserNotifier.Error(message);
                 hasErrors = true;
             }
-            else if(content.Contains(HTTPResponses.VELOCITY_PARSER_ERROR))
+            else if (content.Contains(HTTPResponses.VELOCITY_PARSER_ERROR))
             {
                 Log.Error("Server " + addin.serverURL + " error when parsing page. ");
                 String message = "There was an error on the server" + Environment.NewLine;
@@ -721,7 +721,7 @@
                 foreach (String wildcard in wildcards)
                 {
                     String docFullName = doc.space + "." + doc.name;
-                    if(UtilityClass.IsWildcardMatch(wildcard, docFullName, true))
+                    if (UtilityClass.IsWildcardMatch(wildcard, docFullName, true))
                     {
                         wiki.RemoveXWikiDocument(doc);
                         break;
@@ -781,7 +781,7 @@
                 UserNotifier.Error(ioex.Message);
                 return false;
             }
-            
+
             return true;
         }
     }