Created
February 28, 2018 10:14
-
-
Save joostbroekhuizen/b561b182b79b792a9b5b5754a6f5c0cd to your computer and use it in GitHub Desktop.
Sitecore richtext field custom save event to clean up HTML
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /// | |
| /// Don't forget the following config file patch for Sitecore: | |
| /// <configuration> | |
| /// <sitecore> | |
| /// <events> | |
| /// <event name="item:saved"> | |
| /// <handler type="YourNameSpace.Sitecore.Events.RichTextEditorSaveEvent, YourNameSpace.Sitecore" method="OnItemSaving"> | |
| /// <database>master</database> | |
| /// </handler> | |
| /// </event> | |
| /// </events> | |
| /// </sitecore> | |
| /// </configuration> | |
| using Sitecore.Data.Items; | |
| using Sitecore.Events; | |
| using System; | |
| using System.Collections.Generic; | |
| using System.Linq; | |
| using System.Text; | |
| using System.Threading.Tasks; | |
| using Sitecore.Collections; | |
| using Sitecore.SecurityModel; | |
| using Sitecore.Data.Fields; | |
| using System.Collections; | |
| using System.Xml; | |
| using Sitecore.Data; | |
| using HtmlAgilityPack; | |
| namespace YourNameSpace.Sitecore.Events | |
| { | |
/// <summary>
/// Sitecore item event handler that tidies the HTML stored in rich text fields.
/// The handler is invoked for every item the event fires for, but only fields whose
/// type key is "rich text" are inspected and, when needed, rewritten.
/// </summary>
public class RichTextEditorSaveEvent
{
    /// <summary>
    /// IDs of the items that are currently being cleaned. The handler saves the item
    /// itself, so this guard prevents it from re-processing an item it is already working on.
    /// </summary>
    private static readonly SynchronizedCollection<ID> ItemsInProgress = new SynchronizedCollection<ID>();

    /// <summary>
    /// Name of the database the handler is restricted to (the &lt;database&gt; element
    /// of the handler configuration). Items from other databases are ignored.
    /// </summary>
    public string Database { get; set; }

    /// <summary>
    /// Event entry point: cleans every non-empty rich text field of the item carried by
    /// <paramref name="args"/> and writes the changed values back in a single edit.
    /// Failures are logged and never propagated, so the clean-up stays best-effort.
    /// </summary>
    /// <param name="sender">Event sender (unused).</param>
    /// <param name="args">Sitecore event arguments; parameter 0 is expected to be the item.</param>
    public void OnItemSaving(object sender, EventArgs args)
    {
        var item = Event.ExtractParameter(args, 0) as Item;
        if (item == null)
        {
            return;
        }

        // Database names are identifiers, so compare them ordinally rather than by culture.
        if (item.Database != null && !string.Equals(item.Database.Name, this.Database, StringComparison.Ordinal))
        {
            return;
        }

        if (ItemsInProgress.Contains(item.ID))
        {
            return;
        }

        ItemsInProgress.Add(item.ID);
        try
        {
            // Collect the changes first so the field collection is not written to while it is enumerated.
            var updates = new List<KeyValuePair<Field, string>>();
            foreach (Field field in item.Fields)
            {
                if (!string.Equals(field.TypeKey, "rich text", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                string original = field.Value;
                if (string.IsNullOrEmpty(original))
                {
                    continue;
                }

                string cleaned = CleanHtml(original);
                if (!string.Equals(cleaned, original, StringComparison.Ordinal))
                {
                    updates.Add(new KeyValuePair<Field, string>(field, cleaned));
                }
            }

            if (updates.Count == 0)
            {
                // Nothing to rewrite: do not open an edit context at all.
                return;
            }

            using (new SecurityDisabler())
            {
                item.Editing.BeginEdit();
                try
                {
                    foreach (KeyValuePair<Field, string> update in updates)
                    {
                        update.Key.Value = update.Value;
                    }

                    item.Editing.EndEdit();
                }
                catch (Exception)
                {
                    // Do not leave the item stuck in editing state when the write fails.
                    item.Editing.CancelEdit();
                    throw;
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort clean-up: never break the save, but do not hide the failure either.
            // global:: is required because "Sitecore" would otherwise bind to this file's own namespace.
            global::Sitecore.Diagnostics.Log.Error("Rich text clean-up failed for item " + item.ID, ex, this);
        }
        finally
        {
            ItemsInProgress.Remove(item.ID);
        }
    }

    /// <summary>
    /// Runs all clean-up steps on one field value.
    /// </summary>
    /// <param name="html">Raw, non-empty field value.</param>
    /// <returns>
    /// The cleaned markup. If the clean-up throws, the trimmed input is returned, wrapped in
    /// &lt;p&gt;...&lt;/p&gt; when it does not already contain a &lt;p&gt; tag.
    /// </returns>
    private string CleanHtml(string html)
    {
        string content = html.Trim();
        try
        {
            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(content);
            RemoveEmptyPTags(htmlDocument);
            RemovePTagAroundImages(htmlDocument);
            MoveSpanClassAndStyleToPTagIfIsDirectAndOnlyChild(htmlDocument);
            return htmlDocument.DocumentNode.InnerHtml;
        }
        catch (Exception ex)
        {
            global::Sitecore.Diagnostics.Log.Warn("Rich text clean-up could not process the field value; falling back to <p> wrapping.", ex, this);

            // Apparently no html or not valid, in this case wrap in <p>
            return content.Contains("<p>") ? content : "<p>" + content + "</p>";
        }
    }

    /// <summary>
    /// When a &lt;span&gt; is the direct and only child of a &lt;p&gt;, copies the span's
    /// attributes onto the paragraph, moves the span's content into the paragraph and
    /// removes the span, which yields cleaner HTML.
    /// </summary>
    /// <param name="content">Document to modify in place.</param>
    private void MoveSpanClassAndStyleToPTagIfIsDirectAndOnlyChild(HtmlDocument content)
    {
        HtmlNodeCollection spanNodes = content.DocumentNode.SelectNodes("//span");
        if (spanNodes == null)
        {
            return;
        }

        foreach (HtmlNode span in spanNodes)
        {
            HtmlNode paragraph = span.ParentNode;
            if (paragraph == null || paragraph.Name != "p" || paragraph.ChildNodes.Count != 1)
            {
                continue;
            }

            foreach (HtmlAttribute attribute in span.Attributes)
            {
                HtmlAttribute existing = paragraph.Attributes[attribute.Name];
                if (existing == null)
                {
                    paragraph.Attributes.Add(attribute.Name, attribute.Value);
                }
                else
                {
                    existing.Value = MergeAttributeValues(attribute.Name, existing.Value, attribute.Value);
                }
            }

            // Moving the nodes (instead of re-parsing InnerHtml) keeps nested spans attached,
            // so a span that becomes the only child of the paragraph is handled as well.
            ReplaceNodeWithItsChildren(span);
        }
    }

    /// <summary>
    /// Images can end up inside a &lt;p&gt; when the content editor enters a line break
    /// after inserting the image. This removes any paragraph that has an image as a direct
    /// child and leaves the paragraph's content at the same position in the parent.
    /// </summary>
    /// <param name="content">Document to modify in place.</param>
    private void RemovePTagAroundImages(HtmlDocument content)
    {
        HtmlNodeCollection imgNodes = content.DocumentNode.SelectNodes("//img");
        if (imgNodes == null)
        {
            return;
        }

        foreach (HtmlNode img in imgNodes)
        {
            HtmlNode paragraph = img.ParentNode;

            // A second image of an already unwrapped paragraph no longer has a <p> parent and is skipped here.
            if (paragraph == null || paragraph.Name != "p" || paragraph.ParentNode == null)
            {
                continue;
            }

            ReplaceNodeWithItsChildren(paragraph);
        }
    }

    /// <summary>
    /// Removes paragraphs that are empty or contain only whitespace. Content editors
    /// should use &lt;br/&gt; line breaks instead, or spacing should be realized with CSS.
    /// </summary>
    /// <param name="content">Document to modify in place.</param>
    private void RemoveEmptyPTags(HtmlDocument content)
    {
        HtmlNodeCollection pNodes = content.DocumentNode.SelectNodes("//p");
        if (pNodes == null)
        {
            return;
        }

        foreach (HtmlNode paragraph in pNodes)
        {
            // IsNullOrWhiteSpace already covers "", " ", "\n" and "\n\n".
            if (string.IsNullOrWhiteSpace(paragraph.InnerHtml) && paragraph.ParentNode != null)
            {
                paragraph.ParentNode.RemoveChild(paragraph);
            }
        }
    }

    /// <summary>
    /// Replaces <paramref name="node"/> with its own child nodes, keeping their order and
    /// their position among the siblings of <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Attached node to unwrap; its parent must not be null.</param>
    private static void ReplaceNodeWithItsChildren(HtmlNode node)
    {
        HtmlNode parent = node.ParentNode;

        // Snapshot first: the live child collection is emptied on the next line.
        var children = new List<HtmlNode>(node.ChildNodes);
        node.RemoveAllChildren();

        foreach (HtmlNode child in children)
        {
            parent.InsertBefore(child, node);
        }

        parent.RemoveChild(node);
    }

    /// <summary>
    /// Combines an attribute value already present on the paragraph with the value of the
    /// same attribute on the span.
    /// </summary>
    /// <param name="attributeName">Name of the attribute being merged.</param>
    /// <param name="parentValue">Value currently on the paragraph.</param>
    /// <param name="spanValue">Value taken from the span.</param>
    /// <returns>
    /// The merged value: style declarations are joined with ';', every other attribute
    /// (for example class) is joined with a single space. An empty side is ignored.
    /// </returns>
    private static string MergeAttributeValues(string attributeName, string parentValue, string spanValue)
    {
        if (string.IsNullOrEmpty(parentValue))
        {
            return spanValue;
        }

        if (string.IsNullOrEmpty(spanValue))
        {
            return parentValue;
        }

        if (string.Equals(attributeName, "style", StringComparison.OrdinalIgnoreCase))
        {
            // CSS declarations need a ';' between them; a bare space would corrupt both.
            string parentDeclarations = parentValue.Trim().TrimEnd(';');
            return parentDeclarations.Length == 0 ? spanValue : parentDeclarations + "; " + spanValue;
        }

        return parentValue + " " + spanValue;
    }
}
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment