1: using System;
2: using BlogEngine.Core;
3: using BlogEngine.Core.Web.Controls;
4: using BlogEngine.Core.Web.HttpHandlers;
5: using System.Text.RegularExpressions;
6: using System.Collections.Generic;
7: using System.Web;
8: using System.Text;
9: using System.IO;
10:
/// <summary>
/// BlogEngine extension that generates a TOC (Table Of Contents) automatically, based on the
/// occurrences of h1 - h6 tags found in the body of posts and pages.
/// </summary>
[Extension("Automatically generates a Table Of Contents for Posts and Pages with anchors to html headings h1-h6", "1.3", "<a href=\"http://www.tellingmachine.com\">Klaus Graefensteiner</a>")]
public class AutoGenTableOfContents
{
    /// <summary>
    /// Subscribes this extension to the Saving events of <see cref="Post"/> and <see cref="Page"/>.
    /// </summary>
    public AutoGenTableOfContents()
    {
        Post.Saving += Post_Saving;
        Page.Saving += Page_Saving;
    }

    /// <summary>
    /// Tells whether the save described by <paramref name="e"/> is an insert or an update,
    /// the only two actions for which the content is rewritten.
    /// </summary>
    private static bool IsInsertOrUpdate(SavedEventArgs e)
    {
        return e.Action == SaveAction.Insert || e.Action == SaveAction.Update;
    }

    /// <summary>
    /// Handler for <c>Post.Saving</c>: regenerates the table of contents inside the post content
    /// when <c>BEX.TOC.IsDesired</c> reports that the content asks for one.
    /// </summary>
    /// <param name="sender">The post being saved.</param>
    /// <param name="e">Describes the save action.</param>
    void Post_Saving(object sender, SavedEventArgs e)
    {
        if (!IsInsertOrUpdate(e))
        {
            return;
        }

        // The cast is performed only after the action check, exactly like the original condition.
        Post savedPost = (Post)sender;
        if (!BEX.TOC.IsDesired(savedPost.Content))
        {
            return;
        }

        savedPost.Content = BEX.TOC.UpdateHTML(savedPost.Content);
    }

    /// <summary>
    /// Handler for <c>Page.Saving</c>: regenerates the table of contents inside the page content
    /// when <c>BEX.TOC.IsDesired</c> reports that the content asks for one.
    /// </summary>
    /// <param name="sender">The page being saved.</param>
    /// <param name="e">Describes the save action.</param>
    void Page_Saving(object sender, SavedEventArgs e)
    {
        if (!IsInsertOrUpdate(e))
        {
            return;
        }

        // The cast is performed only after the action check, exactly like the original condition.
        Page savedPage = (Page)sender;
        if (!BEX.TOC.IsDesired(savedPage.Content))
        {
            return;
        }

        savedPage.Content = BEX.TOC.UpdateHTML(savedPage.Content);
    }
}
44:
45:
46:
namespace BEX
{
    /// <summary>
    /// Builds a table of contents (TOC) from the h1 - h6 headings of an HTML fragment and injects
    /// it into that fragment wherever the <c>[tocautogen]</c> placeholder (or a previously
    /// rendered TOC) is found.
    /// </summary>
    public class TOC
    {
        public TOC()
        {

        }

        // Name shared by the "[tocautogen]" placeholder and the id of the rendered <div>.
        private const string Token = "tocautogen";

        // The literal placeholder an author types where the TOC should appear.
        private const string Placeholder = "[" + Token + "]";

        /// <summary>
        /// Prefix of the id of the empty anchor span emitted in front of every heading.
        /// </summary>
        public const string AnchorPrefix = "toc_";

        /// <summary>
        /// Name of the HTML list element used to render the TOC.
        /// </summary>
        public const string xl = "ul"; //HTML list type (ul, ol, dl)

        // UpdateHTML works on static state (the heading list below and the static counters of
        // Heading), so overlapping calls are serialized on this object.
        private static readonly object SyncRoot = new object();

        /// <summary>
        /// The regular expression used to find h1-6 heading tags.
        /// </summary>
        private static readonly Regex AnalyzeHeadingsRegex = new Regex(@"<\s*h(?'LEVEL'[1-6])(?'RAWATTRIBUTES'.*?)>(?'RAWHEADING'.*?)<\s*/\s*h\k'LEVEL'\s*>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Headings collected by the UpdateHTML call that is currently running.
        private static List<Heading> headings = new List<Heading>();

        /// <summary>
        /// The regular expression used to find the anchor span tags placed before the headings,
        /// e.g. &lt;span id="toc_3"&gt;&lt;/span&gt;.
        /// </summary>
        private static readonly Regex ReplaceAnchorRegex = new Regex("<\\s*span\\s+id=\"" + AnchorPrefix + "\\d+\"\\s*><\\s*/\\s*span\\s*>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>
        /// The regular expression used to detect the rendered table of contents.
        /// The attribute part is limited to [^>]* so that the id must belong to the div itself.
        /// </summary>
        private static readonly Regex FindOLRegex = new Regex("<\\s*div\\s+[^>]*?id=\"" + Token + "\"", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>
        /// The regular expression used to completely remove or replace the table of contents.
        /// The attribute part is limited to [^>]* so that the match cannot start at an earlier,
        /// unrelated div and swallow the content between that div and the TOC.
        /// </summary>
        private static readonly Regex ReplaceOLRegex = new Regex("<\\s*div\\s+[^>]*?id=\"" + Token + "\"[^>]*>.*?<\\s*/\\s*div\\s*>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>
        /// Tells whether the given HTML asks for a table of contents, i.e. contains the
        /// <c>[tocautogen]</c> placeholder or an already rendered TOC div.
        /// </summary>
        /// <param name="htmltext">The HTML to inspect; may be null or empty.</param>
        /// <returns>True when a placeholder or rendered TOC is present; false otherwise,
        /// including for null or empty input.</returns>
        public static bool IsDesired(string htmltext)
        {
            if (string.IsNullOrEmpty(htmltext))
            {
                return false;
            }

            return htmltext.Contains(Placeholder) || FindOLRegex.IsMatch(htmltext);
        }

        /// <summary>
        /// Regenerates the table of contents of the given HTML: a previously rendered TOC is
        /// turned back into the placeholder, old anchor spans are removed, a fresh anchor span is
        /// put in front of every h1 - h6 heading and every placeholder is replaced by the new TOC.
        /// </summary>
        /// <param name="htmltext">The HTML to process; may be null or empty.</param>
        /// <returns>The updated HTML. Null or empty input is returned unchanged.</returns>
        public static string UpdateHTML(string htmltext)
        {
            if (string.IsNullOrEmpty(htmltext))
            {
                return htmltext;
            }

            lock (SyncRoot)
            {
                Heading.Init();
                headings.Clear();

                //Remove existing TOC and replace it with our token
                htmltext = ReplaceOLRegex.Replace(htmltext, Placeholder);

                //Clean out existing anchor span tags
                htmltext = ReplaceAnchorRegex.Replace(htmltext, String.Empty);

                //Analyze headings; each one remembers the text between it and the previous heading
                int cursor = 0;
                foreach (Match match in AnalyzeHeadingsRegex.Matches(htmltext))
                {
                    int level = Convert.ToInt32(match.Groups["LEVEL"].Value);
                    string headingText = match.Groups["RAWHEADING"].Value;
                    string attributeText = match.Groups["RAWATTRIBUTES"].Value;
                    string preceedingText = htmltext.Substring(cursor, match.Index - cursor);
                    cursor = match.Index + match.Length;
                    headings.Add(new Heading(level, headingText, attributeText, match.Value, preceedingText));
                }

                // Everything behind the last heading. When there is no heading at all the cursor
                // is still 0, so the complete text is kept instead of being dropped.
                string textAfterLastMatch = htmltext.Substring(cursor);

                //Build table of contents
                string tocHtmlText = GenerateTableOfContents();

                //Build new html text
                StringBuilder sb = new StringBuilder();
                foreach (Heading h in headings)
                {
                    sb.Append(h.ToString());
                }
                sb.Append(textAfterLastMatch);

                //Inject table of contents
                return sb.ToString().Replace(Placeholder, tocHtmlText);
            }
        }

        /// <summary>
        /// Renders the collected headings as nested lists wrapped in the TOC div.
        /// Must only be called from <see cref="UpdateHTML"/>, which owns the static state.
        /// </summary>
        /// <returns>The HTML of the table of contents.</returns>
        private static string GenerateTableOfContents()
        {
            // Nesting depth is expressed relative to the previous heading; the first heading is
            // measured against the lowest level number that occurs in the text.
            int previousLevel = Heading.LowestNumber;
            for (int i = 0; i < headings.Count; i++)
            {
                headings[i].LevelDeltaToPreviousInList = headings[i].Level - previousLevel;
                previousLevel = headings[i].Level;
            }

            StringBuilder sb = new StringBuilder();
            //Add xoxo class to ul to make it an Outline microformat: http://microformats.org/wiki/xoxo
            sb.Append("<div id=\"" + Token + "\"><" + xl + " class=\"xoxo\">");

            foreach (Heading h in headings)
            {
                // Going deeper: open one nested list per level, below a "[?]" dummy item.
                for (int i = 0; i < h.LevelDeltaToPreviousInList; i++)
                {
                    sb.Append("<li>[?]<" + xl + ">");
                }

                // Going back up: close one nested list per level.
                for (int i = 0; i < -h.LevelDeltaToPreviousInList; i++)
                {
                    sb.Append("</" + xl + "></li>");
                }

                sb.Append("<li><a href=\"#" + h.AnchorString + "\">" + h.HeadingString + "</a></li>");
            }

            // Close the nested lists that are still open after the last heading.
            for (int i = 0; i < Heading.LevelOfLastHeading - Heading.LowestNumber; i++)
            {
                sb.Append("</" + xl + "></li>");
            }

            sb.Append("</" + xl + "></div>");

            //Remove extra [?] list items and move nested list items directly under the parent item
            //instead of under a [?] item
            return sb.ToString().Replace("</li><li>[?]", string.Empty);
        }

    }

    /// <summary>
    /// This class encapsulates all information that resulted from parsing one heading tag out of
    /// an html text. Static members track values across all headings created since the last
    /// call to <see cref="Init"/>.
    /// </summary>
    public class Heading
    {
        public Heading()
        {

        }

        /// <summary>
        /// Creates a heading, assigns it the next identity number and updates the static
        /// lowest / highest / last level trackers.
        /// </summary>
        /// <param name="level">Heading level taken from the tag name (the digit of h1 - h6).</param>
        /// <param name="heading">Raw inner HTML of the heading element.</param>
        /// <param name="attributes">Raw attribute text of the opening tag.</param>
        /// <param name="match">The complete heading element as found in the text.</param>
        /// <param name="preceeding">Text located between the previous heading and this one.</param>
        public Heading(int level, string heading, string attributes, string match, string preceeding)
        {
            _Identity = _IdentityCounter++;
            _Level = level;
            _HeadingString = heading;
            _AttributeString = attributes;
            _MatchString = match;
            _PreceedingString = preceeding;
            _AnchorString = TOC.AnchorPrefix + Identity.ToString();

            if (level < LowestNumber)
            {
                LowestNumber = level;
            }

            if (level > HighestNumber)
            {
                HighestNumber = level;
            }

            LevelOfLastHeading = level;
        }

        /// <summary>
        /// Resets all static trackers to their initial values so that a new text can be analyzed.
        /// </summary>
        public static void Init()
        {
            _IdentityCounter = 0;
            _LowestNumber = 6;
            _HighestNumber = 1;
            // Reset as well, otherwise the value of the previous run would leak into this one.
            _LevelOfLastHeading = 6;
        }

        private int _Level;
        private int _LevelDeltaToPreviousInList;
        private string _HeadingString;
        private string _AttributeString;
        private string _MatchString;
        private string _ID;
        private string _PreceedingString;
        private static int _IdentityCounter = 0;
        private int _Identity;
        private static int _LowestNumber = 6;
        private static int _HighestNumber = 1;
        private static int _LevelOfLastHeading = 6;
        private string _AnchorString;


        /// <summary>
        /// Returns the preceding text, an empty anchor span carrying <see cref="AnchorString"/>
        /// as id, and the original heading element, in that order.
        /// </summary>
        public override string ToString()
        {
            return this.PreceedingString + "<span id=\"" + this.AnchorString + "\"></span>" + this.MatchString;
        }

        /// <summary>Heading level passed to the constructor.</summary>
        public int Level
        {
            get { return _Level; }
        }

        /// <summary>The complete heading element as passed to the constructor.</summary>
        public string MatchString
        {
            get { return _MatchString; }
        }

        /// <summary>Raw attribute text of the opening heading tag.</summary>
        public string AttributeString
        {
            get { return _AttributeString; }
            set { _AttributeString = value; }
        }

        /// <summary>Id of the anchor span; the constructor sets it to prefix plus identity.</summary>
        public string AnchorString
        {
            get { return _AnchorString; }
            set { _AnchorString = value; }
        }

        /// <summary>Raw inner HTML of the heading element.</summary>
        public string HeadingString
        {
            get { return _HeadingString; }
            set { _HeadingString = value; }
        }

        /// <summary>Free-form id; not assigned by this class itself.</summary>
        public string ID
        {
            get { return _ID; }
            set { _ID = value; }
        }

        /// <summary>Text located between the previous heading and this one.</summary>
        public string PreceedingString
        {
            get { return _PreceedingString; }
        }

        /// <summary>Zero-based sequence number assigned by the constructor.</summary>
        public int Identity
        {
            get { return _Identity; }
        }

        /// <summary>Smallest level number seen since the last <see cref="Init"/> (6 if none).</summary>
        public static int LowestNumber
        {
            get { return _LowestNumber; }
            set { _LowestNumber = value; }
        }

        /// <summary>Largest level number seen since the last <see cref="Init"/> (1 if none).</summary>
        public static int HighestNumber
        {
            get { return _HighestNumber; }
            set { _HighestNumber = value; }
        }

        /// <summary>Level of the most recently constructed heading (6 right after <see cref="Init"/>).</summary>
        public static int LevelOfLastHeading
        {
            get { return _LevelOfLastHeading; }
            set { _LevelOfLastHeading = value; }
        }

        /// <summary>Difference between this heading's level and that of its predecessor in the list.</summary>
        public int LevelDeltaToPreviousInList
        {
            get { return _LevelDeltaToPreviousInList; }
            set { _LevelDeltaToPreviousInList = value; }
        }

    }

}