reactos/irc/TechBot/CHMLibrary/CHMDecoding/HHCParser2.cs
Casper Hornstrup 9dab4509fa Import TechBot
svn path=/trunk/; revision=13064
2005-01-15 19:27:25 +00:00

221 lines
5.8 KiB
C#

using System;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>HHCParser</c> implements a parser for HHC contents files.
/// </summary>
// internal sealed class HHCParser : IHHCParser
public class HHCParser2
{
static private string m_text1="";
static private string m_text2="";
static private int m_CurrentPos=0;
/// <summary>
/// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
/// </summary>
/// <param name="hhcFile">string content of the hhc file</param>
/// <param name="chmFile">CHMFile instance</param>
/// <returns>Returns an ArrayList with the table of contents (TOC) tree</returns>
public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
{
DateTime StartTime=DateTime.Now;
ArrayList tocList = new ArrayList();
m_text2=hhcFile;
m_text1=hhcFile.ToLower();
int idx=m_text1.IndexOf("<ul>");
if (idx==-1)
return null;
m_CurrentPos=idx+4;
ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ParseTree(tocList,chmFile);
DateTime EndTime=DateTime.Now;
TimeSpan Diff=EndTime-StartTime;
string x=Diff.ToString();
return tocList;
}
/// <summary>
/// Recursively parses a sitemap tree
/// </summary>
/// <param name="text">content text</param>
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
/// <param name="chmFile">CHMFile instance</param>
static private void ParseTree( ArrayList arrNodes, CHMFile chmFile )
{
bool bProcessing=true;
do
{
bProcessing=false;
// Indent
int idxa=m_text1.IndexOf("<ul>",m_CurrentPos);
int idxb=m_text1.IndexOf("<li>",m_CurrentPos);
int idxc=m_text1.IndexOf("</ul>",m_CurrentPos);
if ((idxa<idxb) && (idxa<idxc) && (idxa>-1))
{
bProcessing=true;
m_CurrentPos=idxa+4;
if (arrNodes.Count<1)
{
ParseTree(arrNodes,chmFile);
}
else
{
ParseTree(((TOCItem)(arrNodes[arrNodes.Count-1])).Children,chmFile);
}
continue;
}
// new item
if ((idxb<idxa) && (idxb<idxc) && (idxb>-1))
{
bProcessing=true;
m_CurrentPos=idxb+4;
int idx2=m_text1.IndexOf("<object",m_CurrentPos);
if (idx2!=-1)
{
int idx3=m_text1.IndexOf("</object>",idx2+7);
if (idx3!=-1)
{
string text=m_text2.Substring(idx2,idx3-idx2);
m_CurrentPos=idx3+9;
// Parse items in text.
TOCItem tocItem=ParseItems(text, chmFile);
if (tocItem!=null)
{
arrNodes.Add(tocItem);
}
}
}
}
// Undent
if ((idxc<idxa) && (idxc<idxb) && (idxc>-1))
{
m_CurrentPos=idxc+5;
bProcessing=true;
return;
}
}
while (bProcessing);
}
private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
private static Regex ParamRE;
private static Regex AttributesRE;
/// <summary>
/// Parses tree nodes from the text
/// </summary>
/// <param name="itemstext">text containing the items</param>
/// <param name="arrNodes">arraylist where the nodes should be added</param>
/// <param name="chmFile">CHMFile instance</param>
private static TOCItem ParseItems( string itemstext, CHMFile chmFile)
{
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
TOCItem tocItem = new TOCItem();
// read parameters
int nParamIndex = 0;
while( ParamRE.IsMatch(itemstext, nParamIndex) )
{
Match mP = ParamRE.Match(itemstext, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = attributeValue;
}
if( attributeName.ToLower() == "value")
{
paramValue = attributeValue;
}
nAttrIdx = mA.Index+mA.Length;
}
tocItem.Params[paramName] = paramValue;
switch(paramName.ToLower())
{
case "name":
{
tocItem.Name = paramValue;
};break;
case "local":
{
tocItem.Local = paramValue;
};break;
case "imagenumber":
{
tocItem.ImageIndex = Int32.Parse(paramValue);
if( tocItem.ImageIndex == 2)
tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
};break;
}
nParamIndex = mP.Index+mP.Length;
}
tocItem.ChmFile = chmFile.ChmFilePath;
return tocItem;
}
}
}