mirror of
https://github.com/reactos/reactos.git
synced 2024-10-30 19:41:57 +00:00
221 lines
5.8 KiB
C#
221 lines
5.8 KiB
C#
|
using System;
|
||
|
using System.Collections;
|
||
|
using System.Text;
|
||
|
using System.Text.RegularExpressions;
|
||
|
|
||
|
namespace HtmlHelp.ChmDecoding
|
||
|
{
|
||
|
/// <summary>
|
||
|
/// The class <c>HHCParser</c> implements a parser for HHC contents files.
|
||
|
/// </summary>
|
||
|
// internal sealed class HHCParser : IHHCParser
|
||
|
public class HHCParser2
|
||
|
{
|
||
|
static private string m_text1="";
|
||
|
static private string m_text2="";
|
||
|
static private int m_CurrentPos=0;
|
||
|
|
||
|
/// <summary>
|
||
|
/// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
|
||
|
/// </summary>
|
||
|
/// <param name="hhcFile">string content of the hhc file</param>
|
||
|
/// <param name="chmFile">CHMFile instance</param>
|
||
|
/// <returns>Returns an ArrayList with the table of contents (TOC) tree</returns>
|
||
|
public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
|
||
|
{
|
||
|
DateTime StartTime=DateTime.Now;
|
||
|
|
||
|
ArrayList tocList = new ArrayList();
|
||
|
|
||
|
m_text2=hhcFile;
|
||
|
m_text1=hhcFile.ToLower();
|
||
|
|
||
|
int idx=m_text1.IndexOf("<ul>");
|
||
|
if (idx==-1)
|
||
|
return null;
|
||
|
m_CurrentPos=idx+4;
|
||
|
|
||
|
ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||
|
AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
|
||
|
|
||
|
ParseTree(tocList,chmFile);
|
||
|
|
||
|
DateTime EndTime=DateTime.Now;
|
||
|
TimeSpan Diff=EndTime-StartTime;
|
||
|
string x=Diff.ToString();
|
||
|
|
||
|
return tocList;
|
||
|
}
|
||
|
|
||
|
/// <summary>
|
||
|
/// Recursively parses a sitemap tree
|
||
|
/// </summary>
|
||
|
/// <param name="text">content text</param>
|
||
|
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
|
||
|
/// <param name="chmFile">CHMFile instance</param>
|
||
|
static private void ParseTree( ArrayList arrNodes, CHMFile chmFile )
|
||
|
{
|
||
|
bool bProcessing=true;
|
||
|
do
|
||
|
{
|
||
|
bProcessing=false;
|
||
|
|
||
|
// Indent
|
||
|
int idxa=m_text1.IndexOf("<ul>",m_CurrentPos);
|
||
|
int idxb=m_text1.IndexOf("<li>",m_CurrentPos);
|
||
|
int idxc=m_text1.IndexOf("</ul>",m_CurrentPos);
|
||
|
|
||
|
if ((idxa<idxb) && (idxa<idxc) && (idxa>-1))
|
||
|
{
|
||
|
bProcessing=true;
|
||
|
m_CurrentPos=idxa+4;
|
||
|
if (arrNodes.Count<1)
|
||
|
{
|
||
|
ParseTree(arrNodes,chmFile);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
ParseTree(((TOCItem)(arrNodes[arrNodes.Count-1])).Children,chmFile);
|
||
|
}
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
// new item
|
||
|
if ((idxb<idxa) && (idxb<idxc) && (idxb>-1))
|
||
|
{
|
||
|
|
||
|
bProcessing=true;
|
||
|
m_CurrentPos=idxb+4;
|
||
|
|
||
|
int idx2=m_text1.IndexOf("<object",m_CurrentPos);
|
||
|
if (idx2!=-1)
|
||
|
{
|
||
|
int idx3=m_text1.IndexOf("</object>",idx2+7);
|
||
|
if (idx3!=-1)
|
||
|
{
|
||
|
string text=m_text2.Substring(idx2,idx3-idx2);
|
||
|
|
||
|
m_CurrentPos=idx3+9;
|
||
|
|
||
|
// Parse items in text.
|
||
|
TOCItem tocItem=ParseItems(text, chmFile);
|
||
|
if (tocItem!=null)
|
||
|
{
|
||
|
arrNodes.Add(tocItem);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Undent
|
||
|
if ((idxc<idxa) && (idxc<idxb) && (idxc>-1))
|
||
|
{
|
||
|
m_CurrentPos=idxc+5;
|
||
|
bProcessing=true;
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
while (bProcessing);
|
||
|
}
|
||
|
|
||
|
|
||
|
private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
|
||
|
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
|
||
|
private static Regex ParamRE;
|
||
|
private static Regex AttributesRE;
|
||
|
|
||
|
/// <summary>
|
||
|
/// Parses tree nodes from the text
|
||
|
/// </summary>
|
||
|
/// <param name="itemstext">text containing the items</param>
|
||
|
/// <param name="arrNodes">arraylist where the nodes should be added</param>
|
||
|
/// <param name="chmFile">CHMFile instance</param>
|
||
|
private static TOCItem ParseItems( string itemstext, CHMFile chmFile)
|
||
|
{
|
||
|
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
|
||
|
|
||
|
// get group-name indexes
|
||
|
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
|
||
|
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
|
||
|
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
|
||
|
|
||
|
TOCItem tocItem = new TOCItem();
|
||
|
|
||
|
// read parameters
|
||
|
int nParamIndex = 0;
|
||
|
|
||
|
while( ParamRE.IsMatch(itemstext, nParamIndex) )
|
||
|
{
|
||
|
Match mP = ParamRE.Match(itemstext, nParamIndex);
|
||
|
|
||
|
string innerP = mP.Groups[innerPTextIdx].Value;
|
||
|
|
||
|
string paramName = "";
|
||
|
string paramValue = "";
|
||
|
|
||
|
int nAttrIdx = 0;
|
||
|
|
||
|
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
|
||
|
{
|
||
|
Match mA = AttributesRE.Match(innerP, nAttrIdx);
|
||
|
|
||
|
string attributeName = mA.Groups[nameIndex].Value;
|
||
|
string attributeValue = mA.Groups[valueIndex].Value;
|
||
|
string attributeTD = mA.Groups[tdIndex].Value;
|
||
|
|
||
|
if(attributeTD.Length > 0)
|
||
|
{
|
||
|
// delete the trailing textqualifier
|
||
|
if( attributeValue.Length > 0)
|
||
|
{
|
||
|
int ltqi = attributeValue.LastIndexOf( attributeTD );
|
||
|
|
||
|
if(ltqi >= 0)
|
||
|
{
|
||
|
attributeValue = attributeValue.Substring(0,ltqi);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if( attributeName.ToLower() == "name")
|
||
|
{
|
||
|
paramName = attributeValue;
|
||
|
}
|
||
|
|
||
|
if( attributeName.ToLower() == "value")
|
||
|
{
|
||
|
paramValue = attributeValue;
|
||
|
}
|
||
|
|
||
|
nAttrIdx = mA.Index+mA.Length;
|
||
|
}
|
||
|
|
||
|
tocItem.Params[paramName] = paramValue;
|
||
|
switch(paramName.ToLower())
|
||
|
{
|
||
|
case "name":
|
||
|
{
|
||
|
tocItem.Name = paramValue;
|
||
|
};break;
|
||
|
case "local":
|
||
|
{
|
||
|
tocItem.Local = paramValue;
|
||
|
};break;
|
||
|
case "imagenumber":
|
||
|
{
|
||
|
tocItem.ImageIndex = Int32.Parse(paramValue);
|
||
|
|
||
|
if( tocItem.ImageIndex == 2)
|
||
|
tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
|
||
|
};break;
|
||
|
}
|
||
|
|
||
|
nParamIndex = mP.Index+mP.Length;
|
||
|
}
|
||
|
|
||
|
tocItem.ChmFile = chmFile.ChmFilePath;
|
||
|
return tocItem;
|
||
|
}
|
||
|
}
|
||
|
}
|