mirror of
https://github.com/reactos/reactos.git
synced 2025-08-06 14:32:56 +00:00
Import TechBot
svn path=/trunk/; revision=13064
This commit is contained in:
parent
568b27baeb
commit
9dab4509fa
94 changed files with 24386 additions and 0 deletions
220
irc/TechBot/CHMLibrary/CHMDecoding/HHCParser2.cs
Normal file
220
irc/TechBot/CHMLibrary/CHMDecoding/HHCParser2.cs
Normal file
|
@ -0,0 +1,220 @@
|
|||
using System;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace HtmlHelp.ChmDecoding
|
||||
{
|
||||
/// <summary>
|
||||
/// The class <c>HHCParser2</c> implements a parser for HHC contents files.
|
||||
/// </summary>
|
||||
// internal sealed class HHCParser : IHHCParser
|
||||
public class HHCParser2
{
    // NOTE: the parser keeps its working state (text buffers and cursor) in static
    // fields, so concurrent calls to ParseHHC from several threads are not supported.

    /// <summary>
    /// Regular expression matching a complete &lt;param ...&gt; tag; the group
    /// <c>innerText</c> captures everything between "&lt;param" and "&gt;".
    /// </summary>
    private const string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";

    /// <summary>
    /// Regular expression matching a single <c>name=value</c> attribute. The groups are
    /// <c>attributeName</c>, <c>attributeTD</c> (the optional quote character) and
    /// <c>attributeValue</c> (the value including its trailing delimiter).
    /// </summary>
    private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";

    // Built once per process: constructing RegexOptions.Compiled expressions on every
    // ParseHHC call is expensive and gains nothing, since the patterns never change.
    private static readonly Regex ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
    private static readonly Regex AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Sentinel used for "tag not found"; sorts after every real index.</summary>
    private const int NotFound = Int32.MaxValue;

    /// <summary>Lower-cased copy of the HHC text, used for case-insensitive tag searches.</summary>
    private static string m_text1 = "";

    /// <summary>Original HHC text; item text is extracted from here to preserve casing.</summary>
    private static string m_text2 = "";

    /// <summary>Current scan position, valid for both <c>m_text1</c> and <c>m_text2</c>.</summary>
    private static int m_CurrentPos = 0;

    /// <summary>
    /// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
    /// </summary>
    /// <param name="hhcFile">string content of the hhc file</param>
    /// <param name="chmFile">CHMFile instance</param>
    /// <returns>
    /// Returns an ArrayList with the table of contents (TOC) tree, or <c>null</c> if
    /// <paramref name="hhcFile"/> is null or contains no &lt;ul&gt; tag.
    /// </returns>
    public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
    {
        if (hhcFile == null)
        {
            return null;
        }

        m_text2 = hhcFile;
        // Invariant lower-casing: a culture-sensitive ToLower() would turn "<LI>" into
        // "<lı>" under a Turkish culture and the tag searches below would never match.
        m_text1 = hhcFile.ToLower(System.Globalization.CultureInfo.InvariantCulture);

        int idx = m_text1.IndexOf("<ul>");
        if (idx == -1)
        {
            return null;
        }

        // Continue scanning right after the opening "<ul>" (4 characters).
        m_CurrentPos = idx + 4;

        ArrayList tocList = new ArrayList();
        ParseTree(tocList, chmFile);
        return tocList;
    }

    /// <summary>
    /// Returns the index of the next occurrence of <paramref name="tag"/> in the
    /// lower-cased text at or after the current position, or <c>NotFound</c>.
    /// </summary>
    /// <param name="tag">lower-case tag text to search for</param>
    private static int FindNext(string tag)
    {
        int idx = m_text1.IndexOf(tag, m_CurrentPos);
        return (idx < 0) ? NotFound : idx;
    }

    /// <summary>
    /// Recursively parses a sitemap tree starting at the current scan position
    /// </summary>
    /// <param name="arrNodes">arraylist which receives the extracted nodes</param>
    /// <param name="chmFile">CHMFile instance</param>
    private static void ParseTree(ArrayList arrNodes, CHMFile chmFile)
    {
        while (true)
        {
            // A missing tag is mapped to NotFound (Int32.MaxValue) so that it can never
            // be "nearest". With the raw -1 from IndexOf, a missing tag compared as the
            // smallest index and blocked every branch, dropping all remaining items.
            int idxa = FindNext("<ul>");
            int idxb = FindNext("<li>");
            int idxc = FindNext("</ul>");

            if ((idxa < idxb) && (idxa < idxc))
            {
                // Indent: a nested list belongs to the most recently added item; if
                // there is none yet, its items are collected into the current list.
                m_CurrentPos = idxa + 4;

                ArrayList target = arrNodes;
                if (arrNodes.Count > 0)
                {
                    target = ((TOCItem)(arrNodes[arrNodes.Count - 1])).Children;
                }

                ParseTree(target, chmFile);
            }
            else if ((idxb < idxa) && (idxb < idxc))
            {
                // New item: the item's data is the <object>...</object> element that
                // follows the <li> tag.
                m_CurrentPos = idxb + 4;

                int idx2 = m_text1.IndexOf("<object", m_CurrentPos);
                if (idx2 != -1)
                {
                    int idx3 = m_text1.IndexOf("</object>", idx2 + 7);
                    if (idx3 != -1)
                    {
                        // Extract from the original text so values keep their casing.
                        string text = m_text2.Substring(idx2, idx3 - idx2);

                        // Skip past "</object>" (9 characters).
                        m_CurrentPos = idx3 + 9;

                        TOCItem tocItem = ParseItems(text, chmFile);
                        if (tocItem != null)
                        {
                            arrNodes.Add(tocItem);
                        }
                    }
                }
            }
            else if ((idxc < idxa) && (idxc < idxb))
            {
                // Undent: consume "</ul>" (5 characters) and return to the parent level.
                m_CurrentPos = idxc + 5;
                return;
            }
            else
            {
                // None of the tags occurs any more: nothing left to parse.
                return;
            }
        }
    }

    /// <summary>
    /// Parses a single tree node from the text of an &lt;object&gt; element
    /// </summary>
    /// <param name="itemstext">text containing the &lt;param&gt; tags of the item</param>
    /// <param name="chmFile">CHMFile instance</param>
    /// <returns>
    /// A new TOCItem whose Params, Name, Local and ImageIndex are filled from the
    /// &lt;param&gt; tags found and whose ChmFile is set from <paramref name="chmFile"/>.
    /// </returns>
    private static TOCItem ParseItems(string itemstext, CHMFile chmFile)
    {
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

        int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");

        // get group-name indexes
        int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
        int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
        int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");

        TOCItem tocItem = new TOCItem();

        // read parameters; each match is evaluated once and the next search starts
        // right behind it
        for (Match mP = ParamRE.Match(itemstext, 0);
             mP.Success;
             mP = ParamRE.Match(itemstext, mP.Index + mP.Length))
        {
            string innerP = mP.Groups[innerPTextIdx].Value;

            string paramName = "";
            string paramValue = "";

            for (Match mA = AttributesRE.Match(innerP, 0);
                 mA.Success;
                 mA = AttributesRE.Match(innerP, mA.Index + mA.Length))
            {
                string attributeName = mA.Groups[nameIndex].Value;
                string attributeValue = mA.Groups[valueIndex].Value;
                string attributeTD = mA.Groups[tdIndex].Value;

                // The captured value still ends with the closing quote: delete the
                // trailing text qualifier.
                if ((attributeTD.Length > 0) && (attributeValue.Length > 0))
                {
                    int ltqi = attributeValue.LastIndexOf(attributeTD);
                    if (ltqi >= 0)
                    {
                        attributeValue = attributeValue.Substring(0, ltqi);
                    }
                }

                string lowerAttributeName = attributeName.ToLower(invariant);
                if (lowerAttributeName == "name")
                {
                    paramName = attributeValue;
                }
                else if (lowerAttributeName == "value")
                {
                    paramValue = attributeValue;
                }
            }

            tocItem.Params[paramName] = paramValue;

            // Invariant lower-casing: "ImageNumber" must map to "imagenumber" in every
            // culture (a Turkish culture would produce a dotless i and miss the case).
            switch (paramName.ToLower(invariant))
            {
                case "name":
                    tocItem.Name = paramValue;
                    break;

                case "local":
                    tocItem.Local = paramValue;
                    break;

                case "imagenumber":
                    try
                    {
                        tocItem.ImageIndex = Int32.Parse(paramValue, invariant);

                        if (tocItem.ImageIndex == 2)
                        {
                            tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
                        }
                    }
                    catch (FormatException)
                    {
                        // Malformed image number in the file: keep the item's default
                        // image instead of aborting the whole TOC parse.
                    }
                    catch (OverflowException)
                    {
                        // Out-of-range image number: same handling as malformed input.
                    }
                    break;
            }
        }

        tocItem.ChmFile = chmFile.ChmFilePath;
        return tocItem;
    }
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue