Import TechBot

svn path=/trunk/; revision=13064
This commit is contained in:
Casper Hornstrup 2005-01-15 19:27:25 +00:00
parent 568b27baeb
commit 9dab4509fa
94 changed files with 24386 additions and 0 deletions

View file

@ -0,0 +1,274 @@
using System;
using System.Collections;
using System.IO;
using System.Text;
using System.Globalization;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>BinaryReaderHelp</c> implements static helper methods for extracting binary data
/// from a binary reader object.
/// </summary>
internal class BinaryReaderHelp
{
/// <summary>
/// Internal helper method to extract null-terminated strings from a binary reader
/// </summary>
/// <param name="binReader">reference to the binary reader</param>
/// <param name="offset">offset in the stream</param>
/// <param name="noOffset">true if the offset value should be used</param>
/// <param name="encoder">encoder used for text encoding</param>
/// <returns>An extracted string value</returns>
internal static string ExtractString(ref BinaryReader binReader, int offset, bool noOffset, Encoding encoder)
{
string strReturn = "";
if(encoder == null)
encoder = Encoding.ASCII;
ArrayList nameBytes = new ArrayList();
byte curByte;
if(!noOffset)
binReader.BaseStream.Seek(offset, SeekOrigin.Begin);
if(binReader.BaseStream.Position >= binReader.BaseStream.Length)
return "";
curByte = binReader.ReadByte();
while( (curByte != (byte)0) && (binReader.BaseStream.Position < binReader.BaseStream.Length) )
{
nameBytes.Add( curByte );
curByte = binReader.ReadByte();
}
byte[] name = (byte[]) (nameBytes.ToArray(System.Type.GetType("System.Byte")));
strReturn = encoder.GetString(name,0,name.Length);
return strReturn;
}
/// <summary>
/// Internal helper method to extract a string with a specific length from the binary reader
/// </summary>
/// <param name="binReader">reference to the binary reader</param>
/// <param name="length">length of the string (number of bytes)</param>
/// <param name="offset">offset in the stream</param>
/// <param name="noOffset">true if the offset value should be used</param>
/// <param name="encoder">encoder used for text encoding</param>
/// <returns>An extracted string value</returns>
internal static string ExtractString(ref BinaryReader binReader, int length, int offset, bool noOffset, Encoding encoder)
{
string strReturn = "";
if(length == 0)
return "";
if(encoder == null)
encoder = Encoding.ASCII;
ArrayList nameBytes = new ArrayList();
byte curByte;
if(!noOffset)
binReader.BaseStream.Seek(offset, SeekOrigin.Begin);
if(binReader.BaseStream.Position >= binReader.BaseStream.Length)
return "";
curByte = binReader.ReadByte();
while( (curByte != (byte)0) && (nameBytes.Count < length) && (binReader.BaseStream.Position < binReader.BaseStream.Length) )
{
nameBytes.Add( curByte );
if(nameBytes.Count < length)
curByte = binReader.ReadByte();
}
byte[] name = (byte[]) (nameBytes.ToArray(System.Type.GetType("System.Byte")));
strReturn = encoder.GetString(name,0,name.Length);
return strReturn;
}
/// <summary>
/// Internal helper method to extract a string with a specific length from the binary reader
/// </summary>
/// <param name="binReader">reference to the binary reader</param>
/// <param name="bFoundTerminator">reference to a bool vairable which will receive true if the
/// string terminator \0 was found. false indicates that the end of the stream was reached.</param>
/// <param name="offset">offset in the stream</param>
/// <param name="noOffset">true if the offset value should be used</param>
/// <param name="encoder">encoder used for text encoding</param>
/// <returns>An extracted string value</returns>
internal static string ExtractString(ref BinaryReader binReader, ref bool bFoundTerminator, int offset, bool noOffset, Encoding encoder)
{
string strReturn = "";
ArrayList nameBytes = new ArrayList();
byte curByte;
if(encoder == null)
encoder = Encoding.ASCII;
if(!noOffset)
binReader.BaseStream.Seek(offset, SeekOrigin.Begin);
if(binReader.BaseStream.Position >= binReader.BaseStream.Length)
return "";
curByte = binReader.ReadByte();
while( (curByte != (byte)0) && (binReader.BaseStream.Position < binReader.BaseStream.Length) )
{
nameBytes.Add( curByte );
curByte = binReader.ReadByte();
if( curByte == (byte)0 )
{
bFoundTerminator = true;
}
}
byte[] name = (byte[]) (nameBytes.ToArray(System.Type.GetType("System.Byte")));
strReturn = encoder.GetString(name,0,name.Length);
return strReturn;
}
/// <summary>
/// Internal helper method to extract a null-terminated UTF-16/UCS-2 strings from a binary reader
/// </summary>
/// <param name="binReader">reference to the binary reader</param>
/// <param name="offset">offset in the stream</param>
/// <param name="noOffset">true if the offset value should be used</param>
/// <param name="encoder">encoder used for text encoding</param>
/// <returns>An extracted string value</returns>
internal static string ExtractUTF16String(ref BinaryReader binReader, int offset, bool noOffset, Encoding encoder)
{
string strReturn = "";
ArrayList nameBytes = new ArrayList();
byte curByte;
int lastByte=-1;
if(!noOffset)
binReader.BaseStream.Seek(offset, SeekOrigin.Begin);
if(binReader.BaseStream.Position >= binReader.BaseStream.Length)
return "";
if(encoder == null)
encoder = Encoding.Unicode;
curByte = binReader.ReadByte();
int nCnt = 0;
while( ((curByte != (byte)0) || (lastByte != 0) ) && (binReader.BaseStream.Position < binReader.BaseStream.Length) )
{
nameBytes.Add( curByte );
if(nCnt%2 == 0)
lastByte = (int)curByte;
curByte = binReader.ReadByte();
nCnt++;
}
byte[] name = (byte[]) (nameBytes.ToArray(System.Type.GetType("System.Byte")));
strReturn = Encoding.Unicode.GetString(name,0,name.Length);
// apply text encoding
name = Encoding.Default.GetBytes(strReturn);
strReturn = encoder.GetString(name,0,name.Length);
return strReturn;
}
/// <summary>
/// Internal helper for reading ENCINT encoded integer values
/// </summary>
/// <param name="binReader">reference to the reader</param>
/// <returns>a long value</returns>
internal static long ReadENCINT(ref BinaryReader binReader)
{
long nRet = 0;
byte buffer = 0;
int shift = 0;
if(binReader.BaseStream.Position >= binReader.BaseStream.Length)
return nRet;
do
{
buffer = binReader.ReadByte();
nRet |= ((long)((buffer & (byte)0x7F))) << shift;
shift += 7;
}while ( (buffer & (byte)0x80) != 0);
return nRet;
}
/// <summary>
/// Reads an s/r encoded value from the byte array and decodes it into an integer
/// </summary>
/// <param name="wclBits">a byte array containing all bits (contains only 0 or 1 elements)</param>
/// <param name="s">scale param for encoding</param>
/// <param name="r">root param for encoding</param>
/// <param name="nBitIndex">current index in the wclBits array</param>
/// <returns>Returns an decoded integer value.</returns>
internal static int ReadSRItem(byte[] wclBits, int s, int r, ref int nBitIndex)
{
int nRet = 0;
int q = r;
int nPref1Cnt = 0;
while( wclBits[nBitIndex++] == 1)
{
nPref1Cnt++;
}
if(nPref1Cnt == 0)
{
int nMask = 0;
for(int nbits=0; nbits<q;nbits++)
{
nMask |= ( 0x01 & (int)wclBits[nBitIndex]) << (q-nbits-1);
nBitIndex++;
}
nRet = nMask;
}
else
{
q += (nPref1Cnt-1);
int nMask = 0;
int nRMaxValue = 0;
for(int nbits=0; nbits<q;nbits++)
{
nMask |= ( 0x01 & (int)wclBits[nBitIndex]) << (q-nbits-1);
nBitIndex++;
}
for(int nsv=0; nsv<r; nsv++)
{
nRMaxValue = nRMaxValue << 1;
nRMaxValue |= 0x1;
}
nRMaxValue++; // startvalue of s/r encoding with 1 prefixing '1'
nRMaxValue *= (int) Math.Pow((double)2, (double)(nPref1Cnt-1));
nRet = nRMaxValue + nMask;
}
return nRet;
}
}
}

View file

@ -0,0 +1,325 @@
using System;
using System.IO;
using System.Collections;
using System.Collections.Specialized;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMBtree</c> implements methods/properties to decode the binary help index.
/// This class automatically creates an index arraylist for the current CHMFile instance.
/// It does not store the index internally !
/// </summary>
/// <remarks>The binary index can be found in the storage file $WWKeywordLinks/BTree</remarks>
internal sealed class CHMBtree : IDisposable
{
/// <summary>
/// Constant specifying the size of the string blocks
/// </summary>
private const int BLOCK_SIZE = 2048;
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing flags
/// </summary>
private int _flags = 0;
/// <summary>
/// Internal member storing the data format
/// </summary>
private byte[] _dataFormat = new byte[16];
/// <summary>
/// Internal member storing the index of the last listing block
/// </summary>
private int _indexOfLastListingBlock = 0;
/// <summary>
/// Internal member storing the index of the root block
/// </summary>
private int _indexOfRootBlock = 0;
/// <summary>
/// Internal member storing the number of blocks
/// </summary>
private int _numberOfBlocks = 0;
/// <summary>
/// Internal member storing the tree depth.
/// (1 if no index blocks, 2 one level of index blocks, ...)
/// </summary>
private int _treeDepth = 0;
/// <summary>
/// Internal member storing the number of keywords in the file
/// </summary>
private int _numberOfKeywords = 0;
/// <summary>
/// Internal member storing the codepage
/// </summary>
private int _codePage = 0;
/// <summary>
/// true if the index is from a CHI or CHM file, else CHW
/// </summary>
private bool _isCHI_CHM = true;
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Internal flag specifying if we have to read listing or index blocks
/// </summary>
private bool _readListingBlocks = true;
/// <summary>
/// Internal member storing an indexlist of the current file.
/// </summary>
private ArrayList _indexList = new ArrayList();
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the $WWKeywordLinks/BTree file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMBtree(byte[] binaryFileData, CHMFile associatedFile)
{
if( associatedFile == null)
{
throw new ArgumentException("CHMBtree.ctor() - Associated CHMFile must not be null !", "associatedFile");
}
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
// clear internal binary data after extraction
_binaryFileData = null;
}
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nCurOffset = 0;
int nTemp = 0;
// decode header
binReader.ReadChars(2); // 2chars signature (not important)
_flags = (int)binReader.ReadInt16(); // WORD flags
binReader.ReadInt16(); // size of blocks (always 2048)
_dataFormat = binReader.ReadBytes(16);
binReader.ReadInt32(); // unknown DWORD
_indexOfLastListingBlock = binReader.ReadInt32();
_indexOfRootBlock = binReader.ReadInt32();
binReader.ReadInt32(); // unknown DWORD
_numberOfBlocks = binReader.ReadInt32();
_treeDepth = binReader.ReadInt16();
_numberOfKeywords = binReader.ReadInt32();
_codePage = binReader.ReadInt32();
binReader.ReadInt32(); // lcid DWORD
nTemp = binReader.ReadInt32();
_isCHI_CHM = (nTemp==1);
binReader.ReadInt32(); // unknown DWORD
binReader.ReadInt32(); // unknown DWORD
binReader.ReadInt32(); // unknown DWORD
binReader.ReadInt32(); // unknown DWORD
// end of header decode
while( (memStream.Position < memStream.Length) && (bRet) )
{
nCurOffset = (int)memStream.Position;
byte [] dataBlock = binReader.ReadBytes(BLOCK_SIZE);
bRet &= DecodeBlock(dataBlock, ref nCurOffset, _treeDepth-1);
}
return bRet;
}
/// <summary>
/// Decodes a block of url-string data
/// </summary>
/// <param name="dataBlock">block of data</param>
/// <param name="nOffset">current file offset</param>
/// <param name="indexBlocks">number of index blocks</param>
/// <returns>true if succeeded</returns>
private bool DecodeBlock( byte[] dataBlock, ref int nOffset, int indexBlocks )
{
bool bRet = true;
int nblockOffset = nOffset;
MemoryStream memStream = new MemoryStream(dataBlock);
BinaryReader binReader = new BinaryReader(memStream);
int freeSpace = binReader.ReadInt16(); // length of freespace
int nrOfEntries = binReader.ReadInt16(); // number of entries
bool bListingEndReached = false;
//while( (memStream.Position < (memStream.Length-freeSpace)) && (bRet) )
//{
int nIndexOfPrevBlock = -1;
int nIndexOfNextBlock = -1;
int nIndexOfChildBlock = 0;
if(_readListingBlocks)
{
nIndexOfPrevBlock = binReader.ReadInt32(); // -1 if this is the header
nIndexOfNextBlock = binReader.ReadInt32(); // -1 if this is the last block
}
else
{
nIndexOfChildBlock = binReader.ReadInt32();
}
for(int nE = 0; nE < nrOfEntries; nE++)
{
if(_readListingBlocks)
{
bListingEndReached = (nIndexOfNextBlock==-1);
string keyWord = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);
bool isSeeAlsoKeyword = (binReader.ReadInt16()!=0);
int indent = binReader.ReadInt16(); // indent of entry
int nCharIndex = binReader.ReadInt32();
binReader.ReadInt32();
int numberOfPairs = binReader.ReadInt32();
int[] nTopics = new int[numberOfPairs];
string[] seeAlso = new string[numberOfPairs];
for(int i=0; i < numberOfPairs; i++)
{
if(isSeeAlsoKeyword)
{
seeAlso[i] = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);
}
else
{
nTopics[i] = binReader.ReadInt32();
}
}
binReader.ReadInt32(); // unknown
int nIndexOfThisEntry = binReader.ReadInt32();
IndexItem newItem = new IndexItem(_associatedFile, keyWord, isSeeAlsoKeyword, indent, nCharIndex, nIndexOfThisEntry, seeAlso, nTopics);
_indexList.Add(newItem);
}
else
{
string keyWord = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);
bool isSeeAlsoKeyword = (binReader.ReadInt16()!=0);
int indent = binReader.ReadInt16(); // indent of entry
int nCharIndex = binReader.ReadInt32();
binReader.ReadInt32();
int numberOfPairs = binReader.ReadInt32();
int[] nTopics = new int[numberOfPairs];
string[] seeAlso = new string[numberOfPairs];
for(int i=0; i < numberOfPairs; i++)
{
if(isSeeAlsoKeyword)
{
seeAlso[i] = BinaryReaderHelp.ExtractUTF16String(ref binReader, 0, true, _associatedFile.TextEncoding);
}
else
{
nTopics[i] = binReader.ReadInt32();
}
}
int nIndexChild = binReader.ReadInt32();
int nIndexOfThisEntry=-1;
IndexItem newItem = new IndexItem(_associatedFile, keyWord, isSeeAlsoKeyword, indent, nCharIndex, nIndexOfThisEntry, seeAlso, nTopics);
_indexList.Add(newItem);
}
}
//}
binReader.ReadBytes(freeSpace);
if( bListingEndReached )
_readListingBlocks = false;
return bRet;
}
/// <summary>
/// Gets the internal generated index list
/// </summary>
internal ArrayList IndexList
{
get { return _indexList; }
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
}
}
disposed = true;
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,286 @@
using System;
using System.Collections;
using System.IO;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMIdxhdr</c> implements t properties which have been read from the #IDXHDR file.
/// </summary>
internal sealed class CHMIdxhdr : IDisposable
{
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the number of topic nodes including the contents and index files
/// </summary>
private int _numberOfTopicNodes = 0;
/// <summary>
/// Internal member storing the offset in the #STRINGS file of the ImageList param of the "text/site properties" object of the sitemap contents
/// </summary>
private int _imageListOffset = 0;
/// <summary>
/// True if the value of the ImageType param of the "text/site properties" object of the sitemap contents is "Folder".
/// </summary>
private bool _imageTypeFolder = false;
/// <summary>
/// Internal member storing the background value
/// </summary>
private int _background = 0;
/// <summary>
/// Internal member storing the foreground value
/// </summary>
private int _foreground = 0;
/// <summary>
/// Internal member storing the offset in the #STRINGS file of the Font param of the "text/site properties" object of the sitemap contents
/// </summary>
private int _fontOffset = 0;
/// <summary>
/// Internal member storing the offset in the #STRINGS file of the FrameName param of the "text/site properties" object of the sitemap contents
/// </summary>
private int _frameNameOffset = 0;
/// <summary>
/// Internal member storing the offset in the #STRINGS file of the WindowName param of the "text/site properties" object of the sitemap contents
/// </summary>
private int _windowNameOffset = 0;
/// <summary>
/// Internal member storing the number of merged files
/// </summary>
private int _numberOfMergedFiles = 0;
/// <summary>
/// Internal member storing the offset in the #STRINGS file of the merged file names
/// </summary>
private ArrayList _mergedFileOffsets = new ArrayList();
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #IDXHDR file</param>
/// <param name="associatedFile">associated CHMFile instance</param>
public CHMIdxhdr(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
}
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nTemp = 0;
// 4 character T#SM
binReader.ReadBytes(4);
// unknown timestamp DWORD
nTemp = binReader.ReadInt32();
// unknown 1
nTemp = binReader.ReadInt32();
// number of topic nodes including the contents & index files
_numberOfTopicNodes = binReader.ReadInt32();
// unknown DWORD
nTemp = binReader.ReadInt32();
// offset in the strings file
_imageListOffset = binReader.ReadInt32();
if( _imageListOffset == 0)
_imageListOffset = -1; // 0/-1 = none
// unknown DWORD
nTemp = binReader.ReadInt32();
// 1 if the value of the ImageType param of the "text/site properties" object of the sitemap contents is "Folder".
nTemp = binReader.ReadInt32();
_imageTypeFolder = (nTemp == 1);
// offset in the strings file
_background = binReader.ReadInt32();
// offset in the strings file
_foreground = binReader.ReadInt32();
// offset in the strings file
_fontOffset = binReader.ReadInt32();
// window styles DWORD
nTemp = binReader.ReadInt32();
// window styles DWORD
nTemp = binReader.ReadInt32();
// unknown DWORD
nTemp = binReader.ReadInt32();
// offset in the strings file
_frameNameOffset = binReader.ReadInt32();
if( _frameNameOffset == 0)
_frameNameOffset = -1; // 0/-1 = none
// offset in the strings file
_windowNameOffset = binReader.ReadInt32();
if( _windowNameOffset == 0)
_windowNameOffset = -1; // 0/-1 = none
// informations types DWORD
nTemp = binReader.ReadInt32();
// unknown DWORD
nTemp = binReader.ReadInt32();
// number of merged files in the merged file list DWORD
_numberOfMergedFiles = binReader.ReadInt32();
nTemp = binReader.ReadInt32();
for(int i = 0; i < _numberOfMergedFiles; i++)
{
// DWORD offset value of merged file
nTemp = binReader.ReadInt32();
if(nTemp > 0)
_mergedFileOffsets.Add(nTemp);
}
return bRet;
}
/// <summary>
/// Gets the number of topic nodes including the contents and index files
/// </summary>
public int NumberOfTopicNodes
{
get { return _numberOfTopicNodes; }
}
/// <summary>
/// Gets the offset in the #STRINGS file of the ImageList
/// param of the "text/site properties" object of the sitemap contents
/// </summary>
public int ImageListOffset
{
get { return _imageListOffset; }
}
/// <summary>
/// True if the value of the ImageType param of the
/// "text/site properties" object of the sitemap contents is "Folder".
/// </summary>
/// <remarks>If this is set to true, the help will display folders instead of books</remarks>
public bool ImageTypeFolder
{
get { return _imageTypeFolder; }
}
/// <summary>
/// Gets the background setting
/// </summary>
public int Background
{
get { return _background; }
}
/// <summary>
/// Gets the foreground setting
/// </summary>
public int Foreground
{
get { return _foreground; }
}
/// <summary>
/// Gets the offset in the #STRINGS file of the Font
/// param of the "text/site properties" object of the sitemap contents
/// </summary>
public int WindowNameOffset
{
get { return _fontOffset; }
}
/// <summary>
/// Gets the offset in the #STRINGS file of the FrameName
/// param of the "text/site properties" object of the sitemap contents
/// </summary>
public int FrameNameOffset
{
get { return _frameNameOffset; }
}
/// <summary>
/// Gets the offset in the #STRINGS file of the WindowName
/// param of the "text/site properties" object of the sitemap contents
/// </summary>
public int FontOffset
{
get { return _windowNameOffset; }
}
/// <summary>
/// Gets an array list of offset numbers in the #STRINGS file of the
/// merged file names.
/// </summary>
public ArrayList MergedFileOffsets
{
get { return _mergedFileOffsets; }
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_mergedFileOffsets = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,256 @@
using System;
using System.IO;
using System.Collections;
using System.Collections.Specialized;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMStrings</c> implements a string collection read from the #STRINGS file
/// </summary>
internal sealed class CHMStrings : IDisposable
{
/// <summary>
/// Constant specifying the size of the string blocks
/// </summary>
private const int STRING_BLOCK_SIZE = 4096;
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the string dictionary
/// </summary>
private Hashtable _stringDictionary = new Hashtable();
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #STRINGS file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMStrings(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
}
/// <summary>
/// Standard constructor
/// </summary>
internal CHMStrings()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _stringDictionary.Count );
if (_stringDictionary.Count != 0)
{
IDictionaryEnumerator iDictionaryEnumerator = _stringDictionary.GetEnumerator();
while (iDictionaryEnumerator.MoveNext())
{
DictionaryEntry dictionaryEntry = (DictionaryEntry)iDictionaryEnumerator.Current;
writer.Write( Int32.Parse(dictionaryEntry.Key.ToString()) );
writer.Write( dictionaryEntry.Value.ToString() );
}
}
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
int nCnt = reader.ReadInt32();
for(int i=0; i<nCnt;i++)
{
int nKey = reader.ReadInt32();
string sValue = reader.ReadString();
_stringDictionary[nKey.ToString()] = sValue;
}
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
}
#endregion
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
//binReader.ReadByte(); // file starts with a NULL character for 0-based offset indexing
int nStringOffset = 0;
int nSubsetOffset = 0;
while( (memStream.Position < memStream.Length) && (bRet) )
{
nStringOffset = (int)memStream.Position;
byte [] stringBlock = binReader.ReadBytes(STRING_BLOCK_SIZE);
bRet &= DecodeBlock(stringBlock, ref nStringOffset, ref nSubsetOffset);
}
return bRet;
}
/// <summary>
/// Decodes a string block
/// </summary>
/// <param name="stringBlock">byte array which represents the string block</param>
/// <param name="nStringOffset">current string offset number</param>
/// <param name="nSubsetOffset">reference to a subset variable</param>
/// <returns>true if succeeded</returns>
/// <remarks>If a string crosses the end of a block then it will be cut off
/// without a NT and repeated in full, with a NT, at the start of the next block.
/// For eg "To customize the appearance of a contents file" might become
/// "To customize the (block ending)To customize the appearance of a contents file"
/// when there are 17 bytes left at the end of the block. </remarks>
private bool DecodeBlock( byte[] stringBlock, ref int nStringOffset, ref int nSubsetOffset)
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(stringBlock);
BinaryReader binReader = new BinaryReader(memStream);
while( (memStream.Position < memStream.Length) && (bRet) )
{
bool bFoundTerminator = false;
int nCurOffset = nStringOffset + (int)memStream.Position;
string sTemp = BinaryReaderHelp.ExtractString(ref binReader, ref bFoundTerminator, 0, true, _associatedFile.TextEncoding);
if(nSubsetOffset != 0)
{
_stringDictionary[nSubsetOffset.ToString()] = sTemp.ToString();
}
else
{
_stringDictionary[nCurOffset.ToString()] = sTemp.ToString();
}
if( bFoundTerminator )
{
nSubsetOffset = 0;
}
else
{
nSubsetOffset = nCurOffset;
}
}
return bRet;
}
/// <summary>
/// Indexer which returns the string at a given offset
/// </summary>
public string this[int offset]
{
get
{
if(offset == -1)
return String.Empty;
string sTemp = (string)_stringDictionary[ offset.ToString() ];
if(sTemp == null)
return String.Empty;
return sTemp;
}
}
/// <summary>
/// Indexer which returns the string at a given offset
/// </summary>
public string this[string offset]
{
get
{
if(offset == "-1")
return String.Empty;
string sTemp = (string)_stringDictionary[ offset ];
if(sTemp == null)
return String.Empty;
return sTemp;
}
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_stringDictionary = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,821 @@
using System;
using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Globalization;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMSystem</c> reads the #SYSTEM file of the chm and stores its settings
/// </summary>
internal sealed class CHMSystem : IDisposable
{
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the file version
/// </summary>
private int _fileVersion = 0;
/// <summary>
/// Internal member storing the contents file path
/// </summary>
private string _contentsFile = "";
/// <summary>
/// Internal member storing the index file path
/// </summary>
private string _indexFile = "";
/// <summary>
/// Internal member storing the default help topic
/// </summary>
private string _defaultTopic = "";
/// <summary>
/// Internal member storing the help-window title
/// </summary>
private string _title = "";
/// <summary>
/// Internal flag if dbcs is on
/// </summary>
private bool _dbcs = false;
/// <summary>
/// Internal flag if fulltext search is enabled
/// </summary>
private bool _fullTextSearch = false;
/// <summary>
/// Internal flag if KLinks are in the file
/// </summary>
private bool _hasKLinks = false;
/// <summary>
/// Internal flag if ALinks are in the file
/// </summary>
private bool _hasALinks = false;
/// <summary>
/// Internal member storing the name of the default window
/// </summary>
private string _defaultWindow = "";
/// <summary>
/// Internal member storing the filename of the compiled file
/// </summary>
private string _compileFile = "";
/// <summary>
/// Internal flag storing the offset value of the binary index
/// </summary>
private uint _binaryIndexURLTableID = 0;
/// <summary>
/// Inernal member storing the compiler version this file was compiled
/// </summary>
private string _compilerVersion = "";
/// <summary>
/// Internal flag storing the offset value of the binary TOC
/// </summary>
private uint _binaryTOCURLTableID = 0;
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Internal member storing the default fontface, size, charset
/// </summary>
private string _defaultFont = "";
/// <summary>
/// Internal member storing the culture info of the file
/// </summary>
private CultureInfo _culture;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #SYSTEM file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMSystem(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
if(_culture == null)
{
// Set the text encoder of the chm file to the read charset/codepage
_associatedFile.TextEncoding = Encoding.GetEncoding( this.CodePage );
}
}
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
// First entry = DWORD for version number
_fileVersion = (int) binReader.ReadInt32();
while( (memStream.Position < memStream.Length) && (bRet) )
{
bRet &= DecodeEntry(ref binReader);
}
return bRet;
}
/// <summary>
/// Decodes an #system file entry
/// </summary>
/// <param name="binReader">binary reader reference</param>
/// <returns>true if succeeded</returns>
private bool DecodeEntry(ref BinaryReader binReader)
{
bool bRet = true;
int code = (int) binReader.ReadInt16(); // entry code, WORD
int length = (int) binReader.ReadInt16(); // length of entry
switch(code)
{
case 0:
{
_contentsFile = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 1:
{
_indexFile = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 2:
{
_defaultTopic = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 3:
{
_title = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 4:
{
int nTemp = 0;
nTemp = binReader.ReadInt32(); // read DWORD LCID
_culture = new CultureInfo(nTemp);
if(_culture != null)
_associatedFile.TextEncoding = Encoding.GetEncoding(_culture.TextInfo.ANSICodePage);
nTemp = binReader.ReadInt32(); // read DWORD DBCS
_dbcs = (nTemp == 1);
nTemp = binReader.ReadInt32(); // read DWORD Fulltext search
_fullTextSearch = (nTemp == 1);
nTemp = binReader.ReadInt32(); // read DWORD has klinks
_hasKLinks = (nTemp != 0);
nTemp = binReader.ReadInt32(); // read DWORD has alinks
_hasALinks = (nTemp != 0);
// read the rest of code 4 (not important for us)
byte[] temp = new byte[length-(5*4)];
temp = binReader.ReadBytes(length-(5*4));
};break;
case 5:
{
_defaultWindow = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 6:
{
_compileFile = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 7:
{
if(_fileVersion > 2)
{
_binaryIndexURLTableID = (uint) binReader.ReadInt32();
}
else
{
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
}
};break;
case 8:
{
// abbreviation (not interresting for us)
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 9:
{
_compilerVersion = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
case 10:
{
// timestamp of the file (not interresting for us)
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 11:
{
if(_fileVersion > 2)
{
_binaryTOCURLTableID = (uint) binReader.ReadInt32();
}
else
{
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
}
};break;
case 12:
{
// number of information bytes
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 13:
{
// copy of file #idxhdr
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 14:
{
// custom tabs for HH viewer
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 15:
{
// a checksum
byte[] read = binReader.ReadBytes(length);
int i=read.Length;
};break;
case 16:
{
// Default Font=string,number,number
// The string is the name of the font, the first number is the
// point size & the last number is the character set used by the font.
// For acceptable values see *_CHARSET defines in wingdi.h from the
// Windows SDK or the same file in MinGW or Wine.
// Most of the time you will only want to use 0, which is the value for ANSI,
// which is the subset of ASCII used by Windows.
_defaultFont = BinaryReaderHelp.ExtractString(ref binReader,length, 0, true, _associatedFile.TextEncoding);
};break;
default:
{
byte[] temp = new byte[length];
temp = binReader.ReadBytes(length);
//bRet = false;
int i=temp.Length;
};break;
}
return bRet;
}
/// <summary>
/// Reads all HHC files and checks which one has the global object tag.
/// This hhc file will be returned as master hhc
/// </summary>
/// <param name="hhcTopics">list of topics containing the extension hhc</param>
/// <param name="TopicItemArrayList">true if the arraylist contains topic items</param>
/// <returns>the filename of the found master toc</returns>
private string GetMasterHHC(ArrayList hhcTopics, bool TopicItemArrayList)
{
string sRet = "";
if( (hhcTopics!=null) && (hhcTopics.Count > 0) )
{
if( TopicItemArrayList )
{
if(hhcTopics.Count == 1)
{
sRet = ((TopicEntry)hhcTopics[0]).Locale;
}
else
{
foreach(TopicEntry curEntry in hhcTopics)
{
CHMStream.CHMStream iw=null;
MemoryStream fileObject=null;
if( _associatedFile.CurrentStorageWrapper == null)
{
iw=new CHMStream.CHMStream();
iw.OpenCHM(_associatedFile.ChmFilePath);
}
else
{
iw = _associatedFile.CurrentStorageWrapper;
}
fileObject = iw.OpenStream(curEntry.Locale);
if( fileObject != null)
{
string fileString =_associatedFile.TextEncoding.GetString(fileObject.ToArray(),0,(int)fileObject.Length);
fileObject.Close();
if( HHCParser.HasGlobalObjectTag(fileString, _associatedFile) )
{
sRet = curEntry.Locale;
break;
}
}
}
}
}
else
{
if(hhcTopics.Count == 1)
{
sRet = ((string)hhcTopics[0]);
}
else
{
foreach(string curEntry in hhcTopics)
{
CHMStream.CHMStream iw=null;
MemoryStream fileObject=null;
if( _associatedFile.CurrentStorageWrapper == null)
{
iw=new CHMStream.CHMStream();
iw.OpenCHM(_associatedFile.ChmFilePath);
}
else
{
iw = _associatedFile.CurrentStorageWrapper;
}
fileObject = iw.OpenStream(curEntry);
if( fileObject != null)
{
string fileString =_associatedFile.TextEncoding.GetString(fileObject.ToArray(),0,(int)fileObject.Length);
fileObject.Close();
if( HHCParser.HasGlobalObjectTag(fileString, _associatedFile) )
{
sRet = curEntry;
break;
}
}
}
}
}
}
return sRet;
}
/// <summary>
/// Gets the file version of the chm file.
/// 2 for Compatibility=1.0, 3 for Compatibility=1.1
/// </summary>
public int FileVersion
{
get { return _fileVersion; }
}
/// <summary>
/// Gets the contents file name
/// </summary>
public string ContentsFile
{
get
{
if( BinaryTOC ) // if the file contains a binary TOC
{
// make sure the CHMFile instance exists and has loaded the file #URLTBL
if( (_associatedFile != null) && (_associatedFile.UrltblFile != null ) )
{
// Get an url-table entry by its unique id
UrlTableEntry entry = _associatedFile.UrltblFile.GetByUniqueID( this.BinaryTOCURLTableID );
if(entry != null)
{
// entry found, return the url ( = filename )
return entry.URL;
}
}
}
else
{
if(_contentsFile.Length <= 0)
{
string sCheck = "Table of Contents.hhc"; // default HHP contents filename
if( (_associatedFile != null) && (_associatedFile.TopicsFile != null ) )
{
TopicEntry te = _associatedFile.TopicsFile.GetByLocale( sCheck );
if( te == null)
{
sCheck = "toc.hhc"; // default HHP contents filename
te = _associatedFile.TopicsFile.GetByLocale( sCheck );
if( te == null)
{
sCheck = CompileFile + ".hhc";
te = _associatedFile.TopicsFile.GetByLocale( sCheck );
if( te == null)
{
ArrayList arrExt = _associatedFile.TopicsFile.GetByExtension("hhc");
if( arrExt == null )
{
arrExt = _associatedFile.EnumFilesByExtension("hhc");
if( arrExt == null )
{
Debug.WriteLine("CHMSystem.ContentsFile - Failed, contents file not found !");
}
else
{
if(arrExt.Count > 1)
{
sCheck = GetMasterHHC(arrExt, false);
_contentsFile = sCheck;
}
else
{
_contentsFile = ((string)arrExt[0]);
sCheck = _contentsFile;
}
}
}
else
{
if(arrExt.Count > 1)
{
sCheck = GetMasterHHC(arrExt, true);
_contentsFile = sCheck;
}
else
{
_contentsFile = ((TopicEntry)arrExt[0]).Locale;
sCheck = _contentsFile;
}
}
}
else
{
_contentsFile = sCheck;
}
}
else
{
_contentsFile = sCheck;
}
}
else
{
_contentsFile = sCheck;
}
}
return sCheck;
}
}
return _contentsFile;
}
}
/// <summary>
/// Gets the index file name
/// </summary>
public string IndexFile
{
get
{
if( BinaryIndex ) // if the file contains a binary index
{
// make sure the CHMFile instance exists and has loaded the file #URLTBL
if( (_associatedFile != null) && (_associatedFile.UrltblFile != null ) )
{
// Get an url-table entry by its unique id
UrlTableEntry entry = _associatedFile.UrltblFile.GetByUniqueID( this.BinaryIndexURLTableID );
if(entry != null)
{
// entry found, return the url ( = filename )
return entry.URL;
}
}
}
else
{
if(_indexFile.Length <= 0)
{
string sCheck = "Index.hhk"; // default HHP index filename
if( (_associatedFile != null) && (_associatedFile.TopicsFile != null ) )
{
TopicEntry te = _associatedFile.TopicsFile.GetByLocale( sCheck );
if( te == null)
{
sCheck = CompileFile + ".hhk";
te = _associatedFile.TopicsFile.GetByLocale( sCheck );
if( te == null)
{
ArrayList arrExt = _associatedFile.TopicsFile.GetByExtension("hhk");
if( arrExt == null )
{
Debug.WriteLine("CHMSystem.IndexFile - Failed, index file not found !");
}
else
{
_indexFile = ((TopicEntry)arrExt[0]).Locale;
sCheck = _indexFile;
}
}
else
{
_indexFile = sCheck;
}
}
else
{
_indexFile = sCheck;
}
}
return sCheck;
}
}
return _indexFile;
}
}
/// <summary>
/// Sets the default topic of this file
/// </summary>
/// <param name="local">new local value of the topic</param>
internal void SetDefaultTopic(string local)
{
_defaultTopic = local;
}
/// <summary>
/// Gets the default help topic
/// </summary>
public string DefaultTopic
{
get { return _defaultTopic; }
}
/// <summary>
/// Gets the title of the help window
/// </summary>
public string Title
{
get { return _title; }
}
/// <summary>
/// Gets the flag if DBCS is in use
/// </summary>
public bool DBCS
{
get { return _dbcs; }
}
/// <summary>
/// Gets the flag if full-text-search is available
/// </summary>
public bool FullTextSearch
{
get { return _fullTextSearch; }
}
/// <summary>
/// Gets the flag if the file has ALinks
/// </summary>
public bool HasALinks
{
get { return _hasALinks; }
}
/// <summary>
/// Gets the flag if the file has KLinks
/// </summary>
public bool HasKLinks
{
get { return _hasKLinks; }
}
/// <summary>
/// Gets the default window name
/// </summary>
public string DefaultWindow
{
get { return _defaultWindow; }
}
/// <summary>
/// Gets the file name of the compile file
/// </summary>
public string CompileFile
{
get { return _compileFile; }
}
/// <summary>
/// Gets the id of the binary index in the url table
/// </summary>
public uint BinaryIndexURLTableID
{
get { return _binaryIndexURLTableID; }
}
/// <summary>
/// Gets the flag if the chm has a binary index file
/// </summary>
public bool BinaryIndex
{
get { return (_binaryIndexURLTableID>0); }
}
/// <summary>
/// Gets the flag if the chm has a binary index file
/// </summary>
public string CompilerVersion
{
get { return _compilerVersion; }
}
/// <summary>
/// Gets the id of the binary toc in the url table
/// </summary>
public uint BinaryTOCURLTableID
{
get { return _binaryTOCURLTableID; }
}
/// <summary>
/// Gets the flag if the chm has a binary toc file
/// </summary>
public bool BinaryTOC
{
get { return (_binaryTOCURLTableID>0); }
}
/// <summary>
/// Gets the font face of the read font property.
/// Empty string for default font.
/// </summary>
public string FontFace
{
get
{
if( _defaultFont.Length > 0)
{
string [] fontSplit = _defaultFont.Split( new char[]{','});
if(fontSplit.Length > 0)
return fontSplit[0].Trim();
}
return "";
}
}
/// <summary>
/// Gets the font size of the read font property.
/// 0 for default font size
/// </summary>
public double FontSize
{
get
{
if( _defaultFont.Length > 0)
{
string [] fontSplit = _defaultFont.Split( new char[]{','});
if(fontSplit.Length > 1)
return double.Parse(fontSplit[1].Trim());
}
return 0.0;
}
}
/// <summary>
/// Gets the character set of the read font property
/// 1 for default
/// </summary>
public int CharacterSet
{
get
{
if( _defaultFont.Length > 0)
{
string [] fontSplit = _defaultFont.Split( new char[]{','});
if(fontSplit.Length > 2)
return Int32.Parse(fontSplit[2].Trim());
}
return 0;
}
}
/// <summary>
/// Gets the codepage depending on the read font property
/// </summary>
public int CodePage
{
get
{
// if we've read a LCID from the system file
// ignore the font-property settings and return
// the codepage generated from the culture info
if(_culture != null)
{
return _culture.TextInfo.ANSICodePage;
}
int nRet = 1252; // default codepage windows-1252
int nCSet = CharacterSet;
switch(nCSet)
{
case 0x00: nRet = 1252;break; // ANSI_CHARSET
case 0xCC: nRet = 1251;break; // RUSSIAN_CHARSET
case 0xEE: nRet = 1250;break; // EE_CHARSET
case 0xA1: nRet = 1253;break; // GREEK_CHARSET
case 0xA2: nRet = 1254;break; // TURKISH_CHARSET
case 0xBA: nRet = 1257;break; // BALTIC_CHARSET
case 0xB1: nRet = 1255;break; // HEBREW_CHARSET
case 0xB2: nRet = 1256;break; // ARABIC_CHARSET
case 0x80: nRet = 932;break; // SHIFTJIS_CHARSET
case 0x81: nRet = 949;break; // HANGEUL_CHARSET
case 0x86: nRet = 936;break; // GB2313_CHARSET
case 0x88: nRet = 950;break; // CHINESEBIG5_CHARSET
}
return nRet;
}
}
/// <summary>
/// Gets the assiciated culture info
/// </summary>
public CultureInfo Culture
{
get { return _culture; }
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,288 @@
using System;
using System.IO;
using System.Collections;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMTocidx</c> implements functions to decode the #TOCIDX internal file.
/// </summary>
internal sealed class CHMTocidx : IDisposable
{
/// <summary>
/// Constant specifying the size of the data blocks
/// </summary>
private const int BLOCK_SIZE = 0x1000;
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal memebr storing the offset to the 20/28 byte structs
/// </summary>
private int _offset2028 = 0;
/// <summary>
/// Internal member storing the offset to the 16 byte structs
/// </summary>
private int _offset16structs = 0;
/// <summary>
/// Internal member storing the number of 16 byte structs
/// </summary>
private int _numberOf16structs = 0;
/// <summary>
/// Internal member storing the offset to the topic list
/// </summary>
private int _offsetOftopics = 0;
/// <summary>
/// Internal member storing the toc
/// </summary>
private ArrayList _toc = new ArrayList();
/// <summary>
/// Internal member for offset seeking
/// </summary>
private Hashtable _offsetTable = new Hashtable();
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #TOCIDX file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMTocidx(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
// clear internal binary data after extraction
_binaryFileData = null;
}
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
_toc = new ArrayList();
_offsetTable = new Hashtable();
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nCurOffset = 0;
_offset2028 = binReader.ReadInt32();
_offset16structs = binReader.ReadInt32();
_numberOf16structs = binReader.ReadInt32();
_offsetOftopics = binReader.ReadInt32();
binReader.BaseStream.Seek( _offset2028, SeekOrigin.Begin );
if( RecursivelyBuildTree(ref binReader, _offset2028, _toc, null) )
{
binReader.BaseStream.Seek( _offset16structs, SeekOrigin.Begin );
nCurOffset = (int)binReader.BaseStream.Position;
for(int i=0; i < _numberOf16structs; i++)
{
int tocOffset = binReader.ReadInt32();
int sqNr = binReader.ReadInt32();
int topOffset = binReader.ReadInt32();
int hhctopicIdx = binReader.ReadInt32();
nCurOffset = (int)binReader.BaseStream.Position;
int topicIdx = -1;
// if the topic offset is within the range of the stream
// and is >= the offset of the first topic dword
if((topOffset < (binReader.BaseStream.Length - 4)) && (topOffset >= _offsetOftopics))
{
// read the index of the topic for this item
binReader.BaseStream.Seek( topOffset, SeekOrigin.Begin);
topicIdx = binReader.ReadInt32();
binReader.BaseStream.Seek( nCurOffset, SeekOrigin.Begin);
TOCItem item = (TOCItem)_offsetTable[tocOffset.ToString()];
if( item != null)
{
if(( topicIdx < _associatedFile.TopicsFile.TopicTable.Count)&&(topicIdx>=0))
{
TopicEntry te = (TopicEntry) (_associatedFile.TopicsFile.TopicTable[topicIdx]);
if( (te != null) && (item.TopicOffset < 0) )
{
item.TopicOffset = te.EntryOffset;
}
}
}
}
}
}
return bRet;
}
/// <summary>
/// Recursively reads the binary toc tree from the file
/// </summary>
/// <param name="binReader">reference to binary reader</param>
/// <param name="NodeOffset">offset of the first node in the current level</param>
/// <param name="level">arraylist of TOCItems for the current level</param>
/// <param name="parentItem">parent item for the item</param>
/// <returns>Returns true if succeeded</returns>
private bool RecursivelyBuildTree(ref BinaryReader binReader, int NodeOffset, ArrayList level, TOCItem parentItem)
{
bool bRet = true;
int nextOffset=0;
int nReadOffset = (int)binReader.BaseStream.Position;
binReader.BaseStream.Seek(NodeOffset, SeekOrigin.Begin);
do
{
int nCurOffset = (int)binReader.BaseStream.Position;
int unkn1 = binReader.ReadInt16(); // unknown
int unkn2 = binReader.ReadInt16(); // unknown
int flag = binReader.ReadInt32();
int nFolderAdd = 0;
if((_associatedFile != null) && (_associatedFile.ImageTypeFolder))
{
// get the value which should be added, to display folders instead of books
if(HtmlHelpSystem.UseHH2TreePics)
nFolderAdd = 8;
else
nFolderAdd = 4;
}
int nFolderImgIdx = (HtmlHelpSystem.UseHH2TreePics ? (TOCItem.STD_FOLDER_HH2+nFolderAdd) : (TOCItem.STD_FOLDER_HH1+nFolderAdd));
int nFileImgIdx = (HtmlHelpSystem.UseHH2TreePics ? TOCItem.STD_FILE_HH2 : TOCItem.STD_FILE_HH1);
int stdImage = ((flag & 0x4)!=0) ? nFolderImgIdx : nFileImgIdx;
int stringOffset = binReader.ReadInt32();
int ParentOffset = binReader.ReadInt32();
nextOffset = binReader.ReadInt32();
int firstChildOffset = 0;
int unkn3=0;
if( (flag&0x4)!=0 )
{
firstChildOffset = binReader.ReadInt32();
unkn3 = binReader.ReadInt32(); // unknown
}
TOCItem newItem = new TOCItem();
newItem.ImageIndex = stdImage;
newItem.Offset = nCurOffset;
newItem.OffsetNext = nextOffset;
newItem.AssociatedFile = _associatedFile;
newItem.TocMode = DataMode.Binary;
newItem.Parent = parentItem;
if( (flag&0x08) == 0)
{
// toc item doesn't have a local value (=> stringOffset = offset of strings file)
newItem.Name = _associatedFile.StringsFile[stringOffset];
}
else
{
// this item has a topic entry (=> stringOffset = index of topic entry)
if((stringOffset < _associatedFile.TopicsFile.TopicTable.Count) && (stringOffset >= 0))
{
TopicEntry te = (TopicEntry) (_associatedFile.TopicsFile.TopicTable[stringOffset]);
if(te != null)
{
newItem.TopicOffset = te.EntryOffset;
}
}
}
_offsetTable[nCurOffset.ToString()] = newItem;
// if this item has children (firstChildOffset > 0)
if( firstChildOffset > 0)
{
bRet &= RecursivelyBuildTree(ref binReader, firstChildOffset, newItem.Children, newItem);
}
level.Add( newItem );
if(nCurOffset != nextOffset)
binReader.BaseStream.Seek(nextOffset, SeekOrigin.Begin);
}while(nextOffset != 0);
binReader.BaseStream.Seek(nReadOffset, SeekOrigin.Begin);
return bRet;
}
/// <summary>
/// Gets the internal read toc
/// </summary>
internal ArrayList TOC
{
get { return _toc; }
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_toc = null;
_offsetTable = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,235 @@
using System;
using System.IO;
using System.Collections;
using System.Collections.Specialized;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMTopics</c> implements functionality to decode the #TOPICS internal file
/// </summary>
internal sealed class CHMTopics : IDisposable
{
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Internal member storing the topic list
/// </summary>
private ArrayList _topicTable = new ArrayList();
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #TOPICS file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMTopics(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
// clear internal binary data after extraction
_binaryFileData = null;
}
/// <summary>
/// Standard constructor
/// </summary>
internal CHMTopics()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _topicTable.Count );
foreach(TopicEntry curItem in _topicTable)
{
curItem.Dump(ref writer);
}
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
int i=0;
int nCnt = reader.ReadInt32();
for(i=0; i<nCnt;i++)
{
TopicEntry newItem = new TopicEntry();
newItem.SetCHMFile(_associatedFile);
newItem.ReadDump(ref reader);
_topicTable.Add(newItem);
}
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
foreach(TopicEntry curEntry in _topicTable)
{
curEntry.SetCHMFile(associatedFile);
}
}
#endregion
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nCurOffset = 0;
while( (memStream.Position < memStream.Length) && (bRet) )
{
int entryOffset = nCurOffset;
int tocIdx = binReader.ReadInt32();
int titleOffset = binReader.ReadInt32();
int urltablOffset = binReader.ReadInt32();
int visibilityMode = binReader.ReadInt16();
int unknownMode = binReader.ReadInt16();
TopicEntry newEntry = new TopicEntry(entryOffset, tocIdx, titleOffset, urltablOffset, visibilityMode, unknownMode, _associatedFile);
_topicTable.Add( newEntry );
nCurOffset = (int)memStream.Position;
}
return bRet;
}
/// <summary>
/// Gets the arraylist containing all topic entries.
/// </summary>
public ArrayList TopicTable
{
get
{
return _topicTable;
}
}
/// <summary>
/// Gets the topic entry of a given offset
/// </summary>
public TopicEntry this[int offset]
{
get
{
foreach(TopicEntry curEntry in _topicTable)
if(curEntry.EntryOffset == offset)
return curEntry;
return null;
}
}
/// <summary>
/// Searches a topic by the locale name
/// </summary>
/// <param name="locale">locale name to search</param>
/// <returns>The topicentry instance if found, otherwise null</returns>
public TopicEntry GetByLocale(string locale)
{
foreach(TopicEntry curEntry in TopicTable)
{
if(curEntry.Locale.ToLower() == locale.ToLower())
return curEntry;
}
return null;
}
/// <summary>
/// Searches the topics for all files with a given file extension
/// </summary>
/// <param name="fileExtension">extension to search</param>
/// <returns>An arraylist of TopicEntry instances or null if no topic was found</returns>
public ArrayList GetByExtension(string fileExtension)
{
ArrayList arrRet = new ArrayList();
foreach(TopicEntry curEntry in TopicTable)
{
if(curEntry.Locale.ToLower().EndsWith(fileExtension.ToLower()))
arrRet.Add(curEntry);
}
if(arrRet.Count > 0)
return arrRet;
return null;
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_topicTable=null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,308 @@
using System;
using System.IO;
using System.Collections;
using System.Collections.Specialized;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMUrlstr</c> implements a string collection storing the URL strings of the help file
/// </summary>
internal sealed class CHMUrlstr : IDisposable
{
/// <summary>
/// Constant specifying the size of the string blocks
/// </summary>
private const int BLOCK_SIZE = 0x1000;
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the url dictionary
/// </summary>
private Hashtable _urlDictionary = new Hashtable();
/// <summary>
/// Internal member storing the framename dictionary
/// </summary>
private Hashtable _framenameDictionary = new Hashtable();
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #URLSTR file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMUrlstr(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
// clear internal binary data after extraction
_binaryFileData = null;
}
/// <summary>
/// Standard constructor
/// </summary>
internal CHMUrlstr()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _urlDictionary.Count );
if (_urlDictionary.Count != 0)
{
IDictionaryEnumerator iDictionaryEnumerator = _urlDictionary.GetEnumerator();
while (iDictionaryEnumerator.MoveNext())
{
DictionaryEntry dictionaryEntry = (DictionaryEntry)iDictionaryEnumerator.Current;
writer.Write( Int32.Parse(dictionaryEntry.Key.ToString()) );
writer.Write( dictionaryEntry.Value.ToString() );
}
}
writer.Write( _framenameDictionary.Count );
if (_framenameDictionary.Count != 0)
{
IDictionaryEnumerator iDictionaryEnumerator = _framenameDictionary.GetEnumerator();
while (iDictionaryEnumerator.MoveNext())
{
DictionaryEntry dictionaryEntry = (DictionaryEntry)iDictionaryEnumerator.Current;
writer.Write( Int32.Parse(dictionaryEntry.Key.ToString()) );
writer.Write( dictionaryEntry.Value.ToString() );
}
}
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
int i=0;
int nCnt = reader.ReadInt32();
for(i=0; i<nCnt;i++)
{
int nKey = reader.ReadInt32();
string sValue = reader.ReadString();
_urlDictionary[nKey.ToString()] = sValue;
}
nCnt = reader.ReadInt32();
for(i=0; i<nCnt;i++)
{
int nKey = reader.ReadInt32();
string sValue = reader.ReadString();
_framenameDictionary[nKey.ToString()] = sValue;
}
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
}
#endregion
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nCurOffset = 0;
while( (memStream.Position < memStream.Length) && (bRet) )
{
nCurOffset = (int)memStream.Position;
byte [] dataBlock = binReader.ReadBytes(BLOCK_SIZE);
bRet &= DecodeBlock(dataBlock, ref nCurOffset);
}
return bRet;
}
/// <summary>
/// Decodes a block of url-string data
/// </summary>
/// <param name="dataBlock">block of data</param>
/// <param name="nOffset">current file offset</param>
/// <returns>true if succeeded</returns>
private bool DecodeBlock( byte[] dataBlock, ref int nOffset )
{
bool bRet = true;
int blockOffset = nOffset;
MemoryStream memStream = new MemoryStream(dataBlock);
BinaryReader binReader = new BinaryReader(memStream);
if(nOffset==0)
binReader.ReadByte(); // first block starts with an unknown byte
while( (memStream.Position < (memStream.Length-8)) && (bRet) )
{
int entryOffset = blockOffset + (int)memStream.Position;
int urlOffset = binReader.ReadInt32();
int frameOffset = binReader.ReadInt32();
// There is one way to tell where the end of the URL/FrameName
// pairs occurs: Repeat the following: read 2 DWORDs and if both
// are less than the current offset then this is the start of the Local
// strings else skip two NT strings.
// if(( (urlOffset < (entryOffset+8)) && (frameOffset < (entryOffset+8)) ))
// {
// //TODO: add correct string reading if an offset has been found
// /*
// int curOffset = (int)memStream.Position;
//
// memStream.Seek( (long)(blockOffset-urlOffset), SeekOrigin.Begin);
// string sTemp = CHMReader.ExtractString(ref binReader, 0, true);
//
// memStream.Seek( (long)(blockOffset-frameOffset), SeekOrigin.Begin);
// sTemp = CHMReader.ExtractString(ref binReader, 0, true);
//
// memStream.Seek((long)curOffset, SeekOrigin.Begin);
// */
//
//
// int curOffs = (int)memStream.Position;
// BinaryReaderHelp.ExtractString(ref binReader, 0, true, _associatedFile.TextEncoding);
// nOffset += (int)memStream.Position - curOffs;
//
// curOffs = (int)memStream.Position;
// BinaryReaderHelp.ExtractString(ref binReader, 0, true, _associatedFile.TextEncoding);
// nOffset += (int)memStream.Position - curOffs;
// }
// else
{
bool bFoundTerminator = false;
string sTemp = BinaryReaderHelp.ExtractString(ref binReader, ref bFoundTerminator, 0, true, _associatedFile.TextEncoding);
if(sTemp == "")
{
//nOffset = nOffset + 1 + (int)memStream.Length - (int)memStream.Position;
memStream.Seek(memStream.Length-1, SeekOrigin.Begin);
}
else
{
_urlDictionary[entryOffset.ToString()] = sTemp.ToString();
_framenameDictionary[ entryOffset.ToString() ] = sTemp.ToString() ;
}
}
}
return bRet;
}
/// <summary>
/// Gets the url at a given offset
/// </summary>
/// <param name="offset">offset of url</param>
/// <returns>the url at the given offset</returns>
public string GetURLatOffset(int offset)
{
if(offset == -1)
return String.Empty;
string sTemp = (string)_urlDictionary[ offset.ToString() ];
if(sTemp == null)
return String.Empty;
return sTemp;
}
/// <summary>
/// Gets the framename at a given offset
/// </summary>
/// <param name="offset">offset of the framename</param>
/// <returns>the frame name at the given offset</returns>
public string GetFrameNameatOffset(int offset)
{
if(offset == -1)
return String.Empty;
string sTemp = (string)_framenameDictionary[ offset.ToString() ];
if(sTemp == null)
return String.Empty;
return sTemp;
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_urlDictionary = null;
_framenameDictionary = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,245 @@
using System;
using System.IO;
using System.Collections;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>CHMUrltable</c> implements methods to decode the #URLTBL internal file.
/// </summary>
internal sealed class CHMUrltable : IDisposable
{
/// <summary>
/// Constant specifying the size of the data blocks
/// </summary>
private const int BLOCK_SIZE = 0x1000;
/// <summary>
/// Constant specifying the number of records per block
/// </summary>
private const int RECORDS_PER_BLOCK = 341;
/// <summary>
/// Internal flag specifying if the object is going to be disposed
/// </summary>
private bool disposed = false;
/// <summary>
/// Internal member storing the binary file data
/// </summary>
private byte[] _binaryFileData = null;
/// <summary>
/// Internal member storing the url table
/// </summary>
private ArrayList _urlTable = new ArrayList();
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="binaryFileData">binary file data of the #URLTBL file</param>
/// <param name="associatedFile">associated chm file</param>
public CHMUrltable(byte[] binaryFileData, CHMFile associatedFile)
{
_binaryFileData = binaryFileData;
_associatedFile = associatedFile;
DecodeData();
// clear internal binary data after extraction
_binaryFileData = null;
}
/// <summary>
/// Standard constructor
/// </summary>
internal CHMUrltable()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _urlTable.Count );
foreach(UrlTableEntry curItem in _urlTable)
{
curItem.Dump(ref writer);
}
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
int i=0;
int nCnt = reader.ReadInt32();
for(i=0; i<nCnt;i++)
{
UrlTableEntry newItem = new UrlTableEntry();
newItem.SetCHMFile(_associatedFile);
newItem.ReadDump(ref reader);
_urlTable.Add(newItem);
}
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
foreach(UrlTableEntry curEntry in _urlTable)
{
curEntry.SetCHMFile(associatedFile);
}
}
#endregion
/// <summary>
/// Decodes the binary file data and fills the internal properties
/// </summary>
/// <returns>true if succeeded</returns>
private bool DecodeData()
{
bool bRet = true;
MemoryStream memStream = new MemoryStream(_binaryFileData);
BinaryReader binReader = new BinaryReader(memStream);
int nCurOffset = 0;
while( (memStream.Position < memStream.Length) && (bRet) )
{
nCurOffset = (int)memStream.Position;
byte [] dataBlock = binReader.ReadBytes(BLOCK_SIZE);
bRet &= DecodeBlock(dataBlock, ref nCurOffset);
}
return bRet;
}
/// <summary>
/// Decodes a block of url-string data
/// </summary>
/// <param name="dataBlock">block of data</param>
/// <param name="nOffset">current file offset</param>
/// <returns>true if succeeded</returns>
private bool DecodeBlock( byte[] dataBlock, ref int nOffset )
{
bool bRet = true;
int blockOffset = nOffset;
MemoryStream memStream = new MemoryStream(dataBlock);
BinaryReader binReader = new BinaryReader(memStream);
for(int i=0; i < RECORDS_PER_BLOCK; i++)
{
int recordOffset = blockOffset + (int)memStream.Position;
uint nuniqueID = (uint) binReader.ReadInt32(); // unknown dword
int ntopicsIdx = binReader.ReadInt32();
int urlstrOffset = binReader.ReadInt32();
UrlTableEntry newEntry = new UrlTableEntry(nuniqueID, recordOffset, ntopicsIdx, urlstrOffset, _associatedFile);
_urlTable.Add(newEntry);
if( memStream.Position >= memStream.Length)
break;
}
if(dataBlock.Length == BLOCK_SIZE)
binReader.ReadInt32();
return bRet;
}
/// <summary>
/// Gets the arraylist containing all urltable entries.
/// </summary>
public ArrayList UrlTable
{
get
{
return _urlTable;
}
}
/// <summary>
/// Gets the urltable entry of a given offset
/// </summary>
public UrlTableEntry this[int offset]
{
get
{
foreach(UrlTableEntry curEntry in _urlTable)
if(curEntry.EntryOffset == offset)
return curEntry;
return null;
}
}
/// <summary>
/// Gets the urltable entry of a given uniqueID
/// </summary>
public UrlTableEntry GetByUniqueID(uint uniqueID)
{
foreach(UrlTableEntry curEntry in UrlTable)
{
if(curEntry.UniqueID == uniqueID)
return curEntry;
}
return null;
}
/// <summary>
/// Implement IDisposable.
/// </summary>
public void Dispose()
{
Dispose(true);
// This object will be cleaned up by the Dispose method.
// Therefore, you should call GC.SupressFinalize to
// take this object off the finalization queue
// and prevent finalization code for this object
// from executing a second time.
GC.SuppressFinalize(this);
}
/// <summary>
/// Dispose(bool disposing) executes in two distinct scenarios.
/// If disposing equals true, the method has been called directly
/// or indirectly by a user's code. Managed and unmanaged resources
/// can be disposed.
/// If disposing equals false, the method has been called by the
/// runtime from inside the finalizer and you should not reference
/// other objects. Only unmanaged resources can be disposed.
/// </summary>
/// <param name="disposing">disposing flag</param>
private void Dispose(bool disposing)
{
// Check to see if Dispose has already been called.
if(!this.disposed)
{
// If disposing equals true, dispose all managed
// and unmanaged resources.
if(disposing)
{
// Dispose managed resources.
_binaryFileData = null;
_urlTable = null;
}
}
disposed = true;
}
}
}

View file

@ -0,0 +1,395 @@
using System;
using System.IO;
using System.Text;
using System.Diagnostics;
using System.Collections.Specialized;
using ICSharpCode.SharpZipLib;
using ICSharpCode.SharpZipLib.Zip;
using ICSharpCode.SharpZipLib.Zip.Compression;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using HtmlHelp;
// using HtmlHelp.Storage;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// Enumeration for specifying the dumping compression
/// </summary>
public enum DumpCompression
{
/// <summary>
/// None - no data compression will be used.
/// Fastest but most memory intensive
/// </summary>
None = 0,
/// <summary>
/// Minimum - a minimum data compression will be used.
/// Fast but not much data reduction
/// </summary>
Minimum = 1,
/// <summary>
/// Medium - a medium data compression will be used.
/// Slower but medium data reduction
/// </summary>
Medium = 2,
/// <summary>
/// Maximum - a maximum data compression will be used.
/// Slowest but maximum data reduction
/// </summary>
Maximum = 3
}
/// <summary>
/// Flags which specify which data should be dumped
/// </summary>
[FlagsAttribute()]
public enum DumpingFlags
{
/// <summary>
/// DumpTextTOC - if this flag is set, text-based TOCs (sitemap format) will be dumped
/// </summary>
DumpTextTOC = 1,
/// <summary>
/// DumpBinaryTOC - if this flag is set, binary TOCs will be dumped
/// </summary>
DumpBinaryTOC = 2,
/// <summary>
/// DumpTextIndex - if this flag is set, the text-based index (sitemap format) will be dumped
/// </summary>
DumpTextIndex = 4,
/// <summary>
/// DumpBinaryIndex - if this flag is set, the binary index will be dumped
/// </summary>
DumpBinaryIndex = 8,
/// <summary>
/// DumpStrings - if this flag is set, the internal #STRINGS file will be dumped
/// </summary>
DumpStrings = 16,
/// <summary>
/// DumpUrlStr - if this flag is set, the internal #URLSTR file will be dumped
/// </summary>
DumpUrlStr = 32,
/// <summary>
/// DumpUrlTbl - if this flag is set, the internal #URLTBL file will be dumped
/// </summary>
DumpUrlTbl = 64,
/// <summary>
/// DumpTopics - if this flag is set, the internal #TOPICS file will be dumped
/// </summary>
DumpTopics = 128,
/// <summary>
/// DumpFullText - if this flag is set, the internal $FIftiMain file will be dumped
/// </summary>
DumpFullText = 256
}
/// <summary>
/// The class <c>DumpingInfo</c> implements information properties for the CHMFile class
/// if and how data dumping should be used.
/// </summary>
public sealed class DumpingInfo
{
public bool m_bAllowSaveDump=true;
private readonly static BitVector32.Section DumpFlags = BitVector32.CreateSection(512);
private const string _dumpHeader = "HtmlHelpSystem dump file 1.0";
private string _outputDir = ""; // emtpy string means, same directory as chm file
private DumpCompression _compressionLevel = DumpCompression.Maximum;
private CHMFile _chmFile = null;
private DeflaterOutputStream _outputStream = null;
private InflaterInputStream _inputStream = null;
private BinaryWriter _writer = null;
private BinaryReader _reader = null;
private BitVector32 _flags;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="flags">Combine flag values to specify which data should be dumped.</param>
/// <param name="outputDir">output directory. emtpy string means,
/// same directory as chm file (only if destination = ExternalFile)</param>
/// <param name="compressionLevel">compression which should be used</param>
public DumpingInfo(DumpingFlags flags, string outputDir, DumpCompression compressionLevel)
{
_flags = new BitVector32(0);
int i = _flags[DumpFlags];
_flags[DumpFlags] = i | (int)flags;
_outputDir = outputDir;
_compressionLevel = compressionLevel;
}
/// <summary>
/// Gets the flag if text-based TOCs will be written to the dumping file
/// </summary>
public bool DumpTextTOC
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpTextTOC) != 0); }
}
/// <summary>
/// Gets the flag if binary TOCs will be written to the dumping file
/// </summary>
public bool DumpBinaryTOC
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpBinaryTOC) != 0); }
}
/// <summary>
/// Gets the flag if the text-based index will be written to the dumping file
/// </summary>
public bool DumpTextIndex
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpTextIndex) != 0); }
}
/// <summary>
/// Gets the flag if the binary index will be written to the dumping file
/// </summary>
public bool DumpBinaryIndex
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpBinaryIndex) != 0); }
}
/// <summary>
/// Gets the flag if the #STRINGS file will be written to the dumping file
/// </summary>
public bool DumpStrings
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpStrings) != 0); }
}
/// <summary>
/// Gets the flag if the #URLSTR file will be written to the dumping file
/// </summary>
public bool DumpUrlStr
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpUrlStr) != 0); }
}
/// <summary>
/// Gets the flag if the #URLTBL file will be written to the dumping file
/// </summary>
public bool DumpUrlTbl
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpUrlTbl) != 0); }
}
/// <summary>
/// Gets the flag if the #TOPICS file will be written to the dumping file
/// </summary>
public bool DumpTopics
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpTopics) != 0); }
}
/// <summary>
/// Gets the flag if the $FIftiMain file will be written to the dumping file
/// </summary>
public bool DumpFullText
{
get { return ((_flags[DumpFlags] & (int)DumpingFlags.DumpFullText) != 0); }
}
/// <summary>
/// Gets the dump output directory.
/// </summary>
/// <value>emtpy string means, same directory as chm file</value>
/// <remarks>If Destination is set to DumpingOutput.InternalFile this property will be ignored</remarks>
public string OutputDir
{
get { return _outputDir; }
}
/// <summary>
/// The compression level used.
/// </summary>
public DumpCompression CompressionLevel
{
get { return _compressionLevel; }
}
/// <summary>
/// Gets/Sets the CHMFile instance associated with this object
/// </summary>
internal CHMFile ChmFile
{
get { return _chmFile; }
set { _chmFile = value; }
}
/// <summary>
/// Translates the compression level to the deflater constants
/// </summary>
private int CompLvl
{
get
{
switch(CompressionLevel)
{
case DumpCompression.None: return Deflater.NO_COMPRESSION;
case DumpCompression.Minimum: return Deflater.BEST_SPEED;
case DumpCompression.Medium: return Deflater.DEFAULT_COMPRESSION;
case DumpCompression.Maximum: return Deflater.BEST_COMPRESSION;
}
return Deflater.BEST_COMPRESSION;
}
}
/// <summary>
/// Checks if a dump exists
/// </summary>
internal bool DumpExists
{
get
{
if(_flags[DumpFlags] == 0)
return false;
// we have a reader or writer to the dump so it must exist
if( (_reader != null) || (_writer != null) )
return true;
string sDmpFile = _chmFile.ChmFilePath;
sDmpFile=sDmpFile.ToLower().Replace(".chm",".CHB");
return File.Exists(sDmpFile);
}
}
/// <summary>
/// Gets a binary writer instance which allows you to write to the dump
/// </summary>
internal BinaryWriter Writer
{
get
{
if (m_bAllowSaveDump==false)
return null;
if(_flags[DumpFlags] == 0)
throw new InvalidOperationException("Nothing to dump. No flags have been set !");
if(_reader != null)
throw new InvalidOperationException("Can't write and read at the same time !");
if(_chmFile == null)
throw new InvalidOperationException("Only usable with an associated CHMFile instance !");
if(_writer==null)
{
string sDmpFile = _chmFile.ChmFilePath;
sDmpFile=sDmpFile.ToLower().Replace(".chm",".CHB");
StreamWriter stream = new StreamWriter(sDmpFile, false, _chmFile.TextEncoding);
// write header info uncompressed
BinaryWriter _hwriter = new BinaryWriter(stream.BaseStream);
_hwriter.Write(_dumpHeader);
_hwriter.Write((int)CompressionLevel);
if(_compressionLevel == DumpCompression.None)
{
_writer = new BinaryWriter(stream.BaseStream);
}
else
{
_outputStream = new DeflaterOutputStream(stream.BaseStream, new Deflater(CompLvl));
_writer = new BinaryWriter(_outputStream);
}
}
return _writer;
}
}
/// <summary>
/// Gets a binary reader which allows you to read from the dump
/// </summary>
internal BinaryReader Reader
{
get
{
if(_writer != null)
throw new InvalidOperationException("Can't write and read at the same time !");
if(_chmFile == null)
throw new InvalidOperationException("Only usable with an associated CHMFile instance !");
if(_reader==null)
{
string sDmpFile = _chmFile.ChmFilePath;
sDmpFile=sDmpFile.ToLower().Replace(".chm",".CHB");
StreamReader stream = new StreamReader(sDmpFile, _chmFile.TextEncoding);
BinaryReader _hReader = new BinaryReader(stream.BaseStream);
string sH = _hReader.ReadString();
if(sH != _dumpHeader)
{
_hReader.Close();
Debug.WriteLine("Unexpected dump-file header !");
throw new FormatException("DumpingInfo.Reader - Unexpected dump-file header !");
}
_compressionLevel = (DumpCompression)_hReader.ReadInt32();
// if(_compressionLevel != (DumpCompression)_hReader.ReadInt32())
// {
// _hReader.Close();
// return null;
// }
if(_compressionLevel == DumpCompression.None)
{
_reader = new BinaryReader(stream.BaseStream);
}
else
{
_inputStream = new InflaterInputStream(stream.BaseStream, new Inflater());
_reader = new BinaryReader(_inputStream);
}
}
return _reader;
}
}
/// <summary>
/// Saves data and closes the dump
/// </summary>
/// <returns>true if succeed</returns>
internal bool SaveData()
{
if (m_bAllowSaveDump==false)
return true;
if(_writer != null)
{
if(_writer!=null)
_writer.Close();
_outputStream = null;
_writer = null;
}
if(_reader != null)
{
if(_reader!=null)
_reader.Close();
_inputStream = null;
_reader = null;
}
return true;
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,593 @@
using System;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>HHCParser</c> implements a parser for HHC contents files.
/// </summary>
internal sealed class HHCParser
{
/// <summary>
/// regular expressions for replacing the sitemap boundary tags
/// </summary>
private static string RE_ULOpening = @"\<ul\>"; // will be replaced by a '(' for nested parsing
private static string RE_ULClosing = @"\</ul\>"; // will be replaced by a ')' for nested parsing
/// <summary>
/// Matching ul-tags
/// </summary>
private static string RE_ULBoundaries = @"\<ul\>(?<innerText>.*)\</ul\>";
/// <summary>
/// Matching the nested tree structure.
/// </summary>
private static string RE_NestedBoundaries = @"\( (?> [^()]+ | \( (?<DEPTH>) | \) (?<-DEPTH>) )* (?(DEPTH)(?!)) \)";
/// <summary>
/// Matching object-tags
/// </summary>
private static string RE_ObjectBoundaries = @"\<object(?<innerText>.*?)\</object\>";
/// <summary>
/// Matching param tags
/// </summary>
private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
/// <summary>
/// Extracting tag attributes
/// </summary>
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
/// <summary>
/// private regular expressionobjects
/// </summary>
private static Regex ulRE;
private static Regex NestedRE;
private static Regex ObjectRE;
private static Regex ParamRE;
private static Regex AttributesRE;
/// <summary>
/// Internal member storing the list of TOCItems which are holding merge links
/// </summary>
private static ArrayList _mergeItems = null;
/// <summary>
/// Internal member storing the last read regular topic item.
/// This is used to handle "Merge" entries and add them as child to this instance.
/// </summary>
private static TOCItem _lastTopicItem = null;
/// <summary>
/// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
/// </summary>
/// <param name="hhcFile">string content of the hhc file</param>
/// <param name="chmFile">CHMFile instance</param>
/// <returns>Returns an ArrayList with the table of contents (TOC) tree</returns>
public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
{
_lastTopicItem = null;
_mergeItems = null; // clear merged item list
ArrayList tocList = new ArrayList();
ulRE = new Regex(RE_ULBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
NestedRE = new Regex(RE_NestedBoundaries, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ObjectRE = new Regex(RE_ObjectBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
int innerTextIdx = ulRE.GroupNumberFromName("innerText");
if( ulRE.IsMatch(hhcFile, 0) )
{
Match m = ulRE.Match(hhcFile, 0);
int nFirstUL = 0;
nFirstUL = hhcFile.ToLower().IndexOf("<ul>");
if(nFirstUL == -1)
nFirstUL = hhcFile.ToLower().IndexOf("<il>");
if( ObjectRE.IsMatch(hhcFile, 0) ) // first object block contains information types and categories
{
Match mO = ObjectRE.Match(hhcFile, 0);
int iOTxt = ObjectRE.GroupNumberFromName("innerText");
string globalText = mO.Groups[iOTxt].Value;
if( mO.Groups[iOTxt].Index <= nFirstUL)
ParseGlobalSettings( globalText, chmFile );
}
// parse toc tree
string innerText = m.Groups["innerText"].Value;
innerText = innerText.Replace("(", "&#040;");
innerText = innerText.Replace(")", "&#041;");
innerText = Regex.Replace(innerText, RE_ULOpening, "(", RegexOptions.IgnoreCase);
innerText = Regex.Replace(innerText, RE_ULClosing, ")", RegexOptions.IgnoreCase);
ParseTree( innerText, null, tocList, chmFile );
}
return tocList;
}
/// <summary>
/// Checks if the hhc file contains a global object tag.
/// </summary>
/// <param name="hhcFile">string content of the hhc file</param>
/// <param name="chmFile">chm file</param>
/// <returns>true if the hhc content contains a global object tag</returns>
public static bool HasGlobalObjectTag(string hhcFile, CHMFile chmFile)
{
bool bRet = false;
ulRE = new Regex(RE_ULBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ObjectRE = new Regex(RE_ObjectBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
int innerTextIdx = ulRE.GroupNumberFromName("innerText");
if( ulRE.IsMatch(hhcFile, 0) )
{
Match m = ulRE.Match(hhcFile, 0);
int nFirstUL = 0;
nFirstUL = hhcFile.ToLower().IndexOf("<ul>");
if(nFirstUL == -1)
nFirstUL = hhcFile.ToLower().IndexOf("<il>");
if( ObjectRE.IsMatch(hhcFile, 0) ) // first object block contains information types and categories
{
Match mO = ObjectRE.Match(hhcFile, 0);
int iOTxt = ObjectRE.GroupNumberFromName("innerText");
string globalText = mO.Groups[iOTxt].Value;
if( mO.Groups[iOTxt].Index <= nFirstUL)
bRet = true;
}
}
return bRet;
}
/// <summary>
/// Gets true if the previously done parsing found merge-links
/// </summary>
public static bool HasMergeLinks
{
get
{
if(_mergeItems==null)
return false;
return _mergeItems.Count > 0;
}
}
/// <summary>
/// Gets all TOCItem references which are holding merge-links
/// </summary>
public static ArrayList MergeItems
{
get { return _mergeItems; }
}
/// <summary>
/// Recursively parses a sitemap tree
/// </summary>
/// <param name="text">content text</param>
/// <param name="parent">Parent for all read items</param>
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseTree( string text, TOCItem parent, ArrayList arrNodes, CHMFile chmFile )
{
string strPreItems="", strPostItems="";
string innerText = "";
int nIndex = 0;
while( NestedRE.IsMatch(text, nIndex) )
{
Match m = NestedRE.Match(text, nIndex);
innerText = m.Value.Substring( 1, m.Length-2);
strPreItems = text.Substring(nIndex,m.Index-nIndex);
ParseItems(strPreItems, parent, arrNodes, chmFile);
if((arrNodes.Count>0) && (innerText.Length > 0) )
{
TOCItem p = ((TOCItem)(arrNodes[arrNodes.Count-1]));
ParseTree( innerText, p, p.Children, chmFile );
}
nIndex = m.Index+m.Length;
}
if( nIndex == 0)
{
strPostItems = text.Substring(nIndex, text.Length-nIndex);
ParseItems(strPostItems, parent, arrNodes, chmFile);
}
else if( nIndex < text.Length-1)
{
strPostItems = text.Substring(nIndex, text.Length-nIndex);
ParseTree(strPostItems, parent, arrNodes, chmFile);
}
}
/// <summary>
/// Parses tree nodes from the text
/// </summary>
/// <param name="itemstext">text containing the items</param>
/// <param name="parent">Parent for all read items</param>
/// <param name="arrNodes">arraylist where the nodes should be added</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseItems( string itemstext, TOCItem parent, ArrayList arrNodes, CHMFile chmFile)
{
int innerTextIdx = ObjectRE.GroupNumberFromName("innerText");
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
int nObjStartIndex = 0;
while( ObjectRE.IsMatch(itemstext, nObjStartIndex) )
{
Match m = ObjectRE.Match(itemstext, nObjStartIndex);
string innerText = m.Groups[innerTextIdx].Value;
TOCItem tocItem = new TOCItem();
tocItem.TocMode = DataMode.TextBased;
tocItem.AssociatedFile = chmFile;
tocItem.Parent = parent;
// read parameters
int nParamIndex = 0;
while( ParamRE.IsMatch(innerText, nParamIndex) )
{
Match mP = ParamRE.Match(innerText, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
}
if( attributeName.ToLower() == "value")
{
paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
// delete trailing /
while((paramValue.Length>0)&&(paramValue[paramValue.Length-1] == '/'))
paramValue = paramValue.Substring(0,paramValue.Length-1);
}
nAttrIdx = mA.Index+mA.Length;
}
tocItem.Params[paramName] = paramValue;
switch(paramName.ToLower())
{
case "name":
{
tocItem.Name = paramValue;
};break;
case "local":
{
tocItem.Local = paramValue.Replace("../", "").Replace("./", "");
};break;
case "imagenumber":
{
tocItem.ImageIndex = Int32.Parse(paramValue);
tocItem.ImageIndex-=1;
int nFolderAdd = 0;
if((chmFile != null) && (chmFile.ImageTypeFolder))
{
// get the value which should be added, to display folders instead of books
if(HtmlHelpSystem.UseHH2TreePics)
nFolderAdd = 8;
else
nFolderAdd = 4;
}
if(tocItem.ImageIndex%2 != 0)
{
if(tocItem.ImageIndex==1)
tocItem.ImageIndex=0;
}
if(HtmlHelpSystem.UseHH2TreePics)
if( tocItem.ImageIndex == 0)
tocItem.ImageIndex = TOCItem.STD_FOLDER_HH2+nFolderAdd;
};break;
case "merge": // this item contains topics or a full TOC from a merged CHM
{
tocItem.MergeLink = paramValue;
// "register" this item as merge-link
if(_mergeItems==null)
_mergeItems=new ArrayList();
_mergeItems.Add(tocItem);
};break;
case "type": // information type assignment for item
{
tocItem.InfoTypeStrings.Add( paramValue );
};break;
}
nParamIndex = mP.Index+mP.Length;
}
tocItem.ChmFile = chmFile.ChmFilePath;
if(tocItem.MergeLink.Length > 0)
{
if(_lastTopicItem != null)
{
tocItem.Parent = _lastTopicItem;
_lastTopicItem.Children.Add(tocItem);
}
else
arrNodes.Add( tocItem );
}
else
{
_lastTopicItem = tocItem;
arrNodes.Add( tocItem );
}
nObjStartIndex = m.Index+m.Length;
}
}
/// <summary>
/// Parses the very first &lt;OBJECT&gt; tag in the sitemap file and extracts
/// information types and categories.
/// </summary>
/// <param name="sText">text of the object tag</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseGlobalSettings(string sText, CHMFile chmFile)
{
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
// read parameters
int nParamIndex = 0;
// 0... unknown
// 1... inclusinve info type name
// 2... exclusive info type name
// 3... hidden info type name
// 4... category name
// 5... incl infotype name for category
// 6... excl infotype name for category
// 7... hidden infotype name for category
int prevItem = 0;
string sName = "";
string sDescription = "";
string curCategory = "";
while( ParamRE.IsMatch(sText, nParamIndex) )
{
Match mP = ParamRE.Match(sText, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
}
if( attributeName.ToLower() == "value")
{
paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
// delete trailing /
while((paramValue.Length>0)&&(paramValue[paramValue.Length-1] == '/'))
paramValue = paramValue.Substring(0,paramValue.Length-1);
}
nAttrIdx = mA.Index+mA.Length;
}
switch(paramName.ToLower())
{
case "savetype": // inclusive information type name
{
prevItem = 1;
sName = paramValue;
};break;
case "savetypedesc": // description of information type
{
InformationTypeMode mode = InformationTypeMode.Inclusive;
sDescription = paramValue;
if( prevItem == 1)
mode = InformationTypeMode.Inclusive;
if( prevItem == 2)
mode = InformationTypeMode.Exclusive;
if( prevItem == 3)
mode = InformationTypeMode.Hidden;
if( chmFile.GetInformationType( sName ) == null)
{
// check if the HtmlHelpSystem already holds such an information type
if( chmFile.SystemInstance.GetInformationType( sName ) == null)
{
// info type not found yet
InformationType newType = new InformationType(sName, sDescription, mode);
chmFile.InformationTypes.Add(newType);
}
else
{
InformationType sysType = chmFile.SystemInstance.GetInformationType( sName );
chmFile.InformationTypes.Add( sysType );
}
}
prevItem = 0;
};break;
case "saveexclusive": // exclusive information type name
{
prevItem = 2;
sName = paramValue;
};break;
case "savehidden": // hidden information type name
{
prevItem = 3;
sName = paramValue;
};break;
case "category": // category name
{
prevItem = 4;
sName = paramValue;
curCategory = sName;
};break;
case "categorydesc": // category description
{
sDescription = paramValue;
if( chmFile.GetCategory( sName ) == null)
{
// check if the HtmlHelpSystem already holds such a category
if( chmFile.SystemInstance.GetCategory( sName ) == null)
{
// add category
Category newCat = new Category(sName, sDescription);
chmFile.Categories.Add(newCat);
}
else
{
Category sysCat = chmFile.SystemInstance.GetCategory( sName );
chmFile.Categories.Add( sysCat );
}
}
prevItem = 0;
};break;
case "type": // inclusive information type which is member of the previously read category
{
prevItem = 5;
sName = paramValue;
};break;
case "typedesc": // description of type for category
{
sDescription = paramValue;
Category cat = chmFile.GetCategory( curCategory );
if( cat != null)
{
// category found
InformationType infoType = chmFile.GetInformationType( sName );
if( infoType != null)
{
if( !cat.ContainsInformationType(infoType))
{
infoType.SetCategoryFlag(true);
cat.AddInformationType(infoType);
}
}
}
prevItem = 0;
};break;
case "typeexclusive": // exclusive information type which is member of the previously read category
{
prevItem = 6;
sName = paramValue;
};break;
case "typehidden": // hidden information type which is member of the previously read category
{
prevItem = 7;
sName = paramValue;
};break;
default:
{
prevItem = 0;
sName = "";
sDescription = "";
};break;
}
nParamIndex = mP.Index+mP.Length;
}
}
}
}

View file

@ -0,0 +1,220 @@
using System;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>HHCParser</c> implements a parser for HHC contents files.
/// </summary>
// internal sealed class HHCParser : IHHCParser
public class HHCParser2
{
static private string m_text1="";
static private string m_text2="";
static private int m_CurrentPos=0;
/// <summary>
/// Parses a HHC file and returns an ArrayList with the table of contents (TOC) tree
/// </summary>
/// <param name="hhcFile">string content of the hhc file</param>
/// <param name="chmFile">CHMFile instance</param>
/// <returns>Returns an ArrayList with the table of contents (TOC) tree</returns>
public static ArrayList ParseHHC(string hhcFile, CHMFile chmFile)
{
DateTime StartTime=DateTime.Now;
ArrayList tocList = new ArrayList();
m_text2=hhcFile;
m_text1=hhcFile.ToLower();
int idx=m_text1.IndexOf("<ul>");
if (idx==-1)
return null;
m_CurrentPos=idx+4;
ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ParseTree(tocList,chmFile);
DateTime EndTime=DateTime.Now;
TimeSpan Diff=EndTime-StartTime;
string x=Diff.ToString();
return tocList;
}
/// <summary>
/// Recursively parses a sitemap tree
/// </summary>
/// <param name="text">content text</param>
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
/// <param name="chmFile">CHMFile instance</param>
static private void ParseTree( ArrayList arrNodes, CHMFile chmFile )
{
bool bProcessing=true;
do
{
bProcessing=false;
// Indent
int idxa=m_text1.IndexOf("<ul>",m_CurrentPos);
int idxb=m_text1.IndexOf("<li>",m_CurrentPos);
int idxc=m_text1.IndexOf("</ul>",m_CurrentPos);
if ((idxa<idxb) && (idxa<idxc) && (idxa>-1))
{
bProcessing=true;
m_CurrentPos=idxa+4;
if (arrNodes.Count<1)
{
ParseTree(arrNodes,chmFile);
}
else
{
ParseTree(((TOCItem)(arrNodes[arrNodes.Count-1])).Children,chmFile);
}
continue;
}
// new item
if ((idxb<idxa) && (idxb<idxc) && (idxb>-1))
{
bProcessing=true;
m_CurrentPos=idxb+4;
int idx2=m_text1.IndexOf("<object",m_CurrentPos);
if (idx2!=-1)
{
int idx3=m_text1.IndexOf("</object>",idx2+7);
if (idx3!=-1)
{
string text=m_text2.Substring(idx2,idx3-idx2);
m_CurrentPos=idx3+9;
// Parse items in text.
TOCItem tocItem=ParseItems(text, chmFile);
if (tocItem!=null)
{
arrNodes.Add(tocItem);
}
}
}
}
// Undent
if ((idxc<idxa) && (idxc<idxb) && (idxc>-1))
{
m_CurrentPos=idxc+5;
bProcessing=true;
return;
}
}
while (bProcessing);
}
private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
private static Regex ParamRE;
private static Regex AttributesRE;
/// <summary>
/// Parses tree nodes from the text
/// </summary>
/// <param name="itemstext">text containing the items</param>
/// <param name="arrNodes">arraylist where the nodes should be added</param>
/// <param name="chmFile">CHMFile instance</param>
private static TOCItem ParseItems( string itemstext, CHMFile chmFile)
{
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
TOCItem tocItem = new TOCItem();
// read parameters
int nParamIndex = 0;
while( ParamRE.IsMatch(itemstext, nParamIndex) )
{
Match mP = ParamRE.Match(itemstext, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = attributeValue;
}
if( attributeName.ToLower() == "value")
{
paramValue = attributeValue;
}
nAttrIdx = mA.Index+mA.Length;
}
tocItem.Params[paramName] = paramValue;
switch(paramName.ToLower())
{
case "name":
{
tocItem.Name = paramValue;
};break;
case "local":
{
tocItem.Local = paramValue;
};break;
case "imagenumber":
{
tocItem.ImageIndex = Int32.Parse(paramValue);
if( tocItem.ImageIndex == 2)
tocItem.ImageIndex = TOCItem.STD_FOLDER_HH1;
};break;
}
nParamIndex = mP.Index+mP.Length;
}
tocItem.ChmFile = chmFile.ChmFilePath;
return tocItem;
}
}
}

View file

@ -0,0 +1,550 @@
using System;
using System.IO;
using System.Collections;
using System.Text;
using System.Text.RegularExpressions;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>HHKParser</c> implements a parser for HHK contents files.
/// </summary>
internal sealed class HHKParser
{
/// <summary>
/// regular expressions for replacing the sitemap boundary tags
/// </summary>
private static string RE_ULOpening = @"\<ul\>"; // will be replaced by a '(' for nested parsing
private static string RE_ULClosing = @"\</ul\>"; // will be replaced by a ')' for nested parsing
/// <summary>
/// Matching ul-tags
/// </summary>
private static string RE_ULBoundaries = @"\<ul\>(?<innerText>.*)\</ul\>";
/// <summary>
/// Matching the nested tree structure.
/// </summary>
private static string RE_NestedBoundaries = @"\( (?> [^()]+ | \( (?<DEPTH>) | \) (?<-DEPTH>) )* (?(DEPTH)(?!)) \)";
/// <summary>
/// Matching object-tags
/// </summary>
private static string RE_ObjectBoundaries = @"\<object(?<innerText>.*?)\</object\>";
/// <summary>
/// Matching param tags
/// </summary>
private static string RE_ParamBoundaries = @"\<param(?<innerText>.*?)\>";
/// <summary>
/// Extracting tag attributes
/// </summary>
private const string RE_QuoteAttributes = @"( |\t)*(?<attributeName>[\-a-zA-Z0-9]*)( |\t)*=( |\t)*(?<attributeTD>[\""\'])?(?<attributeValue>.*?(?(attributeTD)\k<attributeTD>|([\s>]|.$)))";
/// <summary>
/// private regular expressionobjects
/// </summary>
private static Regex ulRE;
private static Regex NestedRE;
private static Regex ObjectRE;
private static Regex ParamRE;
private static Regex AttributesRE;
/// <summary>
/// Parses a HHK file and returns an ArrayList with the index tree
/// </summary>
/// <param name="hhkFile">string content of the hhk file</param>
/// <param name="chmFile">CHMFile instance</param>
/// <returns>Returns an ArrayList with the index tree</returns>
public static ArrayList ParseHHK(string hhkFile, CHMFile chmFile)
{
ArrayList indexList = new ArrayList();
ulRE = new Regex(RE_ULBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
NestedRE = new Regex(RE_NestedBoundaries, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ObjectRE = new Regex(RE_ObjectBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
ParamRE = new Regex(RE_ParamBoundaries, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
AttributesRE = new Regex(RE_QuoteAttributes, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
int innerTextIdx = ulRE.GroupNumberFromName("innerText");
if( ulRE.IsMatch(hhkFile, 0) )
{
Match m = ulRE.Match(hhkFile, 0);
if( ObjectRE.IsMatch(hhkFile, 0) ) // first object block contains information types and categories
{
Match mO = ObjectRE.Match(hhkFile, 0);
int iOTxt = ObjectRE.GroupNumberFromName("innerText");
string globalText = mO.Groups[iOTxt].Value;
ParseGlobalSettings( globalText, chmFile );
}
string innerText = m.Groups["innerText"].Value;
innerText = innerText.Replace("(", "&#040;");
innerText = innerText.Replace(")", "&#041;");
innerText = Regex.Replace(innerText, RE_ULOpening, "(", RegexOptions.IgnoreCase);
innerText = Regex.Replace(innerText, RE_ULClosing, ")", RegexOptions.IgnoreCase);
ParseTree( innerText, null, indexList, chmFile );
}
return indexList;
}
/// <summary>
/// Recursively parses a sitemap tree
/// </summary>
/// <param name="text">content text</param>
/// <param name="parent">Parent for all read items</param>
/// <param name="arrNodes">arraylist which receives the extracted nodes</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseTree( string text, IndexItem parent, ArrayList arrNodes, CHMFile chmFile )
{
string strPreItems="", strPostItems="";
string innerText = "";
int nIndex = 0;
while( NestedRE.IsMatch(text, nIndex) )
{
Match m = NestedRE.Match(text, nIndex);
innerText = m.Value.Substring( 1, m.Length-2);
strPreItems = text.Substring(nIndex,m.Index-nIndex);
ParseItems(strPreItems, parent, arrNodes, chmFile);
if((arrNodes.Count>0) && (innerText.Length > 0) )
{
IndexItem p = ((IndexItem)(arrNodes[arrNodes.Count-1]));
ParseTree( innerText, p, arrNodes, chmFile );
}
nIndex = m.Index+m.Length;
}
if( nIndex == 0)
{
strPostItems = text.Substring(nIndex, text.Length-nIndex);
ParseItems(strPostItems, parent, arrNodes, chmFile);
}
else if( nIndex < text.Length-1)
{
strPostItems = text.Substring(nIndex, text.Length-nIndex);
ParseTree(strPostItems, parent, arrNodes, chmFile);
}
}
/// <summary>
/// Parses nodes from the text
/// </summary>
/// <param name="itemstext">text containing the items</param>
/// <param name="parentItem">parent index item</param>
/// <param name="arrNodes">arraylist where the nodes should be added</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseItems( string itemstext, IndexItem parentItem, ArrayList arrNodes, CHMFile chmFile)
{
int innerTextIdx = ObjectRE.GroupNumberFromName("innerText");
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
int nObjStartIndex = 0;
int nLastObjStartIndex = 0;
string sKeyword = "";
while( ObjectRE.IsMatch(itemstext, nObjStartIndex) )
{
Match m = ObjectRE.Match(itemstext, nObjStartIndex);
string innerText = m.Groups[innerTextIdx].Value;
IndexItem idxItem = new IndexItem();
// read parameters
int nParamIndex = 0;
int nNameCnt = 0;
string paramTitle = "";
string paramLocal = "";
bool bAdded = false;
while( ParamRE.IsMatch(innerText, nParamIndex) )
{
Match mP = ParamRE.Match(innerText, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
//sKeyword = "";
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
nNameCnt++;
}
if( attributeName.ToLower() == "value")
{
paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
// delete trailing /
while((paramValue.Length>0)&&(paramValue[paramValue.Length-1] == '/'))
paramValue = paramValue.Substring(0,paramValue.Length-1);
}
nAttrIdx = mA.Index+mA.Length;
}
if( nNameCnt == 1) // first "Name" param = keyword
{
sKeyword = "";
if(parentItem != null)
sKeyword = parentItem.KeyWordPath + ",";
string sOldKW = sKeyword;
sKeyword += paramValue;
IndexItem idxFind = FindByKeyword(arrNodes, sKeyword);
if(idxFind != null)
{
idxItem = idxFind;
}
else
{
if( sKeyword.Split(new char[] {','}).Length > 1 )
{
idxItem.CharIndex = sKeyword.Length - paramValue.Length;
}
else
{
sKeyword = paramValue;
sOldKW = sKeyword;
idxItem.CharIndex = 0;
}
idxItem.KeyWordPath = sKeyword;
idxItem.Indent = sKeyword.Split(new char[] {','}).Length - 1;
idxItem.IsSeeAlso = false;
sKeyword = sOldKW;
}
}
else
{
if( (nNameCnt > 2) && (paramName.ToLower()=="name") )
{
bAdded = true;
IndexTopic idxTopic = new IndexTopic(paramTitle, paramLocal, chmFile.CompileFile, chmFile.ChmFilePath);
idxItem.Topics.Add( idxTopic );
paramTitle = "";
paramLocal = "";
}
switch(paramName.ToLower())
{
case "name":
//case "keyword":
{
paramTitle = paramValue;
};break;
case "local":
{
paramLocal = paramValue.Replace("../", "").Replace("./", "");
};break;
case "type": // information type assignment for item
{
idxItem.InfoTypeStrings.Add( paramValue );
};break;
case "see also":
{
idxItem.AddSeeAlso(paramValue);
idxItem.IsSeeAlso = true;
bAdded = true;
};break;
}
}
nParamIndex = mP.Index+mP.Length;
}
if(!bAdded)
{
bAdded=false;
IndexTopic idxTopic = new IndexTopic(paramTitle, paramLocal, chmFile.CompileFile, chmFile.ChmFilePath);
idxItem.Topics.Add( idxTopic );
paramTitle = "";
paramLocal = "";
}
idxItem.ChmFile = chmFile;
arrNodes.Add( idxItem );
nLastObjStartIndex = nObjStartIndex;
nObjStartIndex = m.Index+m.Length;
}
}
/// <summary>
/// Searches an index-keyword in the index list
/// </summary>
/// <param name="indexList">index list to search</param>
/// <param name="Keyword">keyword to find</param>
/// <returns>Returns an <see cref="IndexItem">IndexItem</see> instance if found, otherwise null.</returns>
private static IndexItem FindByKeyword(ArrayList indexList, string Keyword)
{
foreach(IndexItem curItem in indexList)
{
if( curItem.KeyWordPath == Keyword)
return curItem;
}
return null;
}
/// <summary>
/// Parses the very first &lt;OBJECT&gt; tag in the sitemap file and extracts
/// information types and categories.
/// </summary>
/// <param name="sText">text of the object tag</param>
/// <param name="chmFile">CHMFile instance</param>
private static void ParseGlobalSettings(string sText, CHMFile chmFile)
{
int innerPTextIdx = ParamRE.GroupNumberFromName("innerText");
// get group-name indexes
int nameIndex = AttributesRE.GroupNumberFromName("attributeName");
int valueIndex = AttributesRE.GroupNumberFromName("attributeValue");
int tdIndex = AttributesRE.GroupNumberFromName("attributeTD");
// read parameters
int nParamIndex = 0;
// 0... unknown
// 1... inclusinve info type name
// 2... exclusive info type name
// 3... hidden info type name
// 4... category name
// 5... incl infotype name for category
// 6... excl infotype name for category
// 7... hidden infotype name for category
int prevItem = 0;
string sName = "";
string sDescription = "";
string curCategory = "";
while( ParamRE.IsMatch(sText, nParamIndex) )
{
Match mP = ParamRE.Match(sText, nParamIndex);
string innerP = mP.Groups[innerPTextIdx].Value;
string paramName = "";
string paramValue = "";
int nAttrIdx = 0;
while( AttributesRE.IsMatch( innerP, nAttrIdx ) )
{
Match mA = AttributesRE.Match(innerP, nAttrIdx);
string attributeName = mA.Groups[nameIndex].Value;
string attributeValue = mA.Groups[valueIndex].Value;
string attributeTD = mA.Groups[tdIndex].Value;
if(attributeTD.Length > 0)
{
// delete the trailing textqualifier
if( attributeValue.Length > 0)
{
int ltqi = attributeValue.LastIndexOf( attributeTD );
if(ltqi >= 0)
{
attributeValue = attributeValue.Substring(0,ltqi);
}
}
}
if( attributeName.ToLower() == "name")
{
paramName = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
}
if( attributeName.ToLower() == "value")
{
paramValue = HttpUtility.HtmlDecode(attributeValue); // for unicode encoded values
// delete trailing /
while((paramValue.Length>0)&&(paramValue[paramValue.Length-1] == '/'))
paramValue = paramValue.Substring(0,paramValue.Length-1);
}
nAttrIdx = mA.Index+mA.Length;
}
switch(paramName.ToLower())
{
case "savetype": // inclusive information type name
{
prevItem = 1;
sName = paramValue;
};break;
case "savetypedesc": // description of information type
{
InformationTypeMode mode = InformationTypeMode.Inclusive;
sDescription = paramValue;
if( prevItem == 1)
mode = InformationTypeMode.Inclusive;
if( prevItem == 2)
mode = InformationTypeMode.Exclusive;
if( prevItem == 3)
mode = InformationTypeMode.Hidden;
if( chmFile.GetInformationType( sName ) == null)
{
// check if the HtmlHelpSystem already holds such an information type
if( chmFile.SystemInstance.GetInformationType( sName ) == null)
{
// info type not found yet
InformationType newType = new InformationType(sName, sDescription, mode);
chmFile.InformationTypes.Add(newType);
}
else
{
InformationType sysType = chmFile.SystemInstance.GetInformationType( sName );
chmFile.InformationTypes.Add( sysType );
}
}
prevItem = 0;
};break;
case "saveexclusive": // exclusive information type name
{
prevItem = 2;
sName = paramValue;
};break;
case "savehidden": // hidden information type name
{
prevItem = 3;
sName = paramValue;
};break;
case "category": // category name
{
prevItem = 4;
sName = paramValue;
curCategory = sName;
};break;
case "categorydesc": // category description
{
sDescription = paramValue;
if( chmFile.GetCategory( sName ) == null)
{
// check if the HtmlHelpSystem already holds such a category
if( chmFile.SystemInstance.GetCategory( sName ) == null)
{
// add category
Category newCat = new Category(sName, sDescription);
chmFile.Categories.Add(newCat);
}
else
{
Category sysCat = chmFile.SystemInstance.GetCategory( sName );
chmFile.Categories.Add( sysCat );
}
}
prevItem = 0;
};break;
case "type": // inclusive information type which is member of the previously read category
{
prevItem = 5;
sName = paramValue;
};break;
case "typedesc": // description of type for category
{
sDescription = paramValue;
Category cat = chmFile.GetCategory( curCategory );
if( cat != null)
{
// category found
InformationType infoType = chmFile.GetInformationType( sName );
if( infoType != null)
{
if( !cat.ContainsInformationType(infoType))
{
infoType.SetCategoryFlag(true);
cat.AddInformationType(infoType);
}
}
}
prevItem = 0;
};break;
case "typeexclusive": // exclusive information type which is member of the previously read category
{
prevItem = 6;
sName = paramValue;
};break;
case "typehidden": // hidden information type which is member of the previously read category
{
prevItem = 7;
sName = paramValue;
};break;
default:
{
prevItem = 0;
sName = "";
sDescription = "";
};break;
}
nParamIndex = mP.Index+mP.Length;
}
}
}
}

View file

@ -0,0 +1,245 @@
using System;
using System.IO;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>TopicEntry</c> stores the data for one topic entry
/// </summary>
internal sealed class TopicEntry
{
/// <summary>
/// Internal member storing the offset of this topic entry
/// </summary>
private int _entryOffset = 0;
/// <summary>
/// Internal member storing the index of the binary toc
/// </summary>
private int _tocidxOffset = 0;
/// <summary>
/// Internal member storing the string offset of the title
/// </summary>
private int _titleOffset = 0;
/// <summary>
/// Internal member storuing the urltable offset
/// </summary>
private int _urltableOffset = 0;
/// <summary>
/// Internal member storing the visibility mode
/// </summary>
private int _visibilityMode = 0;
/// <summary>
/// Internal member storing an unknown mode
/// </summary>
private int _unknownMode = 0;
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="entryOffset">offset of this entry</param>
/// <param name="tocidxOffset">offset in the binary toc index</param>
/// <param name="titleOffset">offset of the title (in the #STRINGS file)</param>
/// <param name="urltableOffset">offset in the urltable containing the urlstr offset for the url</param>
/// <param name="visibilityMode">visibility mode 2 indicates not in contents, 6 indicates that it is in the contents, 0/4 something else (unknown)</param>
/// <param name="unknownMode">0, 2, 4, 8, 10, 12, 16, 32 (unknown)</param>
public TopicEntry(int entryOffset, int tocidxOffset, int titleOffset, int urltableOffset, int visibilityMode, int unknownMode) :this(entryOffset, tocidxOffset, titleOffset, urltableOffset, visibilityMode, unknownMode, null)
{
}
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="entryOffset">offset of this entry</param>
/// <param name="tocidxOffset">offset in the binary toc index</param>
/// <param name="titleOffset">offset of the title (in the #STRINGS file)</param>
/// <param name="urltableOffset">offset in the urltable containing the urlstr offset for the url</param>
/// <param name="visibilityMode">visibility mode 2 indicates not in contents, 6 indicates that it is in the contents, 0/4 something else (unknown)</param>
/// <param name="unknownMode">0, 2, 4, 8, 10, 12, 16, 32 (unknown)</param>
/// <param name="associatedFile">associated chmfile object</param>
internal TopicEntry(int entryOffset, int tocidxOffset, int titleOffset, int urltableOffset, int visibilityMode, int unknownMode, CHMFile associatedFile)
{
_entryOffset = entryOffset;
_tocidxOffset = tocidxOffset;
_titleOffset = titleOffset;
_urltableOffset = urltableOffset;
_visibilityMode = visibilityMode;
_unknownMode = unknownMode;
_associatedFile = associatedFile;
}
/// <summary>
/// Standard constructor
/// </summary>
internal TopicEntry()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _entryOffset );
writer.Write( _tocidxOffset );
writer.Write( _titleOffset );
writer.Write( _urltableOffset );
writer.Write( _visibilityMode );
writer.Write( _unknownMode );
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
_entryOffset = reader.ReadInt32();
_tocidxOffset = reader.ReadInt32();
_titleOffset = reader.ReadInt32();
_urltableOffset = reader.ReadInt32();
_visibilityMode = reader.ReadInt32();
_unknownMode = reader.ReadInt32();
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
}
#endregion
/// <summary>
/// Gets the associated chm file
/// </summary>
internal CHMFile ChmFile
{
get { return _associatedFile; }
}
/// <summary>
/// Gets the offset of this entry
/// </summary>
internal int EntryOffset
{
get { return _entryOffset; }
}
/// <summary>
/// Gets the tocidx offset
/// </summary>
internal int TOCIdxOffset
{
get { return _tocidxOffset; }
}
/// <summary>
/// Gets the title offset of the #STRINGS file
/// </summary>
internal int TitleOffset
{
get { return _titleOffset; }
}
/// <summary>
/// Gets the urltable offset
/// </summary>
internal int UrlTableOffset
{
get { return _urltableOffset; }
}
/// <summary>
/// Gets the title of the topic entry
/// </summary>
public string Title
{
get
{
if( _associatedFile == null)
return String.Empty;
if( _associatedFile.StringsFile == null)
return String.Empty;
string sTemp = (string)_associatedFile.StringsFile[ _titleOffset ];
if(sTemp == null)
return String.Empty;
return sTemp;
}
}
/// <summary>
/// Gets the url of the topic
/// </summary>
public string Locale
{
get
{
if( _associatedFile == null)
return String.Empty;
if( _associatedFile.UrltblFile == null)
return String.Empty;
UrlTableEntry utEntry = (UrlTableEntry)_associatedFile.UrltblFile[ _urltableOffset ];
if(utEntry == null)
return String.Empty;
if(utEntry.URL == "")
return String.Empty;
return utEntry.URL;
}
}
/// <summary>
/// Gets the URL of this topic
/// </summary>
public string URL
{
get
{
if(Locale.Length <= 0)
return "about:blank";
if( (Locale.ToLower().IndexOf("http://") >= 0) ||
(Locale.ToLower().IndexOf("https://") >= 0) ||
(Locale.ToLower().IndexOf("mailto:") >= 0) ||
(Locale.ToLower().IndexOf("ftp://") >= 0) ||
(Locale.ToLower().IndexOf("ms-its:") >= 0))
return Locale;
return HtmlHelpSystem.UrlPrefix + _associatedFile.ChmFilePath + "::/" + Locale;
}
}
/// <summary>
/// Gets the visibility mode
/// </summary>
public int VisibilityMode
{
get { return _visibilityMode; }
}
/// <summary>
/// Gets the unknown mode
/// </summary>
public int UknownMode
{
get { return _unknownMode; }
}
}
}

View file

@ -0,0 +1,175 @@
using System;
using System.IO;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// The class <c>UrlTableEntry</c> stores data for an URL-Table entry
/// </summary>
internal sealed class UrlTableEntry
{
/// <summary>
/// Internal member storing the offset of this entry
/// </summary>
private int _entryOffset = 0;
/// <summary>
/// Internal member storing a unique id
/// </summary>
private uint _uniqueID = 0;
/// <summary>
/// Internal member storing the topics index
/// </summary>
private int _topicsIndex = 0;
/// <summary>
/// Internal member storing the offset in the urlstr table
/// </summary>
private int _urlStrOffset = 0;
/// <summary>
/// Internal member storing the associated chmfile object
/// </summary>
private CHMFile _associatedFile = null;
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="uniqueID">unique id</param>
/// <param name="entryOffset">offset of the entry</param>
/// <param name="topicIndex">topic index</param>
/// <param name="urlstrOffset">urlstr offset for filename</param>
public UrlTableEntry(uint uniqueID, int entryOffset, int topicIndex, int urlstrOffset) : this(uniqueID, entryOffset, topicIndex, urlstrOffset, null)
{
}
/// <summary>
/// Constructor of the class
/// </summary>
/// <param name="uniqueID">unique id</param>
/// <param name="entryOffset">offset of the entry</param>
/// <param name="topicIndex">topic index</param>
/// <param name="urlstrOffset">urlstr offset for filename</param>
/// <param name="associatedFile">associated chm file</param>
internal UrlTableEntry(uint uniqueID, int entryOffset, int topicIndex, int urlstrOffset, CHMFile associatedFile)
{
_uniqueID = uniqueID;
_entryOffset = entryOffset;
_topicsIndex = topicIndex;
_urlStrOffset = urlstrOffset;
_associatedFile = associatedFile;
}
/// <summary>
/// Standard constructor
/// </summary>
internal UrlTableEntry()
{
}
#region Data dumping
/// <summary>
/// Dump the class data to a binary writer
/// </summary>
/// <param name="writer">writer to write the data</param>
internal void Dump(ref BinaryWriter writer)
{
writer.Write( _urlStrOffset );
writer.Write( _entryOffset );
writer.Write( _topicsIndex );
writer.Write( _urlStrOffset );
}
/// <summary>
/// Reads the object data from a dump store
/// </summary>
/// <param name="reader">reader to read the data</param>
internal void ReadDump(ref BinaryReader reader)
{
_urlStrOffset = reader.ReadInt32();
_entryOffset = reader.ReadInt32();
_topicsIndex = reader.ReadInt32();
_urlStrOffset = reader.ReadInt32();
}
/// <summary>
/// Sets the associated CHMFile instance
/// </summary>
/// <param name="associatedFile">instance to set</param>
internal void SetCHMFile(CHMFile associatedFile)
{
_associatedFile = associatedFile;
}
#endregion
/// <summary>
/// Gets the unique id of the entry
/// </summary>
internal uint UniqueID
{
get {return _uniqueID; }
}
/// <summary>
/// Gets the offset of the entry
/// </summary>
internal int EntryOffset
{
get {return _entryOffset; }
}
/// <summary>
/// Gets the topics index
/// </summary>
internal int TopicIndex
{
get {return _topicsIndex; }
}
/// <summary>
/// Gets the urlstr offset
/// </summary>
internal int UrlstrOffset
{
get { return _urlStrOffset; }
}
/// <summary>
/// Gets the url of the entry
/// </summary>
public string URL
{
get
{
if(_associatedFile == null)
return String.Empty;
if(_associatedFile.UrlstrFile == null)
return String.Empty;
string sTemp = (string)_associatedFile.UrlstrFile.GetURLatOffset( _urlStrOffset );
if( sTemp == null)
return String.Empty;
return sTemp;
}
}
/// <summary>
/// Gets the associated topic for this url entry
/// </summary>
internal TopicEntry Topic
{
get
{
if(_associatedFile == null)
return null;
if(_associatedFile.TopicsFile == null)
return null;
TopicEntry tentry = _associatedFile.TopicsFile[ _topicsIndex*16 ];
return tentry;
}
}
}
}

View file

@ -0,0 +1,19 @@
using System;
namespace HtmlHelp.ChmDecoding
{
/// <summary>
/// Enumeration for specifying the extraction mode of an toc or index item.
/// </summary>
public enum DataMode
{
/// <summary>
/// TextBased - this item comes from a text-based sitemap file
/// </summary>
TextBased = 0,
/// <summary>
/// Binary - this item was extracted out of a binary stream
/// </summary>
Binary = 1
}
}