mirror of
https://github.com/reactos/reactos.git
synced 2025-03-10 10:14:44 +00:00

* sumatrapdf - vendor import * everything compiles (libjpeg, poppler, fitz, sumatrapdf) * does NOT link (remove the comment tags in the parent directory.rbuild file (rosapps dir) to build it) svn path=/trunk/; revision=29295
1166 lines
21 KiB
C
1166 lines
21 KiB
C
/*
|
|
* The CMap data structure here is constructed on the fly by
|
|
* adding simple range-to-range mappings. Then the data structure
|
|
* is optimized to contain both range-to-range and range-to-table
|
|
* lookups.
|
|
*
|
|
* Any one-to-many mappings are inserted as one-to-table
|
|
* lookups in the beginning, and are not affected by the optimization
|
|
* stage.
|
|
*
|
|
* There is a special function to add a 256-length range-to-table mapping.
|
|
* The ranges do not have to be added in order.
|
|
*
|
|
* This code can be a lot simpler if we don't care about wasting memory,
|
|
* or can trust the parser to give us optimal mappings.
|
|
*/
|
|
|
|
#include "fitz.h"
|
|
#include "mupdf.h"
|
|
|
|
typedef struct pdf_range_s pdf_range;
|
|
|
|
enum { MAXCODESPACE = 10 };
|
|
enum { SINGLE, RANGE, TABLE, MULTI };
|
|
|
|
struct pdf_range_s
|
|
{
|
|
int low;
|
|
int high;
|
|
int flag; /* what kind of lookup is this */
|
|
int offset; /* either range-delta or table-index */
|
|
};
|
|
|
|
static int
|
|
cmprange(const void *va, const void *vb)
|
|
{
|
|
return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low;
|
|
}
|
|
|
|
struct pdf_cmap_s
|
|
{
|
|
int refs;
|
|
char cmapname[32];
|
|
|
|
char usecmapname[32];
|
|
pdf_cmap *usecmap;
|
|
|
|
int wmode;
|
|
|
|
int ncspace;
|
|
struct {
|
|
int n;
|
|
unsigned char lo[4];
|
|
unsigned char hi[4];
|
|
} cspace[MAXCODESPACE];
|
|
|
|
int rlen, rcap;
|
|
pdf_range *ranges;
|
|
|
|
int tlen, tcap;
|
|
int *table;
|
|
};
|
|
|
|
/*
|
|
* Allocate, destroy and simple parameters.
|
|
*/
|
|
|
|
fz_error *
|
|
pdf_newcmap(pdf_cmap **cmapp)
|
|
{
|
|
pdf_cmap *cmap;
|
|
|
|
cmap = *cmapp = fz_malloc(sizeof(pdf_cmap));
|
|
if (!cmap)
|
|
return fz_outofmem;
|
|
|
|
cmap->refs = 1;
|
|
strcpy(cmap->cmapname, "");
|
|
|
|
strcpy(cmap->usecmapname, "");
|
|
cmap->usecmap = nil;
|
|
|
|
cmap->wmode = 0;
|
|
|
|
cmap->ncspace = 0;
|
|
|
|
cmap->rlen = 0;
|
|
cmap->rcap = 0;
|
|
cmap->ranges = nil;
|
|
|
|
cmap->tlen = 0;
|
|
cmap->tcap = 0;
|
|
cmap->table = nil;
|
|
|
|
return nil;
|
|
}
|
|
|
|
pdf_cmap *
|
|
pdf_keepcmap(pdf_cmap *cmap)
|
|
{
|
|
cmap->refs ++;
|
|
return cmap;
|
|
}
|
|
|
|
void
|
|
pdf_dropcmap(pdf_cmap *cmap)
|
|
{
|
|
if (--cmap->refs == 0)
|
|
{
|
|
if (cmap->usecmap)
|
|
pdf_dropcmap(cmap->usecmap);
|
|
fz_free(cmap->ranges);
|
|
fz_free(cmap->table);
|
|
fz_free(cmap);
|
|
}
|
|
}
|
|
|
|
pdf_cmap *
|
|
pdf_getusecmap(pdf_cmap *cmap)
|
|
{
|
|
return cmap->usecmap;
|
|
}
|
|
|
|
void
|
|
pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap)
|
|
{
|
|
int i;
|
|
|
|
if (cmap->usecmap)
|
|
pdf_dropcmap(cmap->usecmap);
|
|
cmap->usecmap = pdf_keepcmap(usecmap);
|
|
|
|
if (cmap->ncspace == 0)
|
|
{
|
|
cmap->ncspace = usecmap->ncspace;
|
|
for (i = 0; i < usecmap->ncspace; i++)
|
|
cmap->cspace[i] = usecmap->cspace[i];
|
|
}
|
|
}
|
|
|
|
int
|
|
pdf_getwmode(pdf_cmap *cmap)
|
|
{
|
|
return cmap->wmode;
|
|
}
|
|
|
|
void
|
|
pdf_setwmode(pdf_cmap *cmap, int wmode)
|
|
{
|
|
cmap->wmode = wmode;
|
|
}
|
|
|
|
void
|
|
pdf_debugcmap(pdf_cmap *cmap)
|
|
{
|
|
int i, k, n;
|
|
|
|
printf("cmap $%p /%s {\n", cmap, cmap->cmapname);
|
|
|
|
if (cmap->usecmapname[0])
|
|
printf(" usecmap /%s\n", cmap->usecmapname);
|
|
if (cmap->usecmap)
|
|
printf(" usecmap $%p\n", cmap->usecmap);
|
|
|
|
printf(" wmode %d\n", cmap->wmode);
|
|
|
|
printf(" codespaces {\n");
|
|
for (i = 0; i < cmap->ncspace; i++)
|
|
{
|
|
printf(" <");
|
|
for (k = 0; k < cmap->cspace[i].n; k++)
|
|
printf("%02x", cmap->cspace[i].lo[k]);
|
|
printf("> <");
|
|
for (k = 0; k < cmap->cspace[i].n; k++)
|
|
printf("%02x", cmap->cspace[i].hi[k]);
|
|
printf(">\n");
|
|
}
|
|
printf(" }\n");
|
|
|
|
printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
|
|
for (i = 0; i < cmap->rlen; i++)
|
|
{
|
|
pdf_range *r = &cmap->ranges[i];
|
|
printf(" <%04x> <%04x> ", r->low, r->high);
|
|
if (r->flag == TABLE)
|
|
{
|
|
printf("[ ");
|
|
for (k = 0; k < r->high - r->low + 1; k++)
|
|
printf("%d ", cmap->table[r->offset + k]);
|
|
printf("]\n");
|
|
}
|
|
else if (r->flag == MULTI)
|
|
{
|
|
printf("< ");
|
|
n = cmap->table[r->offset];
|
|
for (k = 0; k < n; k++)
|
|
printf("%04x ", cmap->table[r->offset + 1 + k]);
|
|
printf(">\n");
|
|
}
|
|
else
|
|
printf("%d\n", r->offset);
|
|
}
|
|
printf(" }\n}\n");
|
|
}
|
|
|
|
/*
|
|
* Add a codespacerange section.
|
|
* These ranges are used by pdf_decodecmap to decode
|
|
* multi-byte encoded strings.
|
|
*/
|
|
fz_error *
|
|
pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n)
|
|
{
|
|
int i;
|
|
|
|
if (cmap->ncspace + 1 == MAXCODESPACE)
|
|
return fz_throw("rangelimit: too many code space ranges");
|
|
|
|
cmap->cspace[cmap->ncspace].n = n;
|
|
|
|
for (i = 0; i < n; i++)
|
|
{
|
|
int o = (n - i - 1) * 8;
|
|
cmap->cspace[cmap->ncspace].lo[i] = (lo >> o) & 0xFF;
|
|
cmap->cspace[cmap->ncspace].hi[i] = (hi >> o) & 0xFF;
|
|
}
|
|
|
|
cmap->ncspace ++;
|
|
|
|
return nil;
|
|
}
|
|
|
|
/*
|
|
* Add an integer to the table.
|
|
*/
|
|
static fz_error *
|
|
addtable(pdf_cmap *cmap, int value)
|
|
{
|
|
if (cmap->tlen + 1 > cmap->tcap)
|
|
{
|
|
int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2;
|
|
int *newtable = fz_realloc(cmap->table, newcap * sizeof(int));
|
|
if (!newtable)
|
|
return fz_outofmem;
|
|
cmap->tcap = newcap;
|
|
cmap->table = newtable;
|
|
}
|
|
|
|
cmap->table[cmap->tlen++] = value;
|
|
|
|
return nil;
|
|
}
|
|
|
|
/*
|
|
* Add a range.
|
|
*/
|
|
static fz_error *
|
|
addrange(pdf_cmap *cmap, int low, int high, int flag, int offset)
|
|
{
|
|
if (cmap->rlen + 1 > cmap->rcap)
|
|
{
|
|
pdf_range *newranges;
|
|
int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2;
|
|
newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range));
|
|
if (!newranges)
|
|
return fz_outofmem;
|
|
cmap->rcap = newcap;
|
|
cmap->ranges = newranges;
|
|
}
|
|
|
|
cmap->ranges[cmap->rlen].low = low;
|
|
cmap->ranges[cmap->rlen].high = high;
|
|
cmap->ranges[cmap->rlen].flag = flag;
|
|
cmap->ranges[cmap->rlen].offset = offset;
|
|
cmap->rlen ++;
|
|
|
|
return nil;
|
|
}
|
|
|
|
/*
|
|
* Add a range-to-table mapping.
|
|
*/
|
|
fz_error *
|
|
pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len)
|
|
{
|
|
fz_error *error;
|
|
int offset;
|
|
int high;
|
|
int i;
|
|
|
|
high = low + len;
|
|
offset = cmap->tlen;
|
|
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
error = addtable(cmap, table[i]);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
return addrange(cmap, low, high, TABLE, offset);
|
|
}
|
|
|
|
/*
|
|
* Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25)
|
|
*/
|
|
fz_error *
|
|
pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset)
|
|
{
|
|
return addrange(cmap, low, high, high - low == 0 ? SINGLE : RANGE, offset);
|
|
}
|
|
|
|
/*
|
|
* Add a single one-to-many mapping.
|
|
*/
|
|
fz_error *
|
|
pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len)
|
|
{
|
|
fz_error *error;
|
|
int offset;
|
|
int i;
|
|
|
|
if (len == 1)
|
|
return addrange(cmap, low, low, SINGLE, values[0]);
|
|
|
|
offset = cmap->tlen;
|
|
|
|
error = addtable(cmap, len);
|
|
if (error)
|
|
return error;
|
|
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
addtable(cmap, values[i]);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
return addrange(cmap, low, low, MULTI, offset);
|
|
}
|
|
|
|
/*
|
|
* Sort the input ranges.
|
|
* Merge contiguous input ranges to range-to-range if the output is contiguos.
|
|
* Merge contiguous input ranges to range-to-table if the output is random.
|
|
*/
|
|
fz_error *
|
|
pdf_sortcmap(pdf_cmap *cmap)
|
|
{
|
|
fz_error *error;
|
|
pdf_range *newranges;
|
|
int *newtable;
|
|
pdf_range *a; /* last written range on output */
|
|
pdf_range *b; /* current range examined on input */
|
|
|
|
qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
|
|
|
|
a = cmap->ranges;
|
|
b = cmap->ranges + 1;
|
|
|
|
while (b < cmap->ranges + cmap->rlen)
|
|
{
|
|
/* ignore one-to-many mappings */
|
|
if (b->flag == MULTI)
|
|
{
|
|
*(++a) = *b;
|
|
}
|
|
|
|
/* input contiguous */
|
|
else if (a->high + 1 == b->low)
|
|
{
|
|
/* output contiguous */
|
|
if (a->high - a->low + a->offset + 1 == b->offset)
|
|
{
|
|
/* SR -> R and SS -> R and RR -> R and RS -> R */
|
|
if (a->flag == SINGLE || a->flag == RANGE)
|
|
{
|
|
a->flag = RANGE;
|
|
a->high = b->high;
|
|
}
|
|
|
|
/* LS -> L */
|
|
else if (a->flag == TABLE && b->flag == SINGLE)
|
|
{
|
|
a->high = b->high;
|
|
error = addtable(cmap, b->offset);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
/* LR -> LR */
|
|
else if (a->flag == TABLE && b->flag == RANGE)
|
|
{
|
|
*(++a) = *b;
|
|
}
|
|
|
|
/* XX -> XX */
|
|
else
|
|
{
|
|
*(++a) = *b;
|
|
}
|
|
}
|
|
|
|
/* output separated */
|
|
else
|
|
{
|
|
/* SS -> L */
|
|
if (a->flag == SINGLE && b->flag == SINGLE)
|
|
{
|
|
a->flag = TABLE;
|
|
a->high = b->high;
|
|
|
|
error = addtable(cmap, a->offset);
|
|
if (error)
|
|
return error;
|
|
|
|
error = addtable(cmap, b->offset);
|
|
if (error)
|
|
return error;
|
|
|
|
a->offset = cmap->tlen - 2;
|
|
}
|
|
|
|
/* LS -> L */
|
|
else if (a->flag == TABLE && b->flag == SINGLE)
|
|
{
|
|
a->high = b->high;
|
|
error = addtable(cmap, b->offset);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
/* XX -> XX */
|
|
else
|
|
{
|
|
*(++a) = *b;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* input separated: XX -> XX */
|
|
else
|
|
{
|
|
*(++a) = *b;
|
|
}
|
|
|
|
b ++;
|
|
}
|
|
|
|
cmap->rlen = a - cmap->ranges + 1;
|
|
|
|
assert(cmap->rlen > 0);
|
|
|
|
newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(pdf_range));
|
|
if (!newranges)
|
|
return fz_outofmem;
|
|
cmap->rcap = cmap->rlen;
|
|
cmap->ranges = newranges;
|
|
|
|
if (cmap->tlen)
|
|
{
|
|
newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int));
|
|
if (!newtable)
|
|
return fz_outofmem;
|
|
cmap->tcap = cmap->tlen;
|
|
cmap->table = newtable;
|
|
}
|
|
|
|
return nil;
|
|
}
|
|
|
|
/*
|
|
* Lookup the mapping of a codepoint.
|
|
*/
|
|
int
|
|
pdf_lookupcmap(pdf_cmap *cmap, int cpt)
|
|
{
|
|
int l = 0;
|
|
int r = cmap->rlen - 1;
|
|
int m;
|
|
|
|
while (l <= r)
|
|
{
|
|
m = (l + r) >> 1;
|
|
if (cpt < cmap->ranges[m].low)
|
|
r = m - 1;
|
|
else if (cpt > cmap->ranges[m].high)
|
|
l = m + 1;
|
|
else
|
|
{
|
|
int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
|
|
if (cmap->ranges[m].flag == TABLE)
|
|
return cmap->table[i];
|
|
if (cmap->ranges[m].flag == MULTI)
|
|
return -1;
|
|
return i;
|
|
}
|
|
}
|
|
|
|
if (cmap->usecmap)
|
|
return pdf_lookupcmap(cmap->usecmap, cpt);
|
|
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Use the codespace ranges to extract a codepoint from a
|
|
* multi-byte encoded string.
|
|
*/
|
|
unsigned char *
|
|
pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
|
|
{
|
|
int i, k;
|
|
|
|
for (k = 0; k < cmap->ncspace; k++)
|
|
{
|
|
unsigned char *lo = cmap->cspace[k].lo;
|
|
unsigned char *hi = cmap->cspace[k].hi;
|
|
int n = cmap->cspace[k].n;
|
|
int c = 0;
|
|
|
|
for (i = 0; i < n; i++)
|
|
{
|
|
if (lo[i] <= buf[i] && buf[i] <= hi[i])
|
|
c = (c << 8) | buf[i];
|
|
else
|
|
break;
|
|
}
|
|
|
|
if (i == n) {
|
|
*cpt = c;
|
|
return buf + n;
|
|
}
|
|
}
|
|
|
|
*cpt = 0;
|
|
return buf + 1;
|
|
}
|
|
|
|
/*
|
|
* CMap parser
|
|
*/
|
|
|
|
enum
|
|
{
|
|
TUSECMAP = PDF_NTOKENS,
|
|
TBEGINCODESPACERANGE,
|
|
TENDCODESPACERANGE,
|
|
TBEGINBFCHAR,
|
|
TENDBFCHAR,
|
|
TBEGINBFRANGE,
|
|
TENDBFRANGE,
|
|
TBEGINCIDCHAR,
|
|
TENDCIDCHAR,
|
|
TBEGINCIDRANGE,
|
|
TENDCIDRANGE
|
|
};
|
|
|
|
static int tokenfromkeyword(char *key)
|
|
{
|
|
if (!strcmp(key, "usecmap")) return TUSECMAP;
|
|
if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE;
|
|
if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE;
|
|
if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR;
|
|
if (!strcmp(key, "endbfchar")) return TENDBFCHAR;
|
|
if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE;
|
|
if (!strcmp(key, "endbfrange")) return TENDBFRANGE;
|
|
if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR;
|
|
if (!strcmp(key, "endcidchar")) return TENDCIDCHAR;
|
|
if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE;
|
|
if (!strcmp(key, "endcidrange")) return TENDCIDRANGE;
|
|
return PDF_TKEYWORD;
|
|
}
|
|
|
|
static int codefromstring(unsigned char *buf, int len)
|
|
{
|
|
int a = 0;
|
|
while (len--)
|
|
a = (a << 8) | *buf++;
|
|
return a;
|
|
}
|
|
|
|
static int mylex(fz_stream *file, char *buf, int n, int *sl)
|
|
{
|
|
int token = pdf_lex(file, buf, n, sl);
|
|
if (token == PDF_TKEYWORD)
|
|
token = tokenfromkeyword(buf);
|
|
return token;
|
|
}
|
|
|
|
static fz_error *parsecmapname(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token == PDF_TNAME) {
|
|
strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname));
|
|
return nil;
|
|
}
|
|
|
|
return fz_throw("syntaxerror in CMap after /CMapName");
|
|
}
|
|
|
|
static fz_error *parsewmode(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token == PDF_TINT) {
|
|
pdf_setwmode(cmap, atoi(buf));
|
|
return nil;
|
|
}
|
|
|
|
return fz_throw("syntaxerror in CMap after /WMode");
|
|
}
|
|
|
|
static fz_error *parsecodespacerange(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int lo, hi;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == TENDCODESPACERANGE)
|
|
return nil;
|
|
|
|
else if (token == PDF_TSTRING)
|
|
{
|
|
lo = codefromstring(buf, len);
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token == PDF_TSTRING)
|
|
{
|
|
hi = codefromstring(buf, len);
|
|
error = pdf_addcodespace(cmap, lo, hi, len);
|
|
if (error)
|
|
return error;
|
|
}
|
|
else break;
|
|
}
|
|
|
|
else break;
|
|
}
|
|
|
|
return fz_throw("syntaxerror in CMap codespacerange section");
|
|
}
|
|
|
|
static fz_error *parsecidrange(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int lo, hi, dst;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == TENDCIDRANGE)
|
|
return nil;
|
|
|
|
else if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
lo = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
hi = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token != PDF_TINT)
|
|
goto cleanup;
|
|
|
|
dst = atoi(buf);
|
|
|
|
error = pdf_maprangetorange(cmap, lo, hi, dst);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
cleanup:
|
|
return fz_throw("syntaxerror in CMap cidrange section");
|
|
}
|
|
|
|
static fz_error *parsecidchar(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int src, dst;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == TENDCIDCHAR)
|
|
return nil;
|
|
|
|
else if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
src = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token != PDF_TINT)
|
|
goto cleanup;
|
|
|
|
dst = atoi(buf);
|
|
|
|
error = pdf_maprangetorange(cmap, src, src, dst);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
cleanup:
|
|
return fz_throw("syntaxerror in CMap cidchar section");
|
|
}
|
|
|
|
static fz_error *parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int dst[256];
|
|
int i;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
/* Note: does not handle [ /Name /Name ... ] */
|
|
|
|
if (token == PDF_TCARRAY)
|
|
return nil;
|
|
|
|
else if (token != PDF_TSTRING)
|
|
return fz_throw("syntaxerror in CMap bfrange array section");
|
|
|
|
if (len / 2)
|
|
{
|
|
for (i = 0; i < len / 2; i++)
|
|
dst[i] = codefromstring(buf + i * 2, 2);
|
|
|
|
error = pdf_maponetomany(cmap, lo, dst, len / 2);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
lo ++;
|
|
}
|
|
}
|
|
|
|
static fz_error *parsebfrange(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int lo, hi, dst;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == TENDBFRANGE)
|
|
return nil;
|
|
|
|
else if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
lo = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
hi = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == PDF_TSTRING)
|
|
{
|
|
if (len == 2)
|
|
{
|
|
dst = codefromstring(buf, len);
|
|
error = pdf_maprangetorange(cmap, lo, hi, dst);
|
|
if (error)
|
|
return error;
|
|
}
|
|
else
|
|
{
|
|
int dststr[256];
|
|
int i;
|
|
|
|
if (len / 2)
|
|
{
|
|
for (i = 0; i < len / 2; i++)
|
|
dststr[i] = codefromstring(buf + i * 2, 2);
|
|
|
|
while (lo <= hi)
|
|
{
|
|
dststr[i-1] ++;
|
|
error = pdf_maponetomany(cmap, lo, dststr, i);
|
|
if (error)
|
|
return error;
|
|
lo ++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
else if (token == PDF_TOARRAY)
|
|
{
|
|
error = parsebfrangearray(cmap, file, lo, hi);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
else
|
|
{
|
|
goto cleanup;
|
|
}
|
|
}
|
|
|
|
cleanup:
|
|
return fz_throw("syntaxerror in CMap bfrange section");
|
|
}
|
|
|
|
static fz_error *parsebfchar(pdf_cmap *cmap, fz_stream *file)
|
|
{
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
fz_error *error;
|
|
int dst[256];
|
|
int src;
|
|
int i;
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == TENDBFCHAR)
|
|
return nil;
|
|
|
|
else if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
src = codefromstring(buf, len);
|
|
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
/* Note: does not handle /dstName */
|
|
if (token != PDF_TSTRING)
|
|
goto cleanup;
|
|
|
|
if (len / 2)
|
|
{
|
|
for (i = 0; i < len / 2; i++)
|
|
dst[i] = codefromstring(buf + i * 2, 2);
|
|
|
|
error = pdf_maponetomany(cmap, src, dst, i);
|
|
if (error)
|
|
return error;
|
|
}
|
|
}
|
|
|
|
cleanup:
|
|
return fz_throw("syntaxerror in CMap bfchar section");
|
|
}
|
|
|
|
fz_error *
|
|
pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file)
|
|
{
|
|
fz_error *error;
|
|
pdf_cmap *cmap;
|
|
char key[64];
|
|
char buf[256];
|
|
int token;
|
|
int len;
|
|
|
|
error = pdf_newcmap(&cmap);
|
|
if (error)
|
|
return error;
|
|
|
|
strcpy(key, ".notdef");
|
|
|
|
while (1)
|
|
{
|
|
token = mylex(file, buf, sizeof buf, &len);
|
|
|
|
if (token == PDF_TEOF)
|
|
break;
|
|
|
|
else if (token == PDF_TERROR)
|
|
{
|
|
error = fz_throw("syntaxerror in CMap");
|
|
goto cleanup;
|
|
}
|
|
|
|
else if (token == PDF_TNAME)
|
|
{
|
|
if (!strcmp(buf, "CMapName"))
|
|
{
|
|
error = parsecmapname(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
else if (!strcmp(buf, "WMode"))
|
|
{
|
|
error = parsewmode(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
else
|
|
strlcpy(key, buf, sizeof key);
|
|
}
|
|
|
|
else if (token == TUSECMAP)
|
|
{
|
|
strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname));
|
|
}
|
|
|
|
else if (token == TBEGINCODESPACERANGE)
|
|
{
|
|
error = parsecodespacerange(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
|
|
else if (token == TBEGINBFCHAR)
|
|
{
|
|
error = parsebfchar(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
|
|
else if (token == TBEGINCIDCHAR)
|
|
{
|
|
error = parsecidchar(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
|
|
else if (token == TBEGINBFRANGE)
|
|
{
|
|
error = parsebfrange(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
|
|
else if (token == TBEGINCIDRANGE)
|
|
{
|
|
error = parsecidrange(cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
}
|
|
|
|
/* ignore everything else */
|
|
}
|
|
|
|
error = pdf_sortcmap(cmap);
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
*cmapp = cmap;
|
|
return nil;
|
|
|
|
cleanup:
|
|
pdf_dropcmap(cmap);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Load CMap stream in PDF file
|
|
*/
|
|
fz_error *
|
|
pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref)
|
|
{
|
|
fz_obj *stmobj = stmref;
|
|
fz_error *error = nil;
|
|
fz_stream *file;
|
|
pdf_cmap *cmap = nil;
|
|
pdf_cmap *usecmap;
|
|
fz_obj *wmode;
|
|
fz_obj *obj;
|
|
|
|
if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref)))
|
|
{
|
|
pdf_keepcmap(*cmapp);
|
|
return nil;
|
|
}
|
|
|
|
pdf_logfont("load embedded cmap %d %d {\n", fz_tonum(stmref), fz_togen(stmref));
|
|
|
|
error = pdf_resolve(&stmobj, xref);
|
|
if (error)
|
|
return error;
|
|
|
|
error = pdf_openstream(&file, xref, fz_tonum(stmref), fz_togen(stmref));
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
error = pdf_parsecmap(&cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
fz_dropstream(file);
|
|
|
|
wmode = fz_dictgets(stmobj, "WMode");
|
|
if (fz_isint(wmode))
|
|
{
|
|
pdf_logfont("wmode %d\n", wmode);
|
|
pdf_setwmode(cmap, fz_toint(wmode));
|
|
}
|
|
|
|
obj = fz_dictgets(stmobj, "UseCMap");
|
|
if (fz_isname(obj))
|
|
{
|
|
pdf_logfont("usecmap /%s\n", fz_toname(obj));
|
|
error = pdf_loadsystemcmap(&usecmap, fz_toname(obj));
|
|
if (error)
|
|
goto cleanup;
|
|
pdf_setusecmap(cmap, usecmap);
|
|
pdf_dropcmap(usecmap);
|
|
}
|
|
else if (fz_isindirect(obj))
|
|
{
|
|
pdf_logfont("usecmap %d %d R\n", fz_tonum(obj), fz_togen(obj));
|
|
error = pdf_loadembeddedcmap(&usecmap, xref, obj);
|
|
if (error)
|
|
goto cleanup;
|
|
pdf_setusecmap(cmap, usecmap);
|
|
pdf_dropcmap(usecmap);
|
|
}
|
|
|
|
pdf_logfont("}\n");
|
|
|
|
error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap);
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
fz_dropobj(stmobj);
|
|
|
|
*cmapp = cmap;
|
|
return nil;
|
|
|
|
cleanup:
|
|
if (cmap)
|
|
pdf_dropcmap(cmap);
|
|
fz_dropobj(stmobj);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Load predefined CMap from system
|
|
*/
|
|
fz_error *
|
|
pdf_loadsystemcmap(pdf_cmap **cmapp, char *name)
|
|
{
|
|
fz_error *error = nil;
|
|
fz_stream *file;
|
|
char *cmapdir;
|
|
char *usecmapname;
|
|
pdf_cmap *usecmap;
|
|
pdf_cmap *cmap;
|
|
char path[1024];
|
|
|
|
cmap = nil;
|
|
file = nil;
|
|
|
|
pdf_logfont("load system cmap %s {\n", name);
|
|
|
|
cmapdir = getenv("CMAPDIR");
|
|
if (!cmapdir)
|
|
return fz_throw("ioerror: CMAPDIR environment not set");
|
|
|
|
strlcpy(path, cmapdir, sizeof path);
|
|
strlcat(path, "/", sizeof path);
|
|
strlcat(path, name, sizeof path);
|
|
|
|
error = fz_openrfile(&file, path);
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
error = pdf_parsecmap(&cmap, file);
|
|
if (error)
|
|
goto cleanup;
|
|
|
|
fz_dropstream(file);
|
|
|
|
usecmapname = cmap->usecmapname;
|
|
if (usecmapname[0])
|
|
{
|
|
pdf_logfont("usecmap %s\n", usecmapname);
|
|
error = pdf_loadsystemcmap(&usecmap, usecmapname);
|
|
if (error)
|
|
goto cleanup;
|
|
pdf_setusecmap(cmap, usecmap);
|
|
pdf_dropcmap(usecmap);
|
|
}
|
|
|
|
pdf_logfont("}\n");
|
|
|
|
*cmapp = cmap;
|
|
return nil;
|
|
|
|
cleanup:
|
|
if (cmap)
|
|
pdf_dropcmap(cmap);
|
|
if (file)
|
|
fz_dropstream(file);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Create an Identity-* CMap (for both 1 and 2-byte encodings)
|
|
*/
|
|
fz_error *
|
|
pdf_newidentitycmap(pdf_cmap **cmapp, int wmode, int bytes)
|
|
{
|
|
fz_error *error;
|
|
pdf_cmap *cmap;
|
|
|
|
error = pdf_newcmap(&cmap);
|
|
if (error)
|
|
return error;
|
|
|
|
sprintf(cmap->cmapname, "Identity-%c", wmode ? 'V' : 'H');
|
|
|
|
error = pdf_addcodespace(cmap, 0x0000, 0xffff, bytes);
|
|
if (error) {
|
|
pdf_dropcmap(cmap);
|
|
return error;
|
|
}
|
|
|
|
error = pdf_maprangetorange(cmap, 0x0000, 0xffff, 0);
|
|
if (error) {
|
|
pdf_dropcmap(cmap);
|
|
return error;
|
|
}
|
|
|
|
error = pdf_sortcmap(cmap);
|
|
if (error) {
|
|
pdf_dropcmap(cmap);
|
|
return error;
|
|
}
|
|
|
|
pdf_setwmode(cmap, wmode);
|
|
|
|
*cmapp = cmap;
|
|
return nil;
|
|
}
|
|
|