--- buffer.c.orig +++ buffer.c @@ -1,9 +1,9 @@ -/*********************/ -/* buffer.c */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* buffer.c */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/***********************/ /* This is ANSI C code (C89). */ @@ -18,6 +18,7 @@ #include #include +#include #undef NULL #define NULL ((void *) 0) @@ -60,7 +61,7 @@ blk = malloc(sizeof (block)); items = malloc(maxhere * itemsize); if (!buf || !blk || !items) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto nberror; } @@ -127,7 +128,7 @@ new = malloc(sizeof (block)); items = malloc(maxhere * itemsize); if (!new || !items) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto aierror; } blk->next = new; @@ -174,7 +175,7 @@ r = malloc(n * itemsize); if (!r) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); return NULL; } --- buffer.h.orig +++ buffer.h @@ -1,9 +1,9 @@ -/*********************/ -/* buffer.h */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* buffer.h */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/***********************/ /* This is ANSI C code (C89). */ --- charset.c.orig +++ charset.c @@ -1,9 +1,11 @@ -/*********************/ -/* charset.c */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* charset.c */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -22,6 +24,8 @@ #include #include #include +#include +#include #undef NULL #define NULL ((void *) 0) @@ -39,8 +43,8 @@ typedef unsigned char csflag_t; struct charset { - char *inlist; /* Characters in inlist are in the set. 
*/ - char *outlist; /* Characters in outlist are not in the set. */ + wchar_t *inlist; /* Characters in inlist are in the set. */ + wchar_t *outlist; /* Characters in outlist are not in the set. */ /* inlist and outlist must have no common characters. */ /* inlist and outlist may be NULL, which acts like "". */ csflag_t flags; /* Characters in neither list are in the set if they */ @@ -56,25 +60,25 @@ CS_NUL = 8; /* Includes the NUL character. */ -static int appearsin(char c, const char *str) +static int appearsin(wchar_t c, const wchar_t *str) /* Returns 0 if c is '\0' or str is NULL or c */ /* does not appear in *str. Otherwise returns 1. */ { - return c && str && strchr(str,c); + return c && str && wcschr(str,c); } -static int hexdigtoint(char c) +static int hexdigtoint(wchar_t c) /* Returns the value represented by the hexadecimal */ /* digit c, or -1 if c is not a hexadecimal digit. */ { - const char *p, * const hexdigits = "0123456789ABCDEFabcdef"; + const wchar_t *p, * const hexdigits = L"0123456789ABCDEFabcdef"; int n; if (!c) return -1; - p = strchr(hexdigits, *(unsigned char *)&c); + p = wcschr(hexdigits, c); if (!p) return -1; n = p - hexdigits; if (n >= 16) n -= 6; @@ -91,35 +95,46 @@ { charset *cset = NULL; buffer *cbuf = NULL; - const char *p, * const singleescapes = "_sbqQx"; + const wchar_t *p, * const singleescapes = L"_sbqQx"; int hex1, hex2; - char ch; + wchar_t ch; + wchar_t *wstr; + wstr = (wchar_t *) malloc((strlen(str) + 1) * sizeof(wchar_t)); + if (!wstr) { + wcscpy(errmsg,outofmem); + goto pcserror; + } + if ((size_t)(-1) == mbstowcs(wstr, str, strlen(str) + 1)) { + wcscpy(errmsg,mbserror); + goto pcserror; + } cset = malloc(sizeof (charset)); if (!cset) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto pcserror; } cset->inlist = cset->outlist = NULL; cset->flags = 0; - cbuf = newbuffer(sizeof (char), errmsg); + cbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto pcserror; - for (p = str; *p; ++p) - if (*p == '_') { + 
for (p = wstr; *p; ++p) + if (*p == L'_') { ++p; if (appearsin(*p, singleescapes)) { - if (*p == '_') ch = '_' ; - else if (*p == 's') ch = ' ' ; - else if (*p == 'b') ch = '\\'; - else if (*p == 'q') ch = '\''; - else if (*p == 'Q') ch = '\"'; + if (*p == L'_') ch = L'_' ; + else if (*p == L's') ch = L' ' ; + else if (*p == L'b') ch = L'\\'; + else if (*p == L'q') ch = L'\''; + else if (*p == L'Q') ch = L'\"'; else /* *p == 'x' */ { + /* FIXME _x metacharacter should allow wide characters input.*/ hex1 = hexdigtoint(p[1]); hex2 = hexdigtoint(p[2]); if (hex1 < 0 || hex2 < 0) goto pcsbadstr; - *(unsigned char *)&ch = 16 * hex1 + hex2; + ch = 16 * hex1 + hex2; p += 2; } if (!ch) @@ -130,14 +145,14 @@ } } else { - if (*p == 'A') cset->flags |= CS_UCASE; - else if (*p == 'a') cset->flags |= CS_LCASE; - else if (*p == '0') cset->flags |= CS_DIGIT; + if (*p == L'A') cset->flags |= CS_UCASE; + else if (*p == L'a') cset->flags |= CS_LCASE; + else if (*p == L'0') cset->flags |= CS_DIGIT; else goto pcsbadstr; } } else { - additem(cbuf,p,errmsg); + additem(cbuf, p,errmsg); if (*errmsg) goto pcserror; } ch = '\0'; @@ -149,11 +164,12 @@ pcscleanup: if (cbuf) freebuffer(cbuf); + if (wstr) free(wstr); return cset; pcsbadstr: - sprintf(errmsg, "Bad charset syntax: %.*s\n", errmsg_size - 22, str); + swprintf(errmsg, errmsg_size, L"Bad charset syntax: %.*s\n", errmsg_size - 22, str); pcserror: @@ -171,14 +187,14 @@ } -int csmember(char c, const charset *cset) +int csmember(wchar_t c, const charset *cset) { return appearsin(c, cset->inlist) || ( !appearsin(c, cset->outlist) && - ( (cset->flags & CS_LCASE && islower(*(unsigned char *)&c)) || - (cset->flags & CS_UCASE && isupper(*(unsigned char *)&c)) || - (cset->flags & CS_DIGIT && isdigit(*(unsigned char *)&c)) || + ( (cset->flags & CS_LCASE && iswlower(*(wint_t *)&c)) || + (cset->flags & CS_UCASE && iswupper(*(wint_t *)&c)) || + (cset->flags & CS_DIGIT && iswdigit(*(wint_t *)&c)) || (cset->flags & CS_NUL && !c ) ) ); } @@ -191,16 
+207,16 @@ { charset *csu; buffer *inbuf = NULL, *outbuf = NULL; - char *lists[4], **list, *p, nullchar = '\0'; + wchar_t *lists[4], **list, *p, nullchar = L'\0'; csu = malloc(sizeof (charset)); if (!csu) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto csuderror; } - inbuf = newbuffer(sizeof (char), errmsg); + inbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto csuderror; - outbuf = newbuffer(sizeof (char), errmsg); + outbuf = newbuffer(sizeof (wchar_t), errmsg); if (*errmsg) goto csuderror; csu->inlist = csu->outlist = NULL; csu->flags = u ? cset1->flags | cset2->flags --- charset.h.orig +++ charset.h @@ -1,9 +1,11 @@ -/*********************/ -/* charset.h */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* charset.h */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -11,7 +13,7 @@ /* Note: Those functions declared here which do not use errmsg */ /* always succeed, provided that they are passed valid arguments. */ - +#include #include "errmsg.h" @@ -30,7 +32,7 @@ /* *cset. cset may not be used after this call. */ -int csmember(char c, const charset *cset); +int csmember(wchar_t c, const charset *cset); /* csmember(c,cset) returns 1 if c is a member of *cset, 0 otherwise. */ --- errmsg.c.orig +++ errmsg.c @@ -1,9 +1,11 @@ -/*********************/ -/* errmsg.c */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* errmsg.c */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -11,8 +13,11 @@ #include "errmsg.h" /* Makes sure we're consistent with the declarations. 
*/ -const char * const outofmem = - "Out of memory.\n"; +const wchar_t * const outofmem = + L"Out of memory.\n"; -const char * const impossibility = - "Impossibility #%d has occurred. Please report it.\n"; +const wchar_t * const mbserror = + L"Error in input multibyte string.\n"; + +const wchar_t * const impossibility = + L"Impossibility #%d has occurred. Please report it.\n"; --- errmsg.h.orig +++ errmsg.h @@ -1,9 +1,11 @@ -/*********************/ -/* errmsg.h */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* errmsg.h */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -11,7 +13,7 @@ #ifndef ERRMSG_H #define ERRMSG_H - +#include #define errmsg_size 163 /* This is the maximum number of characters that will */ @@ -20,7 +22,7 @@ /* versions of this header file. */ -typedef char errmsg_t[errmsg_size]; +typedef wchar_t errmsg_t[errmsg_size]; /* Any function which takes the argument errmsg_t errmsg must, before */ /* returning, either set errmsg[0] to '\0' (indicating success), or */ @@ -28,10 +30,13 @@ /* being careful not to overrun the space. */ -extern const char * const outofmem; +extern const wchar_t * const outofmem; /* "Out of memory.\n" */ -extern const char * const impossibility; +extern const wchar_t * const mbserror; + /* "Error in input multibyte string.\n" */ + +extern const wchar_t * const impossibility; /* "Impossibility #%d has occurred. Please report it.\n" */ --- par.1.orig +++ par.1 @@ -1,6 +1,6 @@ .\"********************* .\"* par.1 * -.\"* for Par 1.52 * +.\"* for Par 1.52 i18n * .\"* Copyright 2001 by * .\"* Adam M. Costello * .\"********************* --- par.c.orig +++ par.c @@ -1,9 +1,11 @@ -/*********************/ -/* par.c */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. 
Costello */ -/*********************/ +/***********************/ +/* par.c */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -17,6 +19,7 @@ #include #include #include +#include #undef NULL #define NULL ((void *) 0) @@ -126,7 +129,7 @@ /* line, or the fallback prelen and suflen */ /* of the IP containing a non-bodiless line. */ lflag_t flags; /* Boolean properties (see below). */ - char rc; /* The repeated character of a bodiless line. */ + wchar_t rc; /* The repeated character of a bodiless line. */ } lineprop; /* Flags for marking boolean properties: */ @@ -277,12 +280,12 @@ badarg: - sprintf(errmsg, "Bad argument: %.*s\n", errmsg_size - 16, savearg); + swprintf(errmsg, errmsg_size,L"Bad argument: %.*s\n", errmsg_size - 16, savearg); *phelp = 1; } -static char **readlines( +static wchar_t **readlines( lineprop **pprops, const charset *protectchars, const charset *quotechars, int Tab, int invis, int quote, errmsg_t errmsg ) @@ -303,8 +306,10 @@ { buffer *cbuf = NULL, *lbuf = NULL, *lpbuf = NULL; int c, empty, blank, firstline, qsonly, oldqsonly = 0, vlnlen, i; - char ch, *ln = NULL, nullchar = '\0', *nullline = NULL, *qpend, - *oldln = NULL, *oldqpend = NULL, *p, *op, *vln = NULL, **lines = NULL; + char ch, *ln = NULL, *qpend, *oldln = NULL, + *oldqpend = NULL, *p, *op; + wchar_t nullchar = L'\0'; + wchar_t *nullline = NULL, *vln = NULL, **lines = NULL; lineprop vprop = { 0, 0, 0, '\0' }, iprop = { 0, 0, 0, '\0' }; /* oldqsonly, oldln, and oldquend don't really need to be initialized. 
*/ @@ -318,7 +323,7 @@ cbuf = newbuffer(sizeof (char), errmsg); if (*errmsg) goto rlcleanup; - lbuf = newbuffer(sizeof (char *), errmsg); + lbuf = newbuffer(sizeof (wchar_t *), errmsg); if (*errmsg) goto rlcleanup; lpbuf = newbuffer(sizeof (lineprop), errmsg); if (*errmsg) goto rlcleanup; @@ -339,7 +344,7 @@ if (quote) { for (qpend = ln; *qpend && csmember(*qpend, quotechars); ++qpend); for (p = qpend; *p == ' ' || csmember(*p, quotechars); ++p); - qsonly = *p == '\0'; + qsonly = (*p == '\0'); while (qpend > ln && qpend[-1] == ' ') --qpend; if (!firstline) { for (p = ln, op = oldln; @@ -358,13 +363,13 @@ } else { vlnlen = p - ln; - vln = malloc((vlnlen + 1) * sizeof (char)); + vln = malloc((vlnlen + 1) * sizeof (wchar_t)); if (!vln) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rlcleanup; } - strncpy(vln,ln,vlnlen); - vln[vlnlen] = '\0'; + mbstowcs(vln,ln,vlnlen + 1); + vln[vlnlen] = L'\0'; additem(lbuf, &vln, errmsg); if (*errmsg) goto rlcleanup; additem(lpbuf, invis ? &iprop : &vprop, errmsg); @@ -373,13 +378,27 @@ } } } + if (oldln != NULL) { + free(oldln); + oldln = NULL; + } oldln = ln; oldqpend = qpend; oldqsonly = qsonly; } - additem(lbuf, &ln, errmsg); + vln = malloc((strlen(ln) + 1) * sizeof (wchar_t)); + if (!vln) { + wcscpy(errmsg, outofmem); + goto rlcleanup; + } + mbstowcs(vln,ln,strlen(ln) + 1); + if (oldln == NULL) { + free(ln); + ln = NULL; + } + additem(lbuf, &vln, errmsg); if (*errmsg) goto rlcleanup; - ln = NULL; + vln = NULL; additem(lpbuf, &vprop, errmsg); if (*errmsg) goto rlcleanup; clearbuffer(cbuf); @@ -407,7 +426,8 @@ if (isspace(c) && isascii(c)) ch = ' '; else blank = 0; additem(cbuf, &ch, errmsg); - if (*errmsg) goto rlcleanup; + if (*errmsg) + goto rlcleanup; } } @@ -416,9 +436,17 @@ if (*errmsg) goto rlcleanup; ln = copyitems(cbuf,errmsg); if (*errmsg) goto rlcleanup; - additem(lbuf, &ln, errmsg); - if (*errmsg) goto rlcleanup; + vln = malloc((strlen(ln) + 1) * sizeof (wchar_t)); + if (!vln) { + wcscpy(errmsg, 
outofmem); + goto rlcleanup; + } + mbstowcs(vln,ln,strlen(ln) + 1); + free(ln); ln = NULL; + additem(lbuf, &vln, errmsg); + if (*errmsg) goto rlcleanup; + vln = NULL; additem(lpbuf, &vprop, errmsg); if (*errmsg) goto rlcleanup; } @@ -450,7 +478,7 @@ static void compresuflen( - const char * const *lines, const char * const *endline, + const wchar_t * const *lines, const wchar_t * const *endline, const charset *bodychars, int body, int pre, int suf, int *ppre, int *psuf ) /* lines is an array of strings, up to but not including endline. */ @@ -458,7 +486,7 @@ /* lines in lines. Assumes that they have already been determined */ /* to be at least pre and suf. endline must not equal lines. */ { - const char *start, *end, *knownstart, * const *line, *p1, *p2, *knownend, + const wchar_t *start, *end, *knownstart, * const *line, *p1, *p2, *knownend, *knownstart2; start = *lines; @@ -475,7 +503,7 @@ } if (body) for (p1 = end; p1 > knownstart; ) - if (*--p1 != ' ') { + if (*--p1 != L' ') { if (csmember(*p1, bodychars)) end = p1; else @@ -502,18 +530,18 @@ } if (body) { for (p1 = start; - start < knownend && (*start == ' ' || csmember(*start, bodychars)); + start < knownend && (*start == L' ' || csmember(*start, bodychars)); ++start); - if (start > p1 && start[-1] == ' ') --start; + if (start > p1 && start[-1] == L' ') --start; } else - while (end - start >= 2 && *start == ' ' && start[1] == ' ') ++start; + while (end - start >= 2 && *start == L' ' && start[1] == L' ') ++start; *psuf = end - start; } static void delimit( - const char * const *lines, const char * const *endline, + const wchar_t * const *lines, const wchar_t * const *endline, const charset *bodychars, int repeat, int body, int div, int pre, int suf, lineprop *props ) @@ -524,8 +552,8 @@ /* and comsuflen of the lines in lines have already been */ /* determined to be at least pre and suf, respectively. 
*/ { - const char * const *line, *end, *p, * const *nextline; - char rc; + const wchar_t * const *line, *end, *p, * const *nextline; + wchar_t rc; lineprop *prop, *nextprop; int anybodiless = 0, status; @@ -546,8 +574,8 @@ for (end = *line; *end; ++end); end -= suf; p = *line + pre; - rc = p < end ? *p : ' '; - if (rc != ' ' && (!repeat || end - p < repeat)) + rc = p < end ? *p : L' '; + if (rc != L' ' && (!repeat || end - p < repeat)) prop->flags &= ~L_BODILESS; else while (p < end) { @@ -590,9 +618,9 @@ } line = lines, prop = props; - status = ((*lines)[pre] == ' '); + status = ((*lines)[pre] == L' '); do { - if (((*line)[pre] == ' ') == status) + if (((*line)[pre] == L' ') == status) prop->flags |= L_FIRST; ++line, ++prop; } while (line < endline); @@ -600,14 +628,14 @@ static void marksuperf( - const char * const * lines, const char * const * endline, lineprop *props + const wchar_t * const * lines, const wchar_t * const * endline, lineprop *props ) /* lines points to the first line of a segment, and endline to one */ /* line beyond the last line in the segment. Sets L_SUPERF bits in */ /* the flags fields of the props array whenever the corresponding */ /* line is superfluous. L_BODILESS bits must already be set. 
*/ { - const char * const *line, *p; + const wchar_t * const *line, *p; lineprop *prop, *mprop, dummy; int inbody, num, mnum; @@ -620,7 +648,7 @@ for (line = lines, prop = props; line < endline; ++line, ++prop) if (isvacant(prop)) { for (num = 0, p = *line; *p; ++p) - if (*p != ' ') ++num; + if (*p != L' ') ++num; if (inbody || num < mnum) mnum = num, mprop = prop; inbody = 0; @@ -632,7 +660,7 @@ static void setaffixes( - const char * const *inlines, const char * const *endline, + const wchar_t * const *inlines, const wchar_t * const *endline, const lineprop *props, const charset *bodychars, const charset *quotechars, int hang, int body, int quote, int *pafp, int *pfs, int *pprefix, int *psuffix @@ -645,7 +673,7 @@ /* default value as specified in "par.doc". */ { int numin, pre, suf; - const char *p; + const wchar_t *p; numin = endline - inlines; @@ -667,11 +695,11 @@ } -static void freelines(char **lines) +static void freelines(wchar_t **lines) /* Frees the elements of lines, and lines itself. */ /* lines is a NULL-terminated array of strings. 
*/ { - char **line; + wchar_t **line; for (line = lines; *line; ++line) free(*line); @@ -679,18 +707,21 @@ free(lines); } - int main(int argc, const char * const *argv) { int help = 0, version = 0, hang = 0, prefix = -1, repeat = 0, suffix = -1, Tab = 1, width = 72, body = 0, cap = 0, div = 0, Err = 0, expel = 0, fit = 0, guess = 0, invis = 0, just = 0, last = 0, quote = 0, Report = 0, touch = -1; - int prefixbak, suffixbak, c, sawnonblank, oweblank, n, i, afp, fs; + int prefixbak, suffixbak, sawnonblank, oweblank, n, i, afp, fs; charset *bodychars = NULL, *protectchars = NULL, *quotechars = NULL; - char *parinit = NULL, *arg, **inlines = NULL, **endline, **firstline, *end, - **nextline, **outlines = NULL, **line, ch; - const char *env, * const whitechars = " \f\n\r\t\v"; + char ch; + wint_t c; + char *arg, *parinit = NULL; + wchar_t *end, **nextline, **inlines = NULL, **endline, **firstline, + **outlines = NULL, **line; + const char *env; + const char * const whitechars = " \f\n\r\t\v"; errmsg_t errmsg = { '\0' }; lineprop *props = NULL, *firstprop, *nextprop; FILE *errout; @@ -729,7 +760,7 @@ if (env) { parinit = malloc((strlen(env) + 1) * sizeof (char)); if (!parinit) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto parcleanup; } strcpy(parinit,env); @@ -757,7 +788,7 @@ } if (Tab == 0) { - strcpy(errmsg, " must not be 0.\n"); + wcscpy(errmsg, L" must not be 0.\n"); goto parcleanup; } @@ -765,8 +796,7 @@ prefixbak = prefix; suffixbak = suffix; -/* Main loop: */ - + /* Main loop: */ for (sawnonblank = oweblank = 0; ; ) { for (;;) { c = getchar(); @@ -779,7 +809,7 @@ if (csmember(ch, protectchars)) { sawnonblank = 1; if (oweblank) { - puts(""); + fputwc('\n', stdout); oweblank = 0; } while (ch != '\n') { @@ -798,8 +828,7 @@ inlines = readlines(&props, protectchars, quotechars, Tab, invis, quote, errmsg); if (*errmsg) goto parcleanup; - - for (endline = inlines; *endline; ++endline); + for (endline = inlines; *endline; ++endline) ; if (endline == 
inlines) { free(inlines); inlines = NULL; @@ -808,19 +837,20 @@ sawnonblank = 1; if (oweblank) { - puts(""); + fputwc('\n', stdout); oweblank = 0; } - delimit((const char * const *) inlines, - (const char * const *) endline, + delimit((const wchar_t * const *) inlines, + (const wchar_t * const *) endline, bodychars, repeat, body, div, 0, 0, props); if (expel) - marksuperf((const char * const *) inlines, - (const char * const *) endline, props); + marksuperf((const wchar_t * const *) inlines, + (const wchar_t * const *) endline, props); firstline = inlines, firstprop = props; + do { if (isbodiless(firstprop)) { if (!isinvis(firstprop) && !(expel && issuperf(firstprop))) { @@ -828,18 +858,18 @@ if (!repeat || (firstprop->rc == ' ' && !firstprop->s)) { while (end > *firstline && end[-1] == ' ') --end; *end = '\0'; - puts(*firstline); + fwprintf(stdout, L"%ls\n", *firstline); } else { n = width - firstprop->p - firstprop->s; if (n < 0) { - sprintf(errmsg,impossibility,5); + swprintf(errmsg,errmsg_size,impossibility,5); goto parcleanup; } - printf("%.*s", firstprop->p, *firstline); + fwprintf(stdout, L"%.*ls", firstprop->p, *firstline); for (i = n; i; --i) - putchar(*(unsigned char *)&firstprop->rc); - puts(end - firstprop->s); + fputwc(firstprop->rc, stdout); + fwprintf(stdout, L"%ls\n", end - firstprop->s); } } ++firstline, ++firstprop; @@ -851,26 +881,24 @@ ++nextline, ++nextprop); prefix = prefixbak, suffix = suffixbak; - setaffixes((const char * const *) firstline, - (const char * const *) nextline, firstprop, bodychars, + setaffixes((const wchar_t * const *) firstline, + (const wchar_t * const *) nextline, firstprop, bodychars, quotechars, hang, body, quote, &afp, &fs, &prefix, &suffix); if (width <= prefix + suffix) { - sprintf(errmsg, - " (%d) <= (%d) + (%d)\n", + swprintf(errmsg,errmsg_size, + L" (%d) <= (%d) + (%d)\n", width, prefix, suffix); goto parcleanup; } outlines = - reformat((const char * const *) firstline, - (const char * const *) nextline, + 
reformat((const wchar_t * const *) firstline, + (const wchar_t * const *) nextline, afp, fs, hang, prefix, suffix, width, cap, fit, guess, just, last, Report, touch, errmsg); if (*errmsg) goto parcleanup; - for (line = outlines; *line; ++line) - puts(*line); - + fwprintf(stdout, L"%ls\n", *line); freelines(outlines); outlines = NULL; @@ -895,8 +923,12 @@ if (outlines) freelines(outlines); errout = Err ? stderr : stdout; - if (*errmsg) fprintf(errout, "par error:\n%.*s", errmsg_size, errmsg); - if (version) fputs("par 1.52\n",errout); + if (*errmsg) fwprintf(errout, L"par error:\n%.*ls", errmsg_size, errmsg); +#ifdef NOWIDTH + if (version) fputws(L"par 1.52-i18n.3 (without wcwidth() support)\n",errout); +#else + if (version) fputws(L"par 1.52-i18n.3\n",errout); +#endif if (help) fputs(usagemsg,errout); return *errmsg ? EXIT_FAILURE : EXIT_SUCCESS; --- par.doc.orig +++ par.doc @@ -1,6 +1,6 @@ ********************* * par.doc * - * for Par 1.52 * + * for Par 1.52 i18n * * Copyright 2001 by * * Adam M. Costello * ********************* --- reformat.c.orig +++ reformat.c @@ -1,9 +1,11 @@ -/*********************/ -/* reformat.c */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* reformat.c */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ @@ -16,6 +18,7 @@ #include #include #include +#include #undef NULL #define NULL ((void *) 0) @@ -33,14 +36,15 @@ typedef unsigned char wflag_t; typedef struct word { - const char *chrs; /* Pointer to the characters in the word */ + const wchar_t *chrs; /* Pointer to the characters in the word */ /* (NOT terminated by '\0'). */ struct word *prev, /* Pointer to previous word. */ *next, /* Pointer to next word. */ /* Supposing this word were the first... */ *nextline; /* Pointer to first word in next line. 
*/ int score, /* Value of the objective function. */ - length; /* Length of this word. */ + length, /* Length (in widechar) of this word. */ + width; /* Visual width of this word. */ wflag_t flags; /* Notable properties of this word. */ } word; @@ -57,17 +61,36 @@ #define iscurious(w) (((w)->flags & 2) != 0) #define iscapital(w) (((w)->flags & 4) != 0) +static int getWidth(const wchar_t *beg, const wchar_t *end) +/* Compute (visual) width of a word. This function is aware */ +/* of double-width characters used in oriental languages. */ +{ + int ret, tmp; + + for (ret = 0; beg != end; beg++) { +#ifdef NOWIDTH + tmp = 1; +#else + tmp = wcwidth(*beg); +#endif + if (tmp < 0) + tmp = 0; + ret += tmp; + } + + return ret; +} static int checkcapital(word *w) /* Returns 1 if *w is capitalized according to the definition */ /* in par.doc (assuming is 0), or 0 if not. */ { - const char *p, *end; + const wchar_t *p, *end; for (p = w->chrs, end = p + w->length; - p < end && !isalnum(*(unsigned char *)p); + p < end && !iswalnum(*p); ++p); - return p < end && !islower(*(unsigned char *)p); + return p < end && !iswlower(*p); } @@ -75,19 +98,19 @@ /* Returns 1 if *w is curious according to */ /* the definition in par.doc, or 0 if not. */ { - const char *start, *p; - char ch; + const wchar_t *start, *p; + wchar_t ch; for (start = w->chrs, p = start + w->length; p > start; --p) { ch = p[-1]; - if (isalnum(*(unsigned char *)&ch)) return 0; - if (ch == '.' || ch == '?' || ch == '!' || ch == ':') break; + if (iswalnum(*(wchar_t *)&ch)) return 0; + if (ch == L'.' || ch == L'?' || ch == L'!' || ch == L':') break; } if (p <= start + 1) return 0; --p; - do if (isalnum(*(unsigned char *)--p)) return 1; + do if (iswalnum(*(wchar_t *)--p)) return 1; while (p > start); return 0; @@ -95,31 +118,32 @@ static int simplebreaks(word *head, word *tail, int L, int last) - -/* Chooses line breaks in a list of words which maximize the length of the */ -/* shortest line. L is the maximum line length. 
The last line counts as a */ -/* line only if last is non-zero. _head must point to a dummy word, and tail */ -/* must point to the last word, whose next field must be NULL. Returns the */ -/* length of the shortest line on success, -1 if there is a word of length */ -/* greater than L, or L if there are no lines. */ +/* Chooses line breaks in a list of words which maximize */ +/* the length of the shortest line. L is the maximum line */ +/* length. The last line counts as a line only if last is */ +/* non-zero. _head must point to a dummy word, and tail */ +/* must point to the last word, whose next field must be */ +/* NULL. Returns the length of the shortest line on */ +/* success, -1 if there is a word of length greater than L, */ +/* or L if there are no lines. */ { word *w1, *w2; int linelen, score; if (!head->next) return L; - for (w1 = tail, linelen = w1->length; + for (w1 = tail, linelen = w1->width; w1 != head && linelen <= L; - linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->length) { + linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->width) { w1->score = last ? 
linelen : L; w1->nextline = NULL; } for ( ; w1 != head; w1 = w1->prev) { w1->score = -1; - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; linelen <= L; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next) { score = w2->score; if (linelen < score) score = linelen; if (score >= w1->score) { @@ -168,7 +192,7 @@ shortest = simplebreaks(head,tail,target,last); if (shortest < 0) { - sprintf(errmsg,impossibility,1); + swprintf(errmsg,errmsg_size,impossibility,1); return; } @@ -178,9 +202,9 @@ w1 = tail; do { w1->score = -1; - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; linelen <= target; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next) { extra = target - linelen; minlen = shortest; if (w2) @@ -202,7 +226,7 @@ } while (w1 != head); if (head->next->score < 0) - sprintf(errmsg,impossibility,2); + swprintf(errmsg,errmsg_size,impossibility,2); } @@ -225,9 +249,9 @@ w1 = tail; do { w1->score = L; - for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + for (numgaps = 0, extra = L - w1->width, w2 = w1->next; extra >= 0; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next) { gap = numgaps ? (extra + numgaps - 1) / numgaps : L; if (w2) score = w2->score; @@ -247,7 +271,7 @@ maxgap = head->next->score; if (maxgap >= L) { - strcpy(errmsg, "Cannot justify.\n"); + wcscpy(errmsg, L"Cannot justify.\n"); return; } @@ -257,9 +281,9 @@ w1 = tail; do { w1->score = -1; - for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + for (numgaps = 0, extra = L - w1->width, w2 = w1->next; extra >= 0; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next) { gap = numgaps ? 
(extra + numgaps - 1) / numgaps : L; if (w2) score = w2->score; @@ -288,40 +312,39 @@ } while (w1 != head); if (head->next->score < 0) - sprintf(errmsg,impossibility,3); + swprintf(errmsg,errmsg_size,impossibility,3); } -char **reformat( - const char * const *inlines, const char * const *endline, int afp, int fs, +wchar_t **reformat( + const wchar_t * const *inlines, const wchar_t * const *endline, int afp, int fs, int hang, int prefix, int suffix, int width, int cap, int fit, int guess, int just, int last, int Report, int touch, errmsg_t errmsg ) { int numin, affix, L, onfirstword = 1, linelen, numout, numgaps, extra, phase; - const char * const *line, **suffixes = NULL, **suf, *end, *p1, *p2; - char *q1, *q2, **outlines = NULL; + const wchar_t * const *line, **suffixes = NULL, **suf, *end, *p1, *p2; + wchar_t *q1, *q2, **outlines = NULL; word dummy, *head, *tail, *w1, *w2; buffer *pbuf = NULL; /* Initialization: */ - *errmsg = '\0'; dummy.next = dummy.prev = NULL; dummy.flags = 0; head = tail = &dummy; numin = endline - inlines; if (numin <= 0) { - sprintf(errmsg,impossibility,4); + swprintf(errmsg,errmsg_size,impossibility,4); goto rfcleanup; } numgaps = extra = 0; /* unnecessary, but quiets compiler warnings */ /* Allocate space for pointers to the suffixes: */ - suffixes = malloc(numin * sizeof (const char *)); + suffixes = malloc(numin * sizeof (const wchar_t *)); if (!suffixes) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } @@ -334,8 +357,8 @@ do { for (end = *line; *end; ++end); if (end - *line < affix) { - sprintf(errmsg, - "Line %d shorter than + = %d + %d = %d\n", + swprintf(errmsg,errmsg_size, + L"Line %d shorter than + = %d + %d = %d\n", (int)(line - inlines + 1), prefix, suffix, affix); goto rfcleanup; } @@ -343,17 +366,17 @@ *suf = end; p1 = *line + prefix; for (;;) { - while (p1 < end && *p1 == ' ') ++p1; + while (p1 < end && *p1 == L' ') ++p1; if (p1 == end) break; p2 = p1; if (onfirstword) { p1 = *line + prefix; 
onfirstword = 0; } - while (p2 < end && *p2 != ' ') ++p2; + while (p2 < end && *p2 != L' ') ++p2; w1 = malloc(sizeof (word)); if (!w1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } w1->next = NULL; @@ -361,6 +384,7 @@ tail = tail->next = w1; w1->chrs = p1; w1->length = p2 - p1; + w1->width = getWidth(p1, p2); w1->flags = 0; p1 = p2; } @@ -377,6 +401,7 @@ if (iscurious(w1)) { if (w1->chrs[w1->length] && w1->chrs + w1->length + 1 == w2->chrs) { w2->length += w1->length + 1; + w2->width += w1->width + 1; w2->chrs = w1->chrs; w2->prev = w1->prev; w2->prev->next = w2; @@ -397,20 +422,20 @@ if (Report) for (w2 = head->next; w2; w2 = w2->next) { - if (w2->length > L) { - linelen = w2->length; + if (w2->width > L) { + linelen = w2->width; if (linelen > errmsg_size - 17) linelen = errmsg_size - 17; - sprintf(errmsg, "Word too long: %.*s\n", linelen, w2->chrs); + swprintf(errmsg,errmsg_size, L"Word too long: %.*ls\n", linelen, w2->chrs); goto rfcleanup; } } else for (w2 = head->next; w2; w2 = w2->next) - while (w2->length > L) { + while (w2->width > L) { w1 = malloc(sizeof (word)); if (!w1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } w1->next = w2; @@ -420,7 +445,9 @@ w1->chrs = w2->chrs; w2->chrs += L; w1->length = L; + w1->width = getWidth(w1->chrs, w1->chrs + L); w2->length -= L; + w2->width -= w1->width; w1->flags = 0; if (iscapital(w2)) { w1->flags |= W_CAPITAL; @@ -444,9 +471,9 @@ L = 0; w1 = head->next; while (w1) { - for (linelen = w1->length, w2 = w1->next; + for (linelen = w1->width, w2 = w1->next; w2 != w1->nextline; - linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next); + linelen += 1 + isshifted(w2) + w2->width, w2 = w2->next); if (linelen > L) L = linelen; w1 = w2; } @@ -454,67 +481,67 @@ /* Construct the lines: */ - pbuf = newbuffer(sizeof (char *), errmsg); + pbuf = newbuffer(sizeof (wchar_t *), errmsg); if (*errmsg) goto rfcleanup; numout = 0; w1 = head->next; while (numout < hang || w1) { if 
(w1) - for (w2 = w1->next, numgaps = 0, extra = L - w1->length; + for (w2 = w1->next, numgaps = 0, extra = L - w1->width; w2 != w1->nextline; - ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next); + ++numgaps, extra -= 1 + isshifted(w2) + w2->width, w2 = w2->next); linelen = suffix || (just && (w2 || last)) ? L + affix : w1 ? prefix + L - extra : prefix; - q1 = malloc((linelen + 1) * sizeof (char)); + q1 = malloc((linelen + 1) * sizeof (wchar_t)); if (!q1) { - strcpy(errmsg,outofmem); + wcscpy(errmsg,outofmem); goto rfcleanup; } additem(pbuf, &q1, errmsg); if (*errmsg) goto rfcleanup; ++numout; q2 = q1 + prefix; - if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix); - else if (numin > hang ) memcpy(q1, endline[-1], prefix); + if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix * sizeof(wchar_t)); + else if (numin > hang ) memcpy(q1, endline[-1], prefix * sizeof(wchar_t)); else { if (afp > prefix) afp = prefix; - memcpy(q1, endline[-1], afp); + memcpy(q1, endline[-1], afp * sizeof(wchar_t)); q1 += afp; - while (q1 < q2) *q1++ = ' '; + while (q1 < q2) *q1++ = L' '; } q1 = q2; if (w1) { phase = numgaps / 2; for (w2 = w1; ; ) { - memcpy(q1, w2->chrs, w2->length); + memcpy(q1, w2->chrs, w2->length * sizeof(wchar_t)); q1 += w2->length; w2 = w2->next; if (w2 == w1->nextline) break; - *q1++ = ' '; + *q1++ = L' '; if (just && (w1->nextline || last)) { phase += extra; while (phase >= numgaps) { - *q1++ = ' '; + *q1++ = L' '; phase -= numgaps; } } - if (isshifted(w2)) *q1++ = ' '; + if (isshifted(w2)) *q1++ = L' '; } } q2 += linelen - affix; - while (q1 < q2) *q1++ = ' '; + while (q1 < q2) *q1++ = L' '; q2 = q1 + suffix; - if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix); - else if (numin > hang ) memcpy(q1, suffixes[numin - 1], suffix); + if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix * sizeof(wchar_t)); + else if (numin > hang ) memcpy(q1, suffixes[numin - 1], suffix * sizeof(wchar_t)); else { if (fs > suffix) fs = 
suffix; - memcpy(q1, suffixes[numin - 1], fs); + memcpy(q1, suffixes[numin - 1], fs * sizeof(wchar_t)); q1 += fs; - while(q1 < q2) *q1++ = ' '; + while(q1 < q2) *q1++ = L' '; } - *q2 = '\0'; + *q2 = L'\0'; if (w1) w1 = w1->nextline; } @@ -543,5 +570,6 @@ freebuffer(pbuf); } + return outlines; } --- reformat.h.orig +++ reformat.h @@ -1,18 +1,20 @@ -/*********************/ -/* reformat.h */ -/* for Par 1.52 */ -/* Copyright 2001 by */ -/* Adam M. Costello */ -/*********************/ +/***********************/ +/* reformat.h */ +/* for Par 1.52-i18n.3 */ +/* Copyright 2001 by */ +/* Adam M. Costello */ +/* Modified by */ +/* Jérôme Pouiller */ +/***********************/ /* This is ANSI C code (C89). */ #include "errmsg.h" +#include - -char **reformat( - const char * const *inlines, const char * const *endline, int afp, int fs, +wchar_t **reformat( + const wchar_t * const *inlines, const wchar_t * const *endline, int afp, int fs, int hang, int prefix, int suffix, int width, int cap, int fit, int guess, int just, int last, int Report, int touch, errmsg_t errmsg ); --- releasenotes.orig +++ releasenotes @@ -1,12 +1,23 @@ - ********************* - * releasenotes * - * for Par 1.52 * - * Copyright 2001 by * - * Adam M. Costello * - ********************* + *********************** + * releasenotes * + * for Par 1.52-i18n.3 * + * Copyright 2001 by * + * Adam M. Costello * + * Modified by * + * Jérôme Pouiller * + *********************** Each entry below describes changes since the previous version. +Par 1.52-i18n.3 released 2006-Oct-03 + Fix bug with option 'g' + +Par 1.52-i18n.2 released 2006-Aug-03 + Fix Debian bug #310495. + +Par 1.52-i18n.1 released 2006-Jun-22 + Changed char to wchar_t. Added support for multibyte characters. + Added support for double-width characters. Par 1.52 released 2001-Apr-29 Fixed a portability problem regarding unsigned char versus char.