diff -r -u ./Makefile Makefile --- ./Makefile 2005-01-14 18:27:00.000000000 +0100 +++ Makefile 2006-06-29 13:05:26.000000000 +0200 @@ -1,4 +1,4 @@ -VERSION= 1.4.4 +VERSION= 1.4.8 CHAR_SIZE=16 diff -r -u ./ctype16.h ctype16.h --- ./ctype16.h 2004-03-17 16:53:21.000000000 +0100 +++ ctype16.h 2005-11-04 15:53:06.000000000 +0100 @@ -1,7 +1,7 @@ #ifndef CTYPE16_H #define CTYPE16_H -#ifndef FOR_LT +#ifndef STD_API #define STD_API #endif diff -r -u ./dtd.c dtd.c --- ./dtd.c 2004-11-02 19:20:20.000000000 +0100 +++ dtd.c 2006-05-30 17:11:06.000000000 +0200 @@ -469,6 +469,7 @@ e->xml_space_attribute = 0; e->xml_lang_attribute = 0; e->xml_id_attribute = 0; + e->xml_base_attribute = 0; e->notation_attribute = 0; e->cached_nsdef = 0; e->is_externally_declared = 0; @@ -635,6 +636,7 @@ static Char xml_space[] = {'x','m','l',':','s','p','a','c','e',0}; static Char xml_lang[] = {'x','m','l',':','l','a','n','g',0}; static Char xml_id[] = {'x','m','l',':','i','d',0}; + static Char xml_base[] = {'x','m','l',':','b','a','s','e',0}; static Char xmlns[] = {'x','m','l','n','s',0}; Char *t; @@ -695,6 +697,8 @@ element->xml_lang_attribute = a; else if(Strcmp(name, xml_id) == 0) element->xml_id_attribute = a; + else if(Strcmp(name, xml_base) == 0) + element->xml_base_attribute = a; a->cached_nsdef = 0; diff -r -u ./dtd.h dtd.h --- ./dtd.h 2004-11-02 18:46:07.000000000 +0100 +++ dtd.h 2006-05-30 17:09:37.000000000 +0200 @@ -147,6 +147,7 @@ AttributeDefinition xml_space_attribute; /* xml:space attribute, if it has one */ AttributeDefinition xml_lang_attribute; /* xml:lang attribute, if it has one */ AttributeDefinition xml_id_attribute; /* xml:id attribute, if it has one */ + AttributeDefinition xml_base_attribute; /* xml:base attribute, if it has one */ AttributeDefinition notation_attribute; /* NOTATION attribute, if it has one */ NSElementDefinition cached_nsdef; const Char *prefix, *local; diff -r -u ./input.c input.c --- ./input.c 2005-01-14 17:10:00.000000000 +0100 +++ input.c 2005-10-05 13:44:46.000000000 +0200 @@ -740,7 +740,10 @@ /* There are never more characters than bytes in the input */ if(s->line_alloc < s->line_length + (s->insize - s->nextin)) { - s->line_alloc = s->line_length + (s->insize - s->nextin); + if(s->line_alloc == 0) + s->line_alloc = 1024; + while(s->line_alloc < s->line_length + (s->insize - s->nextin)) + s->line_alloc *= 2; s->line = Realloc(s->line, s->line_alloc * sizeof(Char)); } diff -r -u ./rxp.1 rxp.1 --- ./rxp.1 2004-12-14 14:29:03.000000000 +0100 +++ rxp.1 2006-03-28 13:17:34.000000000 +0200 @@ -171,7 +171,8 @@ set the initial mode for catalog processing; the default is \f3system\f1. If the variable -.B RXPURL is set, it is used as the URL of the document to parse. This may +.B RXPURL +is set, it is used as the URL of the document to parse. This may be useful in CGI scripts and the like to avoid shell parsing of a user-supplied argument. diff -r -u ./rxp.c rxp.c --- ./rxp.c 2005-01-14 17:57:49.000000000 +0100 +++ rxp.c 2006-06-12 16:14:59.000000000 +0200 @@ -37,7 +37,7 @@ void print_ns_attrs(NamespaceBinding ns, int count); void print_namespaces(NamespaceBinding ns); void print_attrs(ElementDefinition e, Attribute a); -void print_text(Char *text); +void print_text(Char *text, int is_attr); int printable(int c); void print_special(int c); void print_text_bit(Char *text); @@ -51,7 +51,7 @@ attr_defaults = 0, merge = 0, strict_xml = 0, tree = 0, validate = 0, xml_space = 0, namespaces = 0, simple_error = 0, experiment = 0, read_dtd = 0, unicode_check = 0, xml_id = 0; -enum {o_unspec, o_none, o_bits, o_plain, o_can1, o_can2, o_can3, o_infoset, o_diff} output_format = o_unspec; +enum {o_unspec, o_none, o_bits, o_plain, o_can1, o_can2, o_can3, o_infoset, o_diff, o_diff2} output_format = o_unspec; char *enc_name = 0, *base_uri = 0, *my_dtd_name, *my_dtd_sysid = 0; CharacterEncoding encoding = CE_unknown; InputSource source = 0; @@ -140,6 +140,9 @@ case 'd': output_format = o_diff; break; + case 'D': + output_format = o_diff2; + break; default: fprintf(stderr, "bad output format %s\n", argv[i]); return 1; @@ -181,7 +184,7 @@ } time_limit = atoi(argv[i]); #else - fprintf("-R not supported on this system\n"); + fprintf(stderr,"-R not supported on this system\n"); return 1; #endif break; @@ -423,7 +426,7 @@ return 1; } } - else if(strict_xml) + else if(strict_xml || canonical_output) encoding = CE_UTF_8; else encoding = source->entity->encoding; @@ -688,37 +691,58 @@ print_ns_attrs(bit->ns_dict, bit->nsc); if(bit->type == XBIT_start) Printf(">"); - else if(canonical_output) + else if(canonical_output && output_format < o_diff) + Printf(">", bit->element_definition->name); + else if(output_format == o_diff) Printf(">", bit->element_definition->name); + else if(output_format > o_diff) + Printf(">\n", bit->element_definition->name); else Printf("/>"); + if(output_format == o_diff2) + Printf("\n"); break; case XBIT_end: Printf("", bit->element_definition->name); + if(output_format == o_diff2) + Printf("\n"); break; case XBIT_pi: Printf("pi_name, bit->pi_chars, nsgml ? ">" : "?>"); - if(p->state <= PS_prolog2 && !canonical_output) + if((p->state <= PS_prolog2 && !canonical_output) || + output_format == o_diff2) Printf("\n"); break; case XBIT_cdsect: if(canonical_output) /* Print CDATA sections as plain PCDATA in canonical XML */ - print_text(bit->cdsect_chars); + print_text(bit->cdsect_chars, 0); else Printf("", bit->cdsect_chars); + if(output_format == o_diff2) + Printf("\n"); break; case XBIT_pcdata: - if(output_format != o_can3 || !bit->pcdata_ignorable_whitespace) - print_text(bit->pcdata_chars); + if(output_format == o_diff2) + { + if(bit->pcdata_chars[0] == '\n') + /* we have already printed a linefeed */ + print_text(bit->pcdata_chars+1, 0); + else + print_text(bit->pcdata_chars, 0); + if(bit->pcdata_chars[Strlen(bit->pcdata_chars) - 1] != '\n') + Printf("\n"); + } + else if(output_format != o_can3 || !bit->pcdata_ignorable_whitespace) + print_text(bit->pcdata_chars, 0); break; case XBIT_comment: if(canonical_output) /* no comments in canonical XML */ break; Printf("", bit->comment_chars); - if(p->state <= PS_prolog2) + if(p->state <= PS_prolog2 || output_format == o_diff2) Printf("\n"); break; default: @@ -764,7 +788,7 @@ aa[i]->definition->local); else Printf(" %S=\"", aa[i]->definition->name); - print_text(aa[i]->value); + print_text(aa[i]->value, 1); Printf("\""); } @@ -801,7 +825,7 @@ VectorPush(dtd_bits, copy); } -void print_text(Char *text) +void print_text(Char *text, int is_attr) { Char *pc, *last; @@ -816,15 +840,18 @@ int c = *pc, type = 0; if(c == '&' || c == '<' || c == '>' || c == '"' || + c == 0x0d || (c > 127 && !printable(c))) type = 1; - else if(c == 9 || c == 10 || c == 13) + else if(c == 9 || c == 10) type = 2; - else if(c < 0x20 || (c >= 0x7f && c < 0xa0)) + else if(c < 0x20 || (c >= 0x7f && c < 0xa0) || + c == 0x85 || c == 0x2028) type = 3; if(type == 1 || - (canonical_output && output_format != o_diff && type == 2) || + (canonical_output && output_format < o_diff && type == 2) || + (is_attr && type == 2) || (xml_version > XV_1_0 && type == 3)) { if(pc > last) diff -r -u ./stdio16.c stdio16.c --- ./stdio16.c 2005-01-14 17:10:54.000000000 +0100 +++ stdio16.c 2005-10-04 17:47:23.000000000 +0200 @@ -55,6 +55,8 @@ #define BufferSize 4096 +static char8 null = 0; + typedef int ReadProc(FILE16 *file, unsigned char *buf, int max_count); typedef int WriteProc(FILE16 *file, const unsigned char *buf, int count); typedef int SeekProc(FILE16 *file, long offset, int ptrname); @@ -106,6 +108,12 @@ static int StringClose(FILE16 *file); static int StringFlush(FILE16 *file); +static int MStringRead(FILE16 *file, unsigned char *buf, int max_count); +static int MStringWrite(FILE16 *file, const unsigned char *buf, int count); +static int MStringSeek(FILE16 *file, long offset, int ptrname); +static int MStringClose(FILE16 *file); +static int MStringFlush(FILE16 *file); + #if defined(WIN32) && ! defined(__CYGWIN__) #ifdef SOCKETS_IMPLEMENTED static int WinsockRead(FILE16 *file, unsigned char *buf, int max_count); @@ -1158,8 +1166,6 @@ static int StringClose(FILE16 *file) { - static char8 null = 0; - if(file->flags & FILE16_write) ConvertASCII(&null, 1, file); /* null terminate */ @@ -1171,6 +1177,115 @@ static int StringFlush(FILE16 *file) { + if(file->flags & FILE16_write) + { + /* null terminate, but leave position unchanged */ + int save = file->handle2; + ConvertASCII(&null, 1, file); + file->handle2 = save; + } + + return 0; +} + +FILE16 *MakeStringFILE16(const char *type) +{ + FILE16 *file; + + if(!(file = MakeFILE16(type))) + return 0; + + file->read = MStringRead; + file->write = MStringWrite; + file->seek = MStringSeek; + file->close = MStringClose; + file->flush = MStringFlush; + + file->handle = 0; + file->handle2 = 0; + file->handle3 = 0; + + return file; +} + +void *StringFILE16String(FILE16 *file) +{ + return file->handle; +} + +int StringFILE16StringLength(FILE16 *file) +{ + return file->handle2; +} + +static int MStringRead(FILE16 *file, unsigned char *buf, int max_count) +{ + return 0; +} + +static int MStringWrite(FILE16 *file, const unsigned char *buf, int count) +{ + char *p; + + if(file->handle2 + count > file->handle3) + { + int newsize = (file->handle3 == 0 ? 32 : file->handle3); + + while(file->handle2 + count > newsize) + newsize *= 2; + + file->handle = Realloc(file->handle, newsize); + if(!file->handle) + return -1; + file->handle3 = newsize; + } + + p = (char *)file->handle + file->handle2; + memcpy(p, buf, count); + file->handle2 += count; + + return 0; +} + +static int MStringSeek(FILE16 *file, long offset, int ptrname) +{ + switch(ptrname) + { + case SEEK_CUR: + offset = file->handle2 + offset; + break; + case SEEK_END: + if(file->handle3 < 0) + return -1; + offset = file->handle3 + offset; + break; + } + + if(file->handle3 >= 0 && offset > file->handle3) + return -1; + + file->handle2 = offset; + + return 0; +} + +static int MStringClose(FILE16 *file) +{ + Free(file->handle); + + return 0; +} + +static int MStringFlush(FILE16 *file) +{ + if(file->flags & FILE16_write) + { + /* null terminate, but leave position unchanged */ + int save = file->handle2; + ConvertASCII(&null, 1, file); + file->handle2 = save; + } + return 0; } diff -r -u ./stdio16.h stdio16.h --- ./stdio16.h 2002-10-03 16:37:44.000000000 +0200 +++ stdio16.h 2005-10-04 17:46:45.000000000 +0200 @@ -15,6 +15,9 @@ STD_API FILE16 *MakeFILE16FromFILE(FILE *f, const char *type); STD_API FILE16 *MakeFILE16FromFD(int fd, const char *type); STD_API FILE16 *MakeFILE16FromString(void *buf, long size, const char *type); +STD_API FILE16 *MakeStringFILE16(const char *type); +STD_API void *StringFILE16String(FILE16 *file); +STD_API int StringFILE16StringLength(FILE16 *file); #ifdef WIN32 #ifdef SOCKETS_IMPLEMENTED STD_API FILE16 *MakeFILE16FromWinsock(int sock, const char *type); diff -r -u ./system.h system.h --- ./system.h 2003-05-20 02:17:27.000000000 +0200 +++ system.h 2005-11-04 15:53:06.000000000 +0100 @@ -1,11 +1,35 @@ +#if defined(WIN32) && ! defined(__GNUC__) +#undef HAVE_LONG_LONG +#else #define HAVE_LONG_LONG +#endif #define SOCKETS_IMPLEMENTED +#define EXPRT +#define IMPRT + +#if defined(WIN32) && ! defined(__CYGWIN__) && ! defined(STD_API) + #ifdef __BORLAND__ + #undef EXPRT + #undef IMPRT + #define EXPRT _export + #define IMPRT _import + #else + #undef STD_API + #ifdef _BUILD_STD + #define STD_API __declspec(dllexport) + #else + #define STD_API __declspec(dllimport) + #endif + #define XML_API STD_API + #endif +#else +#if ! defined(STD_API) #define STD_API #define XML_API -#define WIN_IMP -#define EXPRT +#endif +#endif void *Malloc(int bytes); void *Realloc(void *mem, int bytes); diff -r -u ./url.c url.c --- ./url.c 2004-06-11 13:39:17.000000000 +0200 +++ url.c 2006-05-30 18:29:22.000000000 +0200 @@ -175,6 +175,8 @@ * The parts of the URL are returned in scheme, host, port and path * if these are non-null. * Caller should free the results. + * + * XXX we probably don't handle the query component properly. */ char *url_merge(const char *url, const char *base, @@ -255,6 +257,7 @@ if(j - i == 2 && p[i+1] == '.') { strcpy(&p[i+1], p[j] ? &p[j+1] : &p[j]); + i = 0; /* start again from beginning */ continue; } @@ -559,6 +562,14 @@ *scheme = *host = *path = 0; *port = -1; + /* Check for degenerate case */ + + if(url[0] == 0) + { + *path = strdup8(""); + return; + } + /* Does it start with a scheme? */ for(p = (char *)url; *p; p++) diff -r -u ./version.c version.c --- ./version.c 2005-01-14 18:09:25.000000000 +0100 +++ version.c 2006-06-29 13:04:14.000000000 +0200 @@ -1,2 +1,2 @@ char *rxp_version_string = - "RXP 1.4.4 Copyright Richard Tobin, LTG, HCRC, University of Edinburgh"; + "RXP 1.4.8 Copyright Richard Tobin, LTG, HCRC, University of Edinburgh"; diff -r -u ./xmlparser.c xmlparser.c --- ./xmlparser.c 2004-11-03 14:47:31.000000000 +0100 +++ xmlparser.c 2005-06-17 20:14:34.000000000 +0200 @@ -1,10 +1,10 @@ -/* $Id: xmlparser.c,v 1.131 2004/11/03 13:47:31 richard Exp $ +/* $Id: xmlparser.c,v 1.133 2005/06/17 18:14:34 richard Exp $ */ #define DEBUG_FSM 0 #ifndef lint -static char vcid[] = "$Id: xmlparser.c,v 1.131 2004/11/03 13:47:31 richard Exp $"; +static char vcid[] = "$Id: xmlparser.c,v 1.133 2005/06/17 18:14:34 richard Exp $"; #endif /* lint */ /* @@ -3436,7 +3436,7 @@ e->name)); } else if(ParserGetFlag(p, Validate) && p->standalone == SDD_yes && - e->is_externally_declared) + p->state == PS_body && e->is_externally_declared) { require(validity_error(p, "Reference to externally declared entity " "\"%S\" in document declared standalone", @@ -4257,6 +4257,8 @@ element->xml_id_attribute == a && a->type != AT_id) { warn(p, "xml:id error: xml:id attribute must be declared as type ID"); + /* Fix the declaration so that we treat it as type ID */ + a->type = AT_id; } if(ParserGetFlag(p, XMLNamespaces)) { @@ -4517,6 +4519,8 @@ return 1; } +/* NB assumes we are parsing the DTD */ + static int parsing_external_subset(Parser p) { Entity e = p->source->entity;