diff options
Diffstat (limited to 'vendor/x11iraf/obm/ObmW/HTMLparse.c')
-rw-r--r-- | vendor/x11iraf/obm/ObmW/HTMLparse.c | 1373 |
1 files changed, 1373 insertions, 0 deletions
diff --git a/vendor/x11iraf/obm/ObmW/HTMLparse.c b/vendor/x11iraf/obm/ObmW/HTMLparse.c new file mode 100644 index 00000000..4b603c98 --- /dev/null +++ b/vendor/x11iraf/obm/ObmW/HTMLparse.c @@ -0,0 +1,1373 @@ +/**************************************************************************** + * NCSA Mosaic for the X Window System * + * Software Development Group * + * National Center for Supercomputing Applications * + * University of Illinois at Urbana-Champaign * + * 605 E. Springfield, Champaign IL 61820 * + * mosaic@ncsa.uiuc.edu * + * * + * Copyright (C) 1993, Board of Trustees of the University of Illinois * + * * + * NCSA Mosaic software, both binary and source (hereafter, Software) is * + * copyrighted by The Board of Trustees of the University of Illinois * + * (UI), and ownership remains with the UI. * + * * + * The UI grants you (hereafter, Licensee) a license to use the Software * + * for academic, research and internal business purposes only, without a * + * fee. Licensee may distribute the binary and source code (if released) * + * to third parties provided that the copyright notice and this statement * + * appears on all copies and that no charge is associated with such * + * copies. * + * * + * Licensee may make derivative works. However, if Licensee distributes * + * any derivative work based on or derived from the Software, then * + * Licensee will (1) notify NCSA regarding its distribution of the * + * derivative work, and (2) clearly notify users that such derivative * + * work is a modified version and not the original NCSA Mosaic * + * distributed by the UI. * + * * + * Any Licensee wishing to make commercial use of the Software should * + * contact the UI, c/o NCSA, to negotiate an appropriate license for such * + * commercial use. 
Commercial use includes (1) integration of all or * + * part of the source code into a product for sale or license by or on * + * behalf of Licensee to third parties, or (2) distribution of the binary * + * code or source code to third parties that need it to utilize a * + * commercial product sold or licensed by or on behalf of Licensee. * + * * + * UI MAKES NO REPRESENTATIONS ABOUT THE SUITABILITY OF THIS SOFTWARE FOR * + * ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED * + * WARRANTY. THE UI SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY THE * + * USERS OF THIS SOFTWARE. * + * * + * By using or copying this Software, Licensee agrees to abide by the * + * copyright law and all other applicable laws of the U.S. including, but * + * not limited to, export control laws, and the terms of this license. * + * UI shall have the right to terminate this license immediately by * + * written notice upon Licensee's breach of, or non-compliance with, any * + * of its terms. Licensee may be held legally responsible for any * + * copyright infringement that is caused or encouraged by Licensee's * + * failure to abide by the terms of this license. * + * * + * Comments and questions are welcome and can be sent to * + * mosaic-x@ncsa.uiuc.edu. * + ****************************************************************************/ + +#ifdef TIMING +#include <sys/time.h> +struct timeval Tv; +struct timezone Tz; +#endif + +#include <stdio.h> +#include <ctype.h> +#ifndef sun +/* To get atoi. */ +#include <stdlib.h> +#endif +#include "HTML.h" +#include "HTMLamp.h" + + +extern void FreeObjList(); +extern struct mark_up *AddObj(); + + +#ifdef NOT_ASCII +#define TOLOWER(x) (tolower(x)) +#else + +/* + * A hack to speed up caseless_equal. 
/*
 * Case-folding support for the ASCII build.  Using a lookup table
 * instead of tolower() is a hack to speed up caseless_equal; thanks to
 * Quincey Koziol for developing it.
 *
 * map_table[c] is c itself for every byte value except 65..90
 * ('A'..'Z'), which map to 97..122 ('a'..'z').
 */
unsigned char map_table[256]={
	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
	 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
	112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
	 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
	112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
	128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
	144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
	160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
	176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
	192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
	208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
	224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
	240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255};

/*
 * The argument is forced to unsigned char so that a plain (possibly
 * signed) char holding a byte >= 0x80 cannot index before the table.
 */
#define TOLOWER(x)	(map_table[(unsigned char)(x)])
#endif /* NOT_ASCII */


/*
 * Check if two strings are equal, ignoring case.
 * The strings must be of the same length to be equal.
 *
 * str1, str2: NUL-terminated strings; either may be NULL.
 * return 1 if equal, 0 otherwise (including when either is NULL).
 */
int
caseless_equal(str1, str2)
	char *str1;
	char *str2;
{
	if ((str1 == NULL)||(str2 == NULL))
	{
		return(0);
	}

	for ( ; (*str1 != '\0')&&(*str2 != '\0'); str1++, str2++)
	{
		/*
		 * Cast before folding: both TOLOWER variants need a
		 * non-negative value.
		 */
		if (TOLOWER((unsigned char)*str1) !=
		    TOLOWER((unsigned char)*str2))
		{
			return(0);
		}
	}

	/*
	 * Equal only if both strings ended at the same time.
	 */
	return((*str1 == '\0')&&(*str2 == '\0'));
}


/*
 * Check if two strings are equal in the first count characters, ignoring case.
 * The strings must both be at least of length count to be equal.
 * return 1 if equal, 0 otherwise.
 */
/*
 * caseless_equal_prefix: compare the first cnt characters of two
 * strings, ignoring case.
 *
 * str1, str2: NUL-terminated strings; either may be NULL.
 * cnt:        number of leading characters to compare.
 *
 * Returns 0 if either string is NULL, 1 if cnt < 1, otherwise 1 only
 * when both strings have at least cnt characters and those characters
 * match caselessly.
 */
int
caseless_equal_prefix(str1, str2, cnt)
	char *str1;
	char *str2;
	int cnt;
{
	int i;

	if ((str1 == NULL)||(str2 == NULL))
	{
		return(0);
	}

	if (cnt < 1)
	{
		return(1);
	}

	for (i=0; i < cnt; i++)
	{
		unsigned char c1;
		unsigned char c2;

		c1 = (unsigned char)str1[i];
		c2 = (unsigned char)str2[i];
		if (TOLOWER(c1) != TOLOWER(c2))
		{
			return(0);
		}
		/*
		 * Both strings ended before cnt characters were seen.
		 * They are too short to be equal, and reading on would
		 * run past the terminators.
		 */
		if (c1 == '\0')
		{
			return(0);
		}
	}

	return(1);
}


/*
 * Clean up the white space in a string.
 * Remove all leading and trailing whitespace, and turn every internal
 * run of whitespace into a single space separating words.
 * The cleaning is done in place in the passed txt buffer; the result
 * is never longer than the input.  A NULL txt is ignored.
 */
void
clean_white_space(txt)
	char *txt;
{
	char *src;	/* next character to examine */
	char *dst;	/* next position to write */

	if (txt == NULL)
	{
		return;
	}

	src = txt;
	dst = txt;

	/*
	 * Remove leading white space
	 */
	while (isspace((unsigned char)*src))
	{
		src++;
	}

	while (*src != '\0')
	{
		if (!isspace((unsigned char)*src))
		{
			/*
			 * Part of a word, keep it.
			 */
			*dst++ = *src++;
			continue;
		}

		/*
		 * Swallow the whole run of white space.  Only emit the
		 * separating space if another word follows, so trailing
		 * white space disappears.
		 */
		while (isspace((unsigned char)*src))
		{
			src++;
		}
		if (*src == '\0')
		{
			break;
		}
		*dst++ = ' ';
	}

	*dst = '\0';
}


/*
 * parse an ampersand escape, and return the appropriate character, or
 * '\0' on error.
 */
+ * we should really only use caseless_equal_prefix for unterminated, and use + * caseless_equal otherwise, but since there are so many escapes, and I + * don't want to type everything twice, I always use caseless_equal_prefix + * Turns out the escapes are case sensitive, use strncmp. + * termination states: + * 0: terminated with a ';' + * 1: unterminated + * 2: terminated with whitespace + */ +char +ExpandEscapes(esc, endp, termination) + char *esc; + char **endp; + int termination; +{ + int cnt; + char val; + int unterminated; + + unterminated = (termination & 0x01); + + esc++; + if (*esc == '#') + { + if (unterminated) + { + char *tptr; + char tchar; + + tptr = (char *)(esc + 1); + while (isdigit((int)*tptr)) + { + tptr++; + } + tchar = *tptr; + *tptr = '\0'; + val = (char)atoi((esc + 1)); + *tptr = tchar; + *endp = tptr; + } + else + { + val = (char)atoi((esc + 1)); + *endp = (char *)(esc + strlen(esc)); + } + } + else + { + cnt = 0; + while (AmpEscapes[cnt].tag != NULL) + { + if (strncmp(esc, AmpEscapes[cnt].tag, + strlen(AmpEscapes[cnt].tag)) == 0) + { + val = AmpEscapes[cnt].value; + *endp = (char *)(esc + + strlen(AmpEscapes[cnt].tag)); + break; + } + cnt++; + } + if (AmpEscapes[cnt].tag == NULL) + { +#ifdef VERBOSE + fprintf(stderr, "Error bad & string\n"); +#endif + val = '\0'; + *endp = (char *)NULL; + } + } + + return(val); +} + + +/* + * Clean the special HTML character escapes out of the text and replace + * them with the appropriate characters "<" = "<", ">" = ">", + * "&" = "&" + * GAG: apperantly < etc. can be left unterminated, what a nightmare. + * Ok, better, they have to be terminated with white-space or ';'. + * the '&' character must be immediately followed by a letter to be + * a valid escape sequence. Other &'s are left alone. + * The cleaning is done by rearranging chars in the passed txt buffer. + * if any escapes are replaced, the string becomes shorter. 
/*
 * Helper for clean_text: return non-zero when ptr points at the start
 * of an escape sequence, i.e. an '&' immediately followed by a letter
 * or a hash mark.
 */
static int
clean_text_escape_start(ptr)
	char *ptr;
{
	if (*ptr != '&')
	{
		return(0);
	}
	return((isalpha((unsigned char)ptr[1]))||(ptr[1] == '#'));
}


/*
 * clean_text: expand the '&' escapes in txt, in place.
 *
 * An escape runs from the '&' up to the first ';', white space or end
 * of string.  It is copied into a scratch buffer and handed to
 * ExpandEscapes.  A recognised escape is replaced by its character and
 * the following text is moved up; a terminating ';' is consumed, a
 * terminating white space is kept.  An unrecognised escape is left in
 * the text unchanged.  A NULL txt is ignored.
 *
 * If the scratch buffer cannot be allocated a message is printed, the
 * remaining text is kept unexpanded and the function returns.
 */
void
clean_text(txt)
	char *txt;
{
	int unterminated;
	int space_terminated;
	char *ptr;	/* read position */
	char *ptr2;	/* write position, never ahead of ptr */
	char *start;
	char *text;
	char *tend;
	char tchar;
	char val;

	if (txt == NULL)
	{
		return;
	}

	/*
	 * Quick scan to find escape sequences.
	 * return if there are none.
	 */
	ptr = txt;
	while ((*ptr != '\0')&&(!clean_text_escape_start(ptr)))
	{
		ptr++;
	}
	if (*ptr == '\0')
	{
		return;
	}

	/*
	 * Loop, replacing escape sequences, and moving up remaining
	 * text.
	 */
	ptr2 = ptr;
	while (*ptr != '\0')
	{
		unterminated = 0;
		space_terminated = 0;

		/*
		 * Extract the escape sequence from start to ptr
		 */
		start = ptr;
		while ((*ptr != ';')&&(*ptr != '\0')&&
			(!isspace((unsigned char)*ptr)))
		{
			ptr++;
		}
		if (*ptr == '\0')
		{
#ifdef VERBOSE
			fprintf(stderr, "warning: unterminated & (%s)\n",
				start);
#endif
			unterminated = 1;
		}
		else if (isspace((unsigned char)*ptr))
		{
			space_terminated = 1;
		}

		/*
		 * Copy the escape sequence into a separate buffer.
		 * Then clean spaces so that "& lt ;" = "<" etc.
		 * The cleaning should be unnecessary.
		 */
		tchar = *ptr;
		*ptr = '\0';
		text = (char *)malloc(strlen(start) + 1);
		if (text == NULL)
		{
			fprintf(stderr, "Cannot malloc space for & text\n");
			*ptr = tchar;
			/*
			 * Close the gap left by earlier replacements so
			 * the caller still gets a consistent string.
			 */
			while (*start != '\0')
			{
				*ptr2++ = *start++;
			}
			*ptr2 = '\0';
			return;
		}
		strcpy(text, start);
		*ptr = tchar;
		clean_white_space(text);

		/*
		 * Replace escape sequence with appropriate character
		 */
		val = ExpandEscapes(text, &tend,
			((space_terminated << 1) + unterminated));
		if (val != '\0')
		{
			if (unterminated)
			{
				/*
				 * tend marks how much of the copy was
				 * consumed.  Leave ptr on the last
				 * consumed character of the original.
				 */
				ptr = (char *)(start + (tend - text) - 1);
			}
			else if (space_terminated)
			{
				/*
				 * Back up so the white space itself is
				 * copied forward below.
				 */
				ptr--;
			}
			*ptr2 = val;
		}
		/*
		 * invalid escape sequence. skip it.
		 */
		else
		{
#ifdef VERBOSE
			fprintf(stderr, "Error bad & string\n");
#endif
			ptr = start;
			*ptr2 = *ptr;
		}
		free(text);

		/*
		 * Copy forward remaining text until you find the next
		 * escape sequence
		 */
		ptr2++;
		ptr++;
		while ((*ptr != '\0')&&(!clean_text_escape_start(ptr)))
		{
			*ptr2++ = *ptr++;
		}
	}
	*ptr2 = '\0';
}


/*
 * Get a block of text from a HTML document.
 * All text from start to the end, or the first mark
 * (a mark is '<' or '</' followed by any letter, or '<!')
 * is returned in a malloced buffer.  Also, endp returns
 * a pointer to the next '<' or '\0'.
 * The returned text has already expanded '&' escapes.
 *
 * Returns NULL when start is NULL (endp is then left untouched), when
 * there is no text before the mark, or when the buffer cannot be
 * allocated.  The source string is only read, never modified.
 */
char *
get_text(start, endp)
	char *start;
	char **endp;
{
	char *ptr;
	char *text;
	int len;
	int i;

	if (start == NULL)
	{
		return(NULL);
	}

	/*
	 * Find the beginning of a mark, or the end
	 */
	for (ptr = start; *ptr != '\0'; ptr++)
	{
		if (*ptr != '<')
		{
			continue;
		}
		if (isalpha((unsigned char)ptr[1]))
		{
			break;
		}
		else if (ptr[1] == '/')
		{
			if (isalpha((unsigned char)ptr[2]))
			{
				break;
			}
		}
		else if (ptr[1] == '!') /* a comment */
		{
			break;
		}
	}
	*endp = ptr;

	if (ptr == start)
	{
		return(NULL);
	}

	/*
	 * Copy the text into its own buffer, and clean it
	 * of escape sequences.
	 */
	len = (int)(ptr - start);
	text = (char *)malloc(len + 1);
	if (text == NULL)
	{
		fprintf(stderr, "Cannot malloc space for text\n");
		return(NULL);
	}
	for (i = 0; i < len; i++)
	{
		text[i] = start[i];
	}
	text[len] = '\0';
	clean_text(text);

	return(text);
}


/*
 * Get the mark text between '<' and '>'.  From the text, determine
 * its type, and fill in a mark_up structure to return.  Also returns
 * endp pointing to the trailing '>' in the original string.
 */
+ */ +struct mark_up * +get_mark(start, endp) + char *start; + char **endp; +{ + char *ptr; + char *text; + char tchar; + struct mark_up *mark; + + if (start == NULL) + { + return(NULL); + } + + if (*start != '<') + { + return(NULL); + } + + start++; + + mark = (struct mark_up *)malloc(sizeof(struct mark_up)); + if (mark == NULL) + { + fprintf(stderr, "Cannot malloc space for mark_up struct\n"); + return(NULL); + } + + /* + * Grab the mark text + */ + ptr = start; + while ((*ptr != '>')&&(*ptr != '\0')) + { + ptr++; + } + *endp = ptr; + + if (*ptr != '>') + { +#ifdef VERBOSE + fprintf(stderr, "error: bad mark format\n"); +#endif + return(NULL); + } + + /* + * Copy the mark text to its own buffer, and + * clean it of escapes, and odd white space. + */ + tchar = *ptr; + *ptr = '\0'; + text = (char *)malloc(strlen(start) + 1); + if (text == NULL) + { + fprintf(stderr, "Cannot malloc space for mark\n"); + *ptr = tchar; + return(NULL); + } + strcpy(text, start); + *ptr = tchar; + clean_text(text); +/* + * No longer needed because the parsing code is now smarter + * + clean_white_space(text); + * + */ + + /* + * Set whether this is the start or end of a mark + * block, as well as determining its type. + */ + if (*text == '/') + { + mark->is_end = 1; + mark->type = ParseMarkType((char *)(text + 1)); + mark->start = NULL; + mark->text = NULL; + mark->end = text; + } + else + { + mark->is_end = 0; + mark->type = ParseMarkType(text); + mark->start = text; + mark->text = NULL; + mark->end = NULL; + } + mark->text = NULL; + mark->next = NULL; + + return(mark); +} + + +/* + * Special version of get_text. It reads all text up to the + * end of the plain text mark, or the end of the file. + */ +char * +get_plain_text(start, endp) + char *start; + char **endp; +{ + char *ptr; + char *text; + char tchar; + + if (start == NULL) + { + return(NULL); + } + + /* + * Read until stopped by end plain text mark. 
+ */ + ptr = start; + while (*ptr != '\0') + { + /* + * Beginning of a mark is '<' followed by any letter, + * or followed by '!' for a comment, + * or '</' followed by any letter. + */ + if ((*ptr == '<')&& + ((isalpha((int)(*(ptr + 1))))|| + (*(ptr + 1) == '!')|| + ((*(ptr + 1) == '/')&&(isalpha((int)(*(ptr + 2))))))) + { + struct mark_up *mp; + char *ep; + + /* + * We think we found a mark. If it is the + * end of plain text, break out + */ + mp = get_mark(ptr, &ep); + if (mp != NULL) + { + if (((mp->type == M_PLAIN_TEXT)|| + (mp->type == M_LISTING_TEXT))&&(mp->is_end)) + { + if (mp->end != NULL) + { + free((char *)mp->end); + } + free((char *)mp); + break; + } + if (mp->start != NULL) + { + free((char *)mp->start); + } + if (mp->end != NULL) + { + free((char *)mp->end); + } + free((char *)mp); + } + } + ptr++; + } + *endp = ptr; + + if (ptr == start) + { + return(NULL); + } + + /* + * Copy text to its own malloced buffer, and clean it of + * HTML escapes. + */ + tchar = *ptr; + *ptr = '\0'; + text = (char *)malloc(strlen(start) + 1); + if (text == NULL) + { + fprintf(stderr, "Cannot malloc space for text\n"); + *ptr = tchar; + return(NULL); + } + strcpy(text, start); + *ptr = tchar; + clean_text(text); + + return(text); +} + + +/* + * Main parser of HTML text. Takes raw text, and produces a linked + * list of mark objects. Mark objects are either text strings, or + * starting and ending mark delimiters. + * The old list is passed in so it can be freed, and in the future we + * may want to add code to append to the old list. 
+ */ +struct mark_up * +HTMLParse(old_list, str) + struct mark_up *old_list; + char *str; +{ + int preformat; + char *start, *end; + char *text, *tptr; + struct mark_up *mark; + struct mark_up *list; + struct mark_up *current; +#ifdef TIMING +gettimeofday(&Tv, &Tz); +fprintf(stderr, "HTMLParse enter (%d.%d)\n", Tv.tv_sec, Tv.tv_usec); +#endif + + preformat = 0; + + /* + * Free up the previous Object List if one exists + */ + FreeObjList(old_list); + + if (str == NULL) + { + return(NULL); + } + + list = NULL; + current = NULL; + + start = str; + end = str; + + mark = NULL; + while (*start != '\0') + { + /* + * Get some text (if any). If our last mark was + * a begin plain text we call different function + * If last mark was <PLAINTEXT> we lump all the rest of + * the text in. + */ + if ((mark != NULL)&&(mark->type == M_PLAIN_FILE)&& + (!mark->is_end)) + { + text = start; + end = text; + while (*end != '\0') + { + end++; + } + /* + * Copy text to its own malloced buffer, and clean it of + * HTML escapes. + */ + tptr = (char *)malloc(strlen(text) + 1); + if (tptr == NULL) + { + fprintf(stderr, + "Cannot malloc space for text\n"); + return(list); + } + strcpy(tptr, text); + text = tptr; + } + else if ((mark != NULL)&& + ((mark->type == M_PLAIN_TEXT)|| + (mark->type == M_LISTING_TEXT))&& + (!mark->is_end)) + { + text = get_plain_text(start, &end); + } + else + { + text = get_text(start, &end); + } + + /* + * If text is OK, put it into a mark structure, and add + * it to the linked list. 
+ */ + if (text == NULL) + { + if (start != end) + { + fprintf(stderr, "error parsing text, bailing out\n"); + return(list); + } + } + else + { + mark = (struct mark_up *)malloc(sizeof(struct mark_up)); + if (mark == NULL) + { + fprintf(stderr, "Cannot malloc for mark_up struct\n"); + return(list); + } + mark->type = M_NONE; + mark->is_end = 0; + mark->start = NULL; + mark->text = text; + mark->end = NULL; + mark->next = NULL; + current = AddObj(&list, current, mark, preformat); + } + start = end; + + if (*start == '\0') + { + break; + } + + /* + * Get the next mark if any, and if it is + * valid, add it to the linked list. + */ + mark = get_mark(start, &end); + if (mark == NULL) + { + if (start != end) + { + fprintf(stderr, "error parsing mark, bailing out\n"); + return(list); + } + } + else + { + mark->next = NULL; + current = AddObj(&list, current, mark, preformat); + } + + start = (char *)(end + 1); + + if ((mark != NULL)&&(mark->type == M_PLAIN_FILE)&& + (!mark->is_end)) + { + /* + * A linefeed immediately after the <PLAINTEXT> + * mark is to be ignored. + */ + if (*start == '\n') + { + start++; + } + } + else if ((mark != NULL)&&((mark->type == M_PLAIN_TEXT)|| + (mark->type == M_LISTING_TEXT))&& + (!mark->is_end)) + { + /* + * A linefeed immediately after the <XMP> + * or <LISTING> mark is to be ignored. + */ + if (*start == '\n') + { + start++; + } + } + /* + * If we are parsing pre-formatted text we need to set a + * flag so we don't throw out needed linefeeds. + */ + else if ((mark != NULL)&&(mark->type == M_PREFORMAT)) + { + if (mark->is_end) + { + preformat = 0; + } + else + { + preformat = 1; + /* + * A linefeed immediately after the <PRE> + * mark is to be ignored. 
+ */ + if (*start == '\n') + { + start++; + } + } + } + } +#ifdef TIMING +gettimeofday(&Tv, &Tz); +fprintf(stderr, "HTMLParse exit (%d.%d)\n", Tv.tv_sec, Tv.tv_usec); +#endif + return(list); +} + + +/* + * Determine mark type from the identifying string passed + */ +int +ParseMarkType(str) + char *str; +{ + int type; + char *tptr; + char tchar; + + if (str == NULL) + { + return(M_NONE); + } + + type = M_UNKNOWN; + tptr = str; + while (*tptr != '\0') + { + if (isspace((int)*tptr)) + { + break; + } + tptr++; + } + tchar = *tptr; + *tptr = '\0'; + + if (caseless_equal(str, MT_ANCHOR)) + { + type = M_ANCHOR; + } + else if (caseless_equal(str, MT_TITLE)) + { + type = M_TITLE; + } + else if (caseless_equal(str, MT_FIXED)) + { + type = M_FIXED; + } + else if (caseless_equal(str, MT_BOLD)) + { + type = M_BOLD; + } + else if (caseless_equal(str, MT_ITALIC)) + { + type = M_ITALIC; + } + else if (caseless_equal(str, MT_EMPHASIZED)) + { + type = M_EMPHASIZED; + } + else if (caseless_equal(str, MT_STRONG)) + { + type = M_STRONG; + } + else if (caseless_equal(str, MT_CODE)) + { + type = M_CODE; + } + else if (caseless_equal(str, MT_SAMPLE)) + { + type = M_SAMPLE; + } + else if (caseless_equal(str, MT_KEYBOARD)) + { + type = M_KEYBOARD; + } + else if (caseless_equal(str, MT_VARIABLE)) + { + type = M_VARIABLE; + } + else if (caseless_equal(str, MT_CITATION)) + { + type = M_CITATION; + } + else if (caseless_equal(str, MT_STRIKEOUT)) + { + type = M_STRIKEOUT; + } + else if (caseless_equal(str, MT_HEADER_1)) + { + type = M_HEADER_1; + } + else if (caseless_equal(str, MT_HEADER_2)) + { + type = M_HEADER_2; + } + else if (caseless_equal(str, MT_HEADER_3)) + { + type = M_HEADER_3; + } + else if (caseless_equal(str, MT_HEADER_4)) + { + type = M_HEADER_4; + } + else if (caseless_equal(str, MT_HEADER_5)) + { + type = M_HEADER_5; + } + else if (caseless_equal(str, MT_HEADER_6)) + { + type = M_HEADER_6; + } + else if (caseless_equal(str, MT_ADDRESS)) + { + type = M_ADDRESS; + } + else if 
(caseless_equal(str, MT_PLAIN_TEXT)) + { + type = M_PLAIN_TEXT; + } + else if (caseless_equal(str, MT_LISTING_TEXT)) + { + type = M_LISTING_TEXT; + } + else if (caseless_equal(str, MT_PLAIN_FILE)) + { + type = M_PLAIN_FILE; + } + else if (caseless_equal(str, MT_PARAGRAPH)) + { + type = M_PARAGRAPH; + } + else if (caseless_equal(str, MT_UNUM_LIST)) + { + type = M_UNUM_LIST; + } + else if (caseless_equal(str, MT_NUM_LIST)) + { + type = M_NUM_LIST; + } + else if (caseless_equal(str, MT_MENU)) + { + type = M_MENU; + } + else if (caseless_equal(str, MT_DIRECTORY)) + { + type = M_DIRECTORY; + } + else if (caseless_equal(str, MT_LIST_ITEM)) + { + type = M_LIST_ITEM; + } + else if (caseless_equal(str, MT_DESC_LIST)) + { + type = M_DESC_LIST; + } + else if (caseless_equal(str, MT_DESC_TITLE)) + { + type = M_DESC_TITLE; + } + else if (caseless_equal(str, MT_DESC_TEXT)) + { + type = M_DESC_TEXT; + } + else if (caseless_equal(str, MT_PREFORMAT)) + { + type = M_PREFORMAT; + } + else if (caseless_equal(str, MT_BLOCKQUOTE)) + { + type = M_BLOCKQUOTE; + } + else if (caseless_equal(str, MT_INDEX)) + { + type = M_INDEX; + } + else if (caseless_equal(str, MT_HRULE)) + { + type = M_HRULE; + } + else if (caseless_equal(str, MT_BASE)) + { + type = M_BASE; + } + else if (caseless_equal(str, MT_LINEBREAK)) + { + type = M_LINEBREAK; + } + else if (caseless_equal(str, MT_IMAGE)) + { + type = M_IMAGE; + } + else if (caseless_equal(str, MT_SELECT)) + { + type = M_SELECT; + } + else if (caseless_equal(str, MT_OPTION)) + { + type = M_OPTION; + } + else if (caseless_equal(str, MT_INPUT)) + { + type = M_INPUT; + } + else if (caseless_equal(str, MT_TEXTAREA)) + { + type = M_TEXTAREA; + } + else if (caseless_equal(str, MT_FORM)) + { + type = M_FORM; + } + else + { +#ifdef VERBOSE + fprintf(stderr, "warning: unknown mark (%s)\n", str); +#endif + type = M_UNKNOWN; + } + + *tptr = tchar; + return(type); +} + + +/* + * Parse a single anchor tag. 
/*
 * AnchorTag: parse one "name", "name=value" or name="value" item.
 *
 * ptrp is a pointer to a pointer to the string to be parsed.  On
 * return, the ptr is changed to point to after the text we have parsed.
 * On return startp and endp point to the beginning, and just after the
 * end, of the tag's name in the original string.
 *
 * Returns the tag value in a malloced buffer:
 *   - the string "1" for a name that is followed by more text but has
 *     no '=' (a boolean flag);
 *   - the quoted string, without the quotes, or the single word after
 *     the '='.
 * Returns NULL when the string ends right after the name, when a
 * quoted value is never closed, or when allocation fails.
 * The string is cut temporarily while the value is copied, so it must
 * be writable.
 */
char *
AnchorTag(ptrp, startp, endp)
	char **ptrp;
	char **startp;
	char **endp;
{
	char *tag_val;
	char *ptr;
	char *start;
	char tchar;
	int quoted;
	int has_value;

	/*
	 * remove leading spaces, and set start
	 */
	ptr = *ptrp;
	while (isspace((unsigned char)*ptr))
	{
		ptr++;
	}
	*startp = ptr;

	/*
	 * Find and set the end of the tag
	 */
	while ((*ptr != '\0')&&(*ptr != '=')&&
		(!isspace((unsigned char)*ptr)))
	{
		ptr++;
	}
	*endp = ptr;

	if (*ptr == '\0')
	{
		*ptrp = ptr;
		return(NULL);
	}

	/*
	 * Move to the start of the tag value, if there is one.
	 * set the has_value flag.
	 */
	has_value = 0;
	while ((isspace((unsigned char)*ptr))||(*ptr == '='))
	{
		if (*ptr == '=')
		{
			has_value = 1;
		}
		ptr++;
	}

	/*
	 * For a tag with no value, this is a boolean flag.
	 * Return the string "1" so we know the tag is there.
	 */
	if (!has_value)
	{
		/*
		 * Resume right after the name; the white space that was
		 * skipped belongs to the next item.
		 */
		*ptrp = *endp;
		tag_val = (char *)malloc(strlen("1") + 1);
		if (tag_val == NULL)
		{
			fprintf(stderr, "can't malloc space for tag value\n");
			return(NULL);
		}
		strcpy(tag_val, "1");

		return(tag_val);
	}

	quoted = 0;
	if (*ptr == '\"')
	{
		quoted = 1;
		ptr++;
	}

	start = ptr;
	/*
	 * Get tag value.  Either a quoted string or a single word
	 */
	if (quoted)
	{
		while ((*ptr != '\"')&&(*ptr != '\0'))
		{
			ptr++;
		}
		/*
		 * The closing quote is missing.
		 */
		if (*ptr == '\0')
		{
			*ptrp = ptr;
			return(NULL);
		}
	}
	else
	{
		while ((*ptr != '\0')&&(!isspace((unsigned char)*ptr)))
		{
			ptr++;
		}
	}

	/*
	 * Copy the tag value out into a malloced string
	 */
	tchar = *ptr;
	*ptr = '\0';
	tag_val = (char *)malloc(strlen(start) + 1);
	if (tag_val == NULL)
	{
		fprintf(stderr, "can't malloc space for tag value\n");
		*ptr = tchar;
		*ptrp = ptr;
		return(NULL);
	}
	strcpy(tag_val, start);
	*ptr = tchar;
	if (quoted)
	{
		/*
		 * Step over the closing quote.
		 */
		ptr++;
	}
	*ptrp = ptr;

	return(tag_val);
}


/*
 * Parse mark text for the value associated with the
 * passed mark tag.
 *
 * text:  the complete mark text.
 * mtext: the mark's name; parsing starts strlen(mtext) characters into
 *        text.
 * mtag:  the tag to look for, compared caselessly.
 *
 * If the passed tag is not found, or any argument is NULL, return NULL.
 * If the tag is found, return its value as produced by AnchorTag in a
 * malloced buffer; when AnchorTag produced no value for it, return a
 * malloced "".  Returns NULL if that empty string cannot be allocated.
 */
char *
ParseMarkTag(text, mtext, mtag)
	char *text;
	char *mtext;
	char *mtag;
{
	char *ptr;
	char *start;
	char *end;
	char *tag_val;
	char tchar;
	int found;

	if ((text == NULL)||(mtext == NULL)||(mtag == NULL))
	{
		return(NULL);
	}

	ptr = (char *)(text + strlen(mtext));

	while (*ptr != '\0')
	{
		tag_val = AnchorTag(&ptr, &start, &end);

		/*
		 * Cut the string after the tag name just long enough
		 * to compare it.
		 */
		tchar = *end;
		*end = '\0';
		found = caseless_equal(start, mtag);
		*end = tchar;

		if (found)
		{
			if (tag_val == NULL)
			{
				tag_val = (char *)malloc(1);
				if (tag_val == NULL)
				{
					fprintf(stderr,
					    "can't malloc space for tag value\n");
					return(NULL);
				}
				*tag_val = '\0';
			}
			return(tag_val);
		}

		if (tag_val != NULL)
		{
			free(tag_val);
		}
	}
	return(NULL);
}