/*
** Copyright (c) 2002 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License as published by the Free Software Foundation; either
** version 2 of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This file contains code used to convert wiki text into HTML.
*/
#include "config.h"
#include "format.h"
/*
** NOTE(review): the two system header names that followed were missing
** from this copy of the file (only "#include #include" and the PATH_MAX
** remark survived).  limits.h is inferred from that remark; the others
** are the standard headers the code in this file visibly relies on.
** Confirm against the upstream file and re-add anything else the rest
** of the file needs.
*/
#include <assert.h>
#include <ctype.h>
#include <limits.h>  /* for PATH_MAX */
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

/*
** Format a relative link for output. The idea here is to determine from
** context whether the link needs to be relative or absolute (i.e. for RSS
** output, e-mail notifications, etc).
**
** zFormat and the variadic arguments are expanded with vmprintf().  When
** g.zLinkURL is set and non-empty the result is rebuilt as
** "g.zLinkURL/link".  The intermediate string is passed through the %z
** conversion (presumably mprintf() releases it -- confirm against the
** mprintf() implementation).
**
** Returns the formatted string.
*/
char *format_link(const char* zFormat,...){
  char *zLink;
  va_list ap;

  va_start(ap,zFormat);
  zLink = vmprintf(zFormat,ap);
  va_end(ap);
  if( g.zLinkURL && g.zLinkURL[0] ){
    zLink = mprintf("%s/%z",g.zLinkURL,zLink);
  }
  return zLink;
}

/*
** Return the number of digits at the beginning of the string z.
**
** The character is converted to unsigned char before it is handed to
** isdigit(); passing a negative char value (any byte >= 0x80 where char
** is signed) to a <ctype.h> function is undefined behavior.
*/
int ndigit(const char *z){
  int i = 0;
  while( isdigit((unsigned char)z[i]) ){
    i++;
  }
  return i;
}

/*
** Check to see if *z contains nothing but spaces up to the next
** newline.  If so, return the number of spaces plus one for the
** newline characters.  If not, return 0.
**
** If two or more blank lines occur in a row, go ahead and return
** a number of characters sufficient to cover them all.
*/
static int is_blank_line(const char *z){
  int i = 0;
  int r = 0;
  /* Cast before isspace(): negative char values are undefined there. */
  while( isspace((unsigned char)z[i]) ){
    if( z[i]=='\n' ){
      r = i+1;         /* remember the end of the last blank line seen */
    }
    i++;
  }
  return r;
}

/*
** Return TRUE if *z points to the terminator for a word.  Words
** are terminated by whitespace or end of input or any of the
** characters in zEnd.
**
** More precisely: return 1 when every character from z up to the next
** whitespace character (or the end of the string) appears in zEnd, and
** 0 as soon as a character that is not in zEnd is found.  If zEnd is
** NULL the default set  . , : ; ? ! ) " '  is used.
*/
int is_eow(const char *z, const char *zEnd){
  if( zEnd==0 ) zEnd = ".,:;?!)\"'";
  while( *z!=0 && !isspace((unsigned char)*z) ){
    /* *z is non-zero here, so strchr() cannot match zEnd's terminator */
    if( strchr(zEnd, *z)==0 ) return 0;
    z++;
  }
  return 1;
}

/*
** Check to see if *z points to the beginning of a Wiki page name.
** If it does, return the number of characters in that name.  If not,
** return 0.
**
** A Wiki page name contains only alphabetic characters.  The first
** letter must be capital and there must be at least one other capital
** letter in the word.  And every capital letter must be followed by
** one or more lower-case letters.  The name must also be followed by
** an end-of-word as defined by is_eow() with its default terminators.
*/
int is_wiki_name(const char *z){
  int i;
  int nCap = 0;
  if( !isupper((unsigned char)z[0]) ) return 0;
  for(i=0; z[i]; i++){
    if( isupper((unsigned char)z[i]) ){
      if( !islower((unsigned char)z[i+1]) ) return 0;
      nCap++;
    }else if( !islower((unsigned char)z[i]) ){
      break;
    }
  }
  return (nCap>=2 && is_eow(&z[i],0)) ? i : 0;
}

/*
** Check to see if *z points to the beginning of a file in the repository.
** If it does, return the number of characters in that name.  If not,
** return 0.
**
** The filename must start with a slash and there'll have to be another slash
** somewhere inside.  Spaces in filenames aren't supported.  Names that
** contain a single-quote are rejected (see below).
*/
int is_repository_file(const char *z){
  int i;
  int gotslash = 0;
  if( z[0]!='/' ) return 0;
  for(i=1; z[i] && !is_eow(&z[i],0); i++){
    if( z[i]=='/' ) gotslash = 1;
    /* is_eow() only stops at a quote when nothing but terminator
    ** characters follow it, so a quote in the middle of the word
    ** (e.g. "/a/b'c") would reach this point and be spliced verbatim
    ** into the quoted SQL literal below.  Refuse such names. */
    if( z[i]=='\'' ) return 0;
  }
  if( !gotslash ) return 0;

  /* See if it's in the repository.  Note that we strip the leading '/'
  ** from the query. */
  if( !db_exists("SELECT filename FROM filechng WHERE filename='%.*s'",
                 i-1, &z[1]) ){
    return 0;
  }
  return i;
}

/*
** Check to see if z[] is a form that indicates the beginning of a
** bullet or enumeration list element.  z[] can be of the form "*:"
** or "_:" for a bullet or "N:" for an enumeration element where N
** is any number.  The colon can repeat 1 or more times (at most 10).
**
** If z[] is not a list element marker, then set *pLevel to 0 and
** return 0.  If z[] is a list element marker, set *pLevel to indicate
** the list depth (the number of colons) and the type (bullet or
** enumeration).  *pLevel is negative for enumerations and positive for
** bullets and the magnitude is the depth.  Then return the number of
** characters in the marker (which will always be at least 2.)  The
** count includes any whitespace, other than newline, that follows the
** colons.  A marker followed only by whitespace up to the end of the
** line or the end of the input is not accepted.
*/
static int is_list_elem(const char *z, int *pLevel){
  int type;
  int depth;
  const char *zStart = z;
  if( isdigit((unsigned char)*z) ){
    z++;
    while( isdigit((unsigned char)*z) ){ z++; }
    type = -1;
  }else if( *z=='*' || *z=='_' ){
    z++;
    type = +1;
  }else{
    *pLevel = 0;
    return 0;
  }
  depth = 0;
  while( *z==':' ){
    z++;
    depth++;
  }
  while( isspace((unsigned char)*z) && *z!='\n' ){ z++; }
  if( depth==0 || depth>10 || *z==0 || *z=='\n' ){
    *pLevel = 0;
    return 0;
  }
  *pLevel = type<0 ? -depth : depth;
  return (int)(z - zStart);
}

/*
** If *z points to horizontal rule markup, return the number of
** characters in that markup.  Otherwise return 0.
**
** Horizontal rule markup consists of four or more '-' or '=' characters
** at the beginning of a line followed by nothing but whitespace
** to the end of the line.  All of the rule characters must be the same
** as the first one.  The returned count does not include the newline.
*/
static int is_horizontal_rule(const char *z){
  int i;
  int c = z[0];
  if( c!='-' && c!='=' ) return 0;
  for(i=0; z[i]==c; i++){}
  if( i<4 ) return 0;
  while( isspace((unsigned char)z[i]) && z[i]!='\n' ){ i++; }
  return (z[i]=='\n' || z[i]==0) ? i : 0;
}

/*
** If *z points to a row of table markup, return the number of
** characters in that markup.  Otherwise return 0.
**
** Table markup consists of a line starting with '|' and each cell
** separated by more '|' characters.
The line ends with a '|' followed by ** nothing but whitespace to the end-of-line. */ static int is_table_row(const char *z){ int i; int c; if( z[0]!='|' ) return 0; for(i=1,c=z[1]; z[i] && z[i]!='\n'; i++){ if( !isspace(z[i]) ) c=z[i]; } if( c!='|' ) return 0; /* last non-whitespace char wasn't '|' */ return (z[i]=='\n' || z[i]==0) ? i : 0; } /* ** Output the table row defined by z */ static void output_table_row(const char *z, int nLen){ int i, j; char *zCopy = mprintf("%.*s",nLen,z); assert(z[0]=='|'); for(i=nLen-1; i>0 && zCopy[i]!='|'; i--){} zCopy[i+1] = 0; cgi_printf(""); for(i=j=0; zCopy[i]; i++){ if( zCopy[i]=='|' ){ if(i>j){ zCopy[i]=0; /* Each cell is wiki formatted. This _could_ be a bad thing. */ output_formatted(&zCopy[j+1],0); cgi_printf("\n"); } if( zCopy[i+1] ) cgi_printf("\t"); /* except for last '|' */ j = i; } } cgi_printf("\n"); free(zCopy); } /* ** Return the number of characters in the URL that begins ** at *z. Return 0 if *z is not the beginning of a URL. ** ** Algorithm: Advance to the first whitespace character or until ** then end of the string. Then back up over the following ** characters: .)]}?!"':;, */ int is_url(const char *z){ int i; int minlen = 6; switch( z[0] ){ case 'h': if( strncmp(z,"http:",5)==0 ) minlen = 7; else if( strncmp(z,"https:",6)==0 ) minlen = 8; else return 0; break; case 'f': if( strncmp(z,"ftp://",6)==0 ) minlen = 7; else return 0; break; case 'm': if( strncmp(z,"mailto:",7)==0 ) minlen = 10; else return 0; break; default: return 0; } for(i=0; z[i] && !isspace(z[i]); i++){} while( i>0 ){ switch( z[i-1] ){ case '.': case ')': case ']': case '}': case '?': case '!': case '"': case '\'': case ':': case ';': case ',': i--; break; default: return i>=minlen ? i : 0; } } return 0; } /* ** Return true if the given URL points to an image. 
An image URL is ** any URL that ends with ".gif", ".jpg", ".jpeg", or ".png" */ static int is_image(const char *zUrl, int N){ int i; char zBuf[10]; if( N<5 ) return 0; for(i=0; i<5; i++){ zBuf[i] = tolower(zUrl[N-5+i]); } zBuf[i] = 0; return strcmp(&zBuf[1],".gif")==0 || strcmp(&zBuf[1],".png")==0 || strcmp(&zBuf[1],".jpg")==0 || strcmp(&zBuf[1],".jpe")==0 || strcmp(zBuf,".jpeg")==0; } /* ** Output N characters of text from zText. */ static void put_htmlized_text(const char **pzText, int N){ if( N>0 ){ char *z = htmlize(*pzText, N); cgi_printf("%s", z); free(z); *pzText += N; } } /* ** Search ahead in text z[] looking for a font terminator consisting ** of "n" consecutive instances of character "c". The font terminator ** must be at the end of a word and it must occur before a paragraph break. ** Also, z[] must begin a new word. If any of these conditions are false, ** return false. If all conditions are meet, return true. ** ** TODO: Ignore terminators that occur inside of special markup such ** as "{quote: not-a-terminator_}" */ static int font_terminator(const char *z, int c, int n){ int seenNL = 0; int cnt = 0; if( isspace(*z) || *z==0 || *z==c ) return 0; z++; while( *z ){ if( *z==c && !isspace(z[-1]) ){ cnt++; if( cnt==n && is_eow(&z[1],0) ){ return 1; } }else{ cnt = 0; if( *z=='\n' ){ if( seenNL ) return 0; seenNL = 1; }else if( !isspace(*z) ){ seenNL = 0; } } z++; } return 0; } /* ** Return the number of asterisks at z[] and beyond. */ static int count_stars(const char *z){ int n = 0; while( *z=='*' ){ n++; z++; } return n; } /* ** The following structure is used to record information about a single ** instance of markup. Markup is text of the following form: ** ** {type: key args} ** or {type: key} ** or {type} ** ** The key is permitted to begin with "}". If args is missing, key is ** used in its place. So {type: key} is equivalent to {type: key key}. ** If key is missing, then type is used in its place. 
So {type} is the ** same as {type: type} which is the same as {type: type type} */ typedef struct Markup Markup; struct Markup { int lenTotal; /* Total length of the markup */ int lenType; /* Length of the "type" field */ int lenKey; /* Length of the "key" field */ int lenArgs; /* Length of the "args" field */ const char *zType; /* Pointer to the start of "type" */ const char *zKey; /* Pointer to the start of "key" */ const char *zArgs; /* Pointer to the start of "args" */ }; /* ** z[] is a string of text beginning with "{". Check to see if it is ** valid markup. If it is, fill in the pMarkup structure and return true. ** If it is not valid markup, return false. */ static int is_markup(const char *z, Markup *pMarkup){ int i, j; int nest = 1; if( *z!='{' ) return 0; for(i=1; isalpha(z[i]); i++){} if( z[i]=='}' ){ pMarkup->lenTotal = i+1; pMarkup->lenType = i-1; pMarkup->lenKey = i-1; pMarkup->lenArgs = i-1; pMarkup->zType = &z[1]; pMarkup->zKey = &z[1]; pMarkup->zArgs = &z[1]; return 1; } if( z[i]!=':' ) return 0; pMarkup->lenType = i-1; pMarkup->zType = &z[1]; i++; while( isspace(z[i]) && z[i]!='\n' ){ i++; } if( z[i]==0 || z[i]=='\n' ) return 0; j = i; pMarkup->zKey = &z[i]; while( z[i] && !isspace(z[i]) ){ if( z[i]=='}' ) nest--; if( z[i]=='{' ) nest++; if( nest==0 ) break; i++; } if( z[i]==0 || z[i]=='\n' ) return 0; pMarkup->lenKey = i - j; if( nest==0 ){ pMarkup->lenArgs = i - j; pMarkup->lenTotal = i+1; pMarkup->zArgs = pMarkup->zKey; return 1; } while( isspace(z[i]) && z[i]!='\n' ){ i++; } if( z[i]=='\n' || z[i]==0 ) return 0; j = i; while( z[i] && z[i]!='\n' ){ if( z[i]=='}' ) nest--; if( z[i]=='{' ) nest++; if( nest==0 ) break; i++; } if( z[i]!='}' || nest>0 ) return 0; pMarkup->zArgs = &z[j]; pMarkup->lenArgs = i - j; pMarkup->lenTotal = i+1; return 1; } /* ** The aList[] array records the current nesting of