/* ** Copyright (c) 2002 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public ** License as published by the Free Software Foundation; either ** version 2 of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** General Public License for more details. ** ** You should have received a copy of the GNU General Public ** License along with this library; if not, write to the ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, ** Boston, MA 02111-1307, USA. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains C functions and procedures that provide useful ** services to CGI programs. There are procedures for parsing and ** dispensing QUERY_STRING parameters and cookies, the "mprintf()" ** formatting function and its cousins, and routines to encode and ** decode strings in HTML or HTTP. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cgi.h" #if INTERFACE /* ** Shortcuts for cgi_parameter. P("x") returns the value of query parameter ** or cookie "x", or NULL if there is no such parameter or cookie. PD("x","y") ** does the same except "y" is returned in place of NULL if there is not match. */ #define P(x) cgi_parameter((x),0) #define PD(x,y) cgi_parameter((x),(y)) #define QP(x) quotable_string(cgi_parameter((x),0)) #define QPD(x,y) quotable_string(cgi_parameter((x),(y))) #endif /* INTERFACE */ /* ** Provide a reliable implementation of a caseless string comparison ** function. 
*/ #define stricmp sqlite3StrICmp extern int sqlite3StrICmp(const char*, const char*); /* ** The body of the HTTP reply text is stored here. */ static int nAllocTxt = 0; /* Amount of space allocated for HTTP reply text */ static int nUsedTxt = 0; /* Amount of space actually used */ static char *zTxt = 0; /* Pointer to malloced space */ /* ** Append reply content to what already exists. Return a pointer ** to the start of the appended text. */ const char *cgi_append_content(const char *zData, int nAmt){ if( nUsedTxt+nAmt >= nAllocTxt ){ nAllocTxt = nUsedTxt*2 + nAmt + 1000; zTxt = realloc( zTxt, nAllocTxt ); if( zTxt==0 ) exit(1); } memcpy(&zTxt[nUsedTxt], zData, nAmt); nUsedTxt += nAmt; return &zTxt[nUsedTxt-nAmt]; } /* ** Reset the HTTP reply text to be an empty string. */ void cgi_reset_content(void){ nUsedTxt = 0; g.zLinkURL = 0; } /* ** Return a pointer to the HTTP reply text. The text is reset */ char *cgi_extract_content(int *pnAmt){ char *z; *pnAmt = nUsedTxt; if( zTxt ){ zTxt[nUsedTxt] = 0; } z = zTxt; zTxt = 0; nAllocTxt = 0; nUsedTxt = 0; return z; } /* ** Additional information used to form the HTTP reply */ static char *zContentType = "text/html"; /* Content type of the reply */ static char *zReplyStatus = "OK"; /* Reply status description */ static int iReplyStatus = 200; /* Reply status code */ static char *zExtraHeader = 0; /* Extra header text */ static int fullHttpReply = 0; /* True for a full-blown HTTP header */ static const char *zLogFile = 0; /* Name of the log file */ static const char *zLogArg = 0; /* Argument to the log message */ /* ** Set the reply content type */ void cgi_set_content_type(const char *zType){ zContentType = mprintf("%s", zType); } /* ** Set the reply status code */ void cgi_set_status(int iStat, const char *zStat){ zReplyStatus = mprintf("%s", zStat); iReplyStatus = iStat; } /* ** Append text to the header of an HTTP reply */ void cgi_append_header(const char *zLine){ if( zExtraHeader ){ zExtraHeader = mprintf("%z%s", 
zExtraHeader, zLine); }else{ zExtraHeader = mprintf("%s", zLine); } } /* ** Set a cookie. ** ** Zero lifetime implies a session cookie. */ void cgi_set_cookie( const char *zName, /* Name of the cookie */ const char *zValue, /* Value of the cookie. Automatically escaped */ const char *zPath, /* Path cookie applies to. NULL means "/" */ int lifetime /* Expiration of the cookie in seconds */ ){ char *zCookie; if( zPath==0 ) zPath = "/"; if( lifetime>0 ){ lifetime += (int)time(0); zCookie = mprintf("Set-Cookie: %s=%t; Path=%s; expires=%s; Version=1\r\n", zName, zValue, zPath, cgi_rfc822_datestamp(lifetime)); }else{ zCookie = mprintf("Set-Cookie: %s=%t; Path=%s; Version=1\r\n", zName, zValue, zPath); } cgi_append_header(zCookie); free(zCookie); } /* ** This routine sets up the name of a logfile and an argument to the ** log message. The log message is written when cgi_reply() is invoked. */ void cgi_logfile(const char *zFile, const char *zArg){ if( zFile ) zLogFile = zFile; zLogArg = zArg; } static char *cgi_add_etag(char *zTxt, int nLen){ MD5Context ctx; unsigned char digest[16]; int i, j; char zETag[64]; MD5Init(&ctx); MD5Update(&ctx,zTxt,nLen); MD5Final(digest,&ctx); for(j=i=0; i<16; i++,j+=2){ bprintf(&zETag[j],sizeof(zETag)-j,"%02x",(int)digest[i]); } cgi_append_header( mprintf("ETag: %s\r\n", zETag) ); return strdup(zETag); } /* ** Do some cache control stuff. First, we generate an ETag and include it in ** the response headers. Second, we do whatever is necessary to determine if ** the request was asking about caching and whether we need to send back the ** response body. If we shouldn't send a body, return non-zero. ** ** Currently, we just check the ETag against any If-None-Match header. ** ** FIXME: In some cases (attachments, file contents) we could check ** If-Modified-Since headers and always include Last-Modified in responses. */ static int check_cache_control(void){ /* FIXME: there's some gotchas wth cookies and some headers. 
*/ char *zETag = cgi_add_etag(zTxt,nUsedTxt); char *zMatch = getenv("HTTP_IF_NONE_MATCH"); if( zETag!=0 && zMatch!=0 ) { char *zBuf = strdup(zMatch); if( zBuf!=0 ){ char *zTok = 0; char *zPos; for( zTok = strtok_r(zBuf, ",\"",&zPos); zTok && strcasecmp(zTok,zETag); zTok = strtok_r(0, ",\"",&zPos)){} free(zBuf); if(zTok) return 1; } } return 0; } /* ** Do a normal HTTP reply */ void cgi_reply(void){ FILE *log; if( iReplyStatus<=0 ){ iReplyStatus = 200; zReplyStatus = "OK"; } if( iReplyStatus==200 && check_cache_control() ) { /* change the status to "unchanged" and we can skip sending the ** actual response body. Obviously we only do this when we _have_ a ** body (code 200). */ iReplyStatus = 304; zReplyStatus = "Not Modified"; } if( fullHttpReply ){ printf("HTTP/1.0 %d %s\r\n", iReplyStatus, zReplyStatus); printf("Date: %s\r\n", cgi_rfc822_datestamp(time(0))); printf("Connection: close\r\n"); }else{ printf("Status: %d %s\r\n", iReplyStatus, zReplyStatus); } if( zExtraHeader ){ printf("%s", zExtraHeader); } if( g.isConst ){ /* constant means that the input URL will _never_ generate anything ** else. In the case of attachments, the contents won't change because ** an attempt to change them generates a new attachment number. In the ** case of most /getfile calls for specific versions, the only way the ** content changes is if someone breaks the SCM. And if that happens, a ** stale cache is the least of the problem. So we provide an Expires ** header set to a reasonable period (default: one week). */ time_t expires = time(0) + atoi(db_config("constant_expires","604800")); printf( "Expires: %s\r\n", cgi_rfc822_datestamp(expires)); } if( g.isAnon ){ printf("Cache-control: public\r\n"); }else{ /* Content intended for logged in users should only be cached in ** the browser, not some shared location. 
*/ printf("Cache-control: private\r\n"); } #if CVSTRAC_I18N printf( "Content-Type: %s; charset=%s\r\n", zContentType, nl_langinfo(CODESET)); #else printf( "Content-Type: %s; charset=ISO-8859-1\r\n", zContentType); #endif if( iReplyStatus != 304 ) { printf( "Content-Length: %d\r\n", nUsedTxt ); } printf("\r\n"); if( zTxt && iReplyStatus != 304 ){ fwrite(zTxt, 1, nUsedTxt, stdout); } if( zLogFile && (log = fopen(zLogFile,"a"))!=0 ){ time_t now; struct tm *pTm; const char *zPath; const char *zAddr; struct tms sTms; double rScale; char zDate[200]; if( zLogArg==0 ) zLogArg = "*"; zPath = getenv("REQUEST_URI"); if( zPath==0 ) zPath = "/"; zAddr = getenv("REMOTE_ADDR"); if( zAddr==0 ) zAddr = "*"; time(&now); pTm = localtime(&now); strftime(zDate, sizeof(zDate), "%Y-%m-%d %H:%M:%S", pTm); fprintf(log, "%s %s %s %d %s", zDate, zAddr, zPath, iReplyStatus,zLogArg); times(&sTms); rScale = 1.0/(double)sysconf(_SC_CLK_TCK); fprintf(log, " %g %g %g %g\n", rScale*sTms.tms_utime, rScale*sTms.tms_stime, rScale*sTms.tms_cutime, rScale*sTms.tms_cstime); fclose(log); } } static int is_same_page(const char *zPage1, const char *zPage2){ size_t s1 = strcspn(zPage1,"?#"); size_t s2 = strcspn(zPage2,"?#"); if( s1 != s2 ) return 0; return !strncmp(zPage1,zPage2,s1); } /* ** Do a redirect request to the URL given in the argument. ** ** The URL might be relative to the current document. If so, ** this routine has to translate the URL into an absolute ** before formatting the reply. */ void cgi_redirect(const char *zURL){ char *zLocation; if( strncmp(zURL,"http:",5)==0 || strncmp(zURL,"https:",6)==0 || *zURL=='/' ){ /* An absolute URL. Do nothing */ }else{ int i, j, k=0; char *zDest; char *zCur = getenv("REQUEST_URI"); if( zCur==0 ) zCur = ""; for(i=0; zCur[i] && zCur[i]!='?' && zCur[i]!='#'; i++){} if( g.zExtra ){ /* Skip to start of extra stuff, then pass over any /'s that might ** have separated the document root from the extra stuff. 
This ** ensures that the redirection actually redirects the root, not ** something deep down at the bottom of a URL. */ i -= strlen(g.zExtra); while( i>0 && zCur[i-1]=='/' ){ i--; } } while( i>0 && zCur[i-1]!='/' ){ i--; } zDest = mprintf("%.*s/%s", i, zCur, zURL); /* don't touch the protocol stuff, if it exists */ if( !strncmp(zDest,"http://",7) ){ k = 7; }else if( !strncmp(zDest,"https://",8) ){ k = 8; } /* strip out constructs like .., /./, //, etc */ for(i=j=k; zDest[i]; i++){ if( zDest[i]=='/' ){ if( zDest[i+1]=='.' ){ if( zDest[i+2]=='/' ){ i += 2; continue; } if( zDest[i+2]=='.' && zDest[i+3]=='/' ){ if( j==0 ){ i += 3; continue; } j--; while( j>0 && zDest[j-1]!='/' ){ j--; } continue; } } if( zDest[i+1]=='/' ) continue; } zDest[j++] = zDest[i]; } zDest[j] = 0; zURL = zDest; /* see if we've got a cycle by matching everything up to the ? or # ** in the new and old URLs. */ if( is_same_page(zDest,zCur) ){ cgi_reset_content(); cgi_printf("\n

Cyclic redirection in %s

\n\n", zURL); cgi_set_status(500, "Internal Server Error"); cgi_reply(); exit(0); } } /* ** The lynx browser complains if the "http:" prefix is missing ** from a redirect. But if we use it, we lose the ability to ** run on a secure server using stunnel. ** ** Relative redirects are used by default. This works with stunnel. ** Lynx sends us a nasty message, but it still works. So with ** relative redirects everybody works. With absolute redirects, ** stunnel will not work. So the default configuration is to go ** with what works for everybody, even if it happens to be technically ** incorrect. */ #ifdef ABSOLUTE_REDIRECT { char *zHost; if( strncmp(zURL,"http:",5)!=0 && strncmp(zURL,"https:",6)!=0 && (zHost = getenv("HTTP_HOST"))!=0 ){ char *zMode = getenv("HTTPS"); if( zMode && strcmp(zMode,"on")==0 ){ zURL = mprintf("https://%s%s", zHost, zURL); }else{ zURL = mprintf("http://%s%s", zHost, zURL); } } } #endif zLocation = mprintf("Location: %s\r\n", zURL); cgi_append_header(zLocation); cgi_reset_content(); cgi_printf("\n

Redirect to %h

\n\n", zURL); cgi_set_status(302, "Moved Temporarily"); free(zLocation); cgi_reply(); exit(0); } /* ** Information about all query parameters and cookies are stored ** in these variables. */ static int nAllocQP = 0; /* Space allocated for aParamQP[] */ static int nUsedQP = 0; /* Space actually used in aParamQP[] */ static int sortQP = 0; /* True if aParamQP[] needs sorting */ static struct QParam { /* One entry for each query parameter or cookie */ char *zName; /* Parameter or cookie name */ char *zValue; /* Value of the query parameter or cookie */ } *aParamQP; /* An array of all parameters and cookies */ /* ** Add another query parameter or cookie to the parameter set. ** zName is the name of the query parameter or cookie and zValue ** is its fully decoded value. ** ** zName and zValue are not copied and must not change or be ** deallocated after this routine returns. */ static void cgi_set_parameter_nocopy(char *zName, char *zValue){ if( nAllocQP<=nUsedQP ){ nAllocQP = nAllocQP*2 + 10; aParamQP = realloc( aParamQP, nAllocQP*sizeof(aParamQP[0]) ); if( aParamQP==0 ) exit(1); } aParamQP[nUsedQP].zName = zName; aParamQP[nUsedQP].zValue = zValue; nUsedQP++; sortQP = 1; } /* ** Add another query parameter or cookie to the parameter set. ** zName is the name of the query parameter or cookie and zValue ** is its fully decoded value. ** ** Copies are made of both the zName and zValue parameters. */ void cgi_set_parameter(const char *zName, const char *zValue){ cgi_set_parameter_nocopy(mprintf("%s",zName), mprintf("%s",zValue)); } /* ** Add a list of query parameters or cookies to the parameter set. ** ** Each parameter is of the form NAME=VALUE. Both the NAME and the ** VALUE may be url-encoded ("+" for space, "%HH" for other special ** characters). But this routine assumes that NAME contains no ** special character and therefore does not decode it. ** ** Parameters are separated by the "terminator" character. Whitespace ** before the NAME is ignored. 
**
** The input string "z" is modified but no copy is made.  "z"
** should not be deallocated or changed again after this routine
** returns or it will corrupt the parameter table.
**
** A NAME with no "=" is stored with an empty string as its value.
*/
static void add_param_list(char *z, int terminator){
  while( *z ){
    char *zName;
    char *zValue;
    /* The bytes come from the request and may have the high bit set;
    ** <ctype.h> functions require a value representable as unsigned
    ** char, so convert before calling isspace(). */
    while( isspace((unsigned char)*z) ){ z++; }
    zName = z;
    while( *z && *z!='=' && *z!=terminator ){ z++; }
    if( *z=='=' ){
      *z = 0;          /* terminate NAME */
      z++;
      zValue = z;
      while( *z && *z!=terminator ){ z++; }
      if( *z ){
        *z = 0;        /* terminate VALUE */
        z++;
      }
      dehttpize(zValue);
      cgi_set_parameter_nocopy(zName, zValue);
    }else{
      if( *z ){ *z++ = 0; }
      cgi_set_parameter_nocopy(zName, "");
    }
  }
}

/*
** *pz is a string that consists of multiple lines of text.  This
** routine finds the end of the current line of text and converts
** the "\n" or "\r\n" that ends that line into a "\000".  It then
** advances *pz to the beginning of the next line and returns the
** previous value of *pz (which is the start of the current line.)
**
** *pLen is reduced by the number of bytes consumed, line ending
** included.  NULL is returned, and nothing is changed, when *pz
** already points at the end of the string.  A final line with no
** "\n" is returned as-is.
*/
static char *get_line_from_string(char **pz, int *pLen){
  char *z = *pz;
  int i;
  if( z[0]==0 ) return 0;
  for(i=0; z[i]; i++){
    if( z[i]=='\n' ){
      if( i>0 && z[i-1]=='\r' ){
        z[i-1] = 0;
      }else{
        z[i] = 0;
      }
      i++;       /* step over the "\n" so *pz lands on the next line */
      break;
    }
  }
  *pz = &z[i];
  *pLen -= i;
  return z;
}

/*
** The input *pz points to content that is terminated by a "\r\n"
** followed by the boundry marker zBoundry.  An extra "--" may or
** may not be appended to the boundry marker.  There are *pLen characters
** in *pz.
**
** This routine adds a "\000" to the end of the content (overwriting
** the "\r\n" and returns a pointer to the content.  The *pz input
** is adjusted to point to the first line following the boundry.
** The length of the content is stored in *pnContent.
*/ static char *get_bounded_content( char **pz, /* Content taken from here */ int *pLen, /* Number of bytes of data in (*pz)[] */ char *zBoundry, /* Boundry text marking the end of content */ int *pnContent /* Write the size of the content here */ ){ char *z = *pz; int len = *pLen; int i; int nBoundry = strlen(zBoundry); *pnContent = len; for(i=0; i0 && z[i-1]=='\r' ) i--; z[i] = 0; *pnContent = i; i += nBoundry; break; } } *pz = &z[i]; get_line_from_string(pz, pLen); return z; } /* ** Tokenize a line of text into as many as nArg tokens. Make ** azArg[] point to the start of each token. ** ** Tokens consist of space or semi-colon delimited words or ** strings inside double-quotes. Example: ** ** content-disposition: form-data; name="fn"; filename="index.html" ** ** The line above is tokenized as follows: ** ** azArg[0] = "content-disposition:" ** azArg[1] = "form-data" ** azArg[2] = "name=" ** azArg[3] = "fn" ** azArg[4] = "filename=" ** azArg[5] = "index.html" ** azArg[6] = 0; ** ** '\000' characters are inserted in z[] at the end of each token. ** This routine returns the total number of tokens on the line, 6 ** in the example above. */ static int tokenize_line(char *z, int mxArg, char **azArg){ int i = 0; while( *z ){ while( isspace(*z) || *z==';' ){ z++; } if( *z=='"' && z[1] ){ *z = 0; z++; if( i0 && zType && (strcmp(zType,"application/x-www-form-urlencoded")==0 || strncmp(zType,"multipart/form-data",19)==0) ){ z = malloc( len+1 ); if( z==0 ) exit(1); len = fread(z, 1, len, stdin); z[len] = 0; if( zType[0]=='a' ){ add_param_list(z, '&'); }else{ process_multipart_form_data(z, len); } } z = getenv("HTTP_COOKIE"); if( z ){ z = mprintf("%s",z); add_param_list(z, ';'); } } /* ** This is the comparison function used to sort the aParamQP[] array of ** query parameters and cookies. 
*/ static int qparam_compare(const void *a, const void *b){ struct QParam *pA = (struct QParam*)a; struct QParam *pB = (struct QParam*)b; return strcmp(pA->zName, pB->zName); } /* ** Return the value of a query parameter or cookie whose name is zName. ** If there is no query parameter or cookie named zName, then return ** zDefault instead. */ const char *cgi_parameter(const char *zName, const char *zDefault){ int lo, hi, mid, c; if( nUsedQP<=0 ) return zDefault; if( sortQP ){ qsort(aParamQP, nUsedQP, sizeof(aParamQP[0]), qparam_compare); sortQP = 0; } lo = 0; hi = nUsedQP-1; while( lo<=hi ){ mid = (lo+hi)/2; c = strcmp(aParamQP[mid].zName, zName); if( c==0 ){ return aParamQP[mid].zValue; }else if( c>0 ){ hi = mid-1; }else{ lo = mid+1; } } return zDefault; } /* ** Return true if any of the query parameters in the argument ** list are defined. */ int cgi_any(const char *z, ...){ va_list ap; char *z2; if( cgi_parameter(z,0)!=0 ) return 1; va_start(ap, z); while( (z2 = va_arg(ap, char*))!=0 ){ if( cgi_parameter(z2,0)!=0 ) return 1; } va_end(ap); return 0; } /* ** Return true if all of the query parameters in the argument list ** are defined. */ int cgi_all(const char *z, ...){ va_list ap; char *z2; if( cgi_parameter(z,0)==0 ) return 0; va_start(ap, z); while( (z2 = va_arg(ap, char*))==0 ){ if( cgi_parameter(z2,0)==0 ) return 0; } va_end(ap); return 1; } /* ** Print all query parameters on standard output. Format the ** parameters as HTML. This is used for testing and debugging. */ void cgi_print_all(void){ int i; cgi_parameter("",""); /* For the parameters into sorted order */ for(i=0; i\n", htmlize(aParamQP[i].zName, -1), htmlize(aParamQP[i].zValue, -1)); } } /* ** Write HTML text for an option menu to standard output. zParam ** is the query parameter that the option menu sets. zDflt is the ** initial value of the option menu. Additional arguments are name/value ** pairs that define values on the menu. The list is terminated with ** a single NULL argument. 
*/ void cgi_optionmenu(int in, const char *zP, const char *zD, ...){ va_list ap; char *zName, *zVal; int dfltSeen = 0; cgi_printf("%*s\n", in, ""); } /* ** This routine works a lot like cgi_optionmenu() except that the list of ** values is contained in an array. Also, the values are just values, not ** name/value pairs as in cgi_optionmenu. */ void cgi_v_optionmenu( int in, /* Indent by this amount */ const char *zP, /* The query parameter name */ const char *zD, /* Default value */ const char **az /* NULL-terminated list of allowed values */ ){ const char *zVal; int i; cgi_printf("%*s\n", in, ""); } /* ** This routine works a lot like cgi_v_optionmenu() except that the list ** is a list of pairs. The first element of each pair is the value used ** internally and the second element is the value displayed to the user. */ void cgi_v_optionmenu2( int in, /* Indent by this amount */ const char *zP, /* The query parameter name */ const char *zD, /* Default value */ const char **az /* NULL-terminated list of allowed values */ ){ const char *zVal; int i; cgi_printf("%*s\n", in, ""); } /* ** This routine should never be called directly. Use wrapper functions below. ** Generates HTML input element to be used in forms. ** Parameters are explained below inline. If any param is 0 that ** attribute/feature will not be used. zValue is required, except for text ** fields. zName is also required except for submit, reset and button. 
*/ void cgi_input_elem( int nType, /* 1:submit, 2:reset, 3:button, 4:file, 5:hidden, ** 6:checkbox, 7:radio, 8:password, 9:text */ const char *zName, /* CGI param name */ const char *zId, /* HTML element id */ const char *zClass, /* CSS class to apply */ char nAccessKey, /* Access key to assign */ int nTabIndex, /* Element's tab index */ int nSize, /* Used only for text fields */ int nMaxLen, /* Used only for text fields */ int nLabelOnLeft, /* If set, put label text left of element */ const char *zValue, /* Element's value */ const char *zDflt, /* If same as zValue, "select" this element */ const char *zLabel /* Label text. No HTML escaping is done on it */ ){ /* Buttons and hidden fields can't have label */ int bHasLabel = ( nType>4 && zLabel && zLabel[0] ); assert( nType > 0 ); assert( nType <= 9 ); if( zValue==0 || zValue[0]==0 ) return; if( nType<1 && nType>3 && (!zName || !zName[0]) ) return; if( bHasLabel ){ /* Make sure we have some valid id because