/*
** Copyright (c) 2002 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the GNU General Public
** License as published by the Free Software Foundation; either
** version 2 of the License, or (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** General Public License for more details.
**
** You should have received a copy of the GNU General Public
** License along with this library; if not, write to the
** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
** Boston, MA 02111-1307, USA.
**
** Author contact information:
** drh@hwaci.com
** http://www.hwaci.com/drh/
**
*******************************************************************************
**
** This file contains code used to do text searches of the database
*/
#include "config.h"
#include "search.h"
/*
** We'll use this routine in several places.
*/
extern int sqlite3StrNICmp(const char*,const char*,int);
/*
** Search for a keyword in text. Return a matching score:
**
** 0 No sign of the word was found in the text
** 6 The word was found but not on a word boundry
** 8 The word was found with different capitalization
** 10 The word was found in the text exactly as given.
*/
static int score_word(const char *zWord, const char *zText, int n, int *pIdx){
int c1, c2, i, best;
int idx = -1;
best = 0;
c1 = zWord[0];
if( isupper(c1) ){
c2 = tolower(c1);
}else{
c2 = toupper(c1);
}
if( n<=0 ) n = strlen(zWord);
for(i=0; zText[i]; i++){
if( (zText[i]==c1 || zText[i]==c2) && sqlite3StrNICmp(zWord,&zText[i],n)==0){
int score = 6;
if( (i==0 || !isalnum(zText[i-1]))
&& (zText[i+n]==0 || !isalnum(zText[i+n])) ){
score = 8;
if( strncmp(zWord,&zText[i],n)==0 ){
idx = i;
best = score = 10;
break;
}
}
if( score>best ){
best = score;
idx = i;
}
}
}
if( pIdx ) *pIdx = idx;
return best;
}
/*
** search(PATTERN, STRING, ...)
**
** This routine implements the SQLite "search()" function. There are two
** arguments: text to be searched and a keyword pattern. The function
** returns an integer score which is higher depending on how well the
** search went.
*/
void srchFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
int i, j, k, score, total;
const char *zPattern;
if( argc<2 || argv[0]==0 ) return;
zPattern = sqlite3_value_text(argv[0]);
total = 0;
if( zPattern && zPattern[0] ){
for(i=0; zPattern[i]; i++){
if( isspace(zPattern[i]) ) continue;
for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
score = 0;
for(k=1; k<argc; k++){
int one_score;
char *zWord = sqlite3_value_text(argv[k]);
if( zWord==0 || zWord[0]==0 ) continue;
one_score = score_word(&zPattern[i], zWord, j, 0);
if( one_score>score ) score = one_score;
}
total += score;
i += j-1;
}
}
sqlite3_result_int(context, total);
}
/*
** highlight(WORD-LIST, TEXT, ...)
**
** Return HTML which contains TEXT but with every word in WORD-LIST
** enclosed within <b>...</b>. Long segments of TEXT that do not
** contain any matching words are replace with elipses.
*/
static void highlightFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
int i, j, k;
int size;
int wn, idx;
int nKey; /* Number of search terms in zPattern */
char *azKey[50]; /* Up to 50 search terms in zPattern */
int keySize[50]; /* Number of characters in each search term */
int begin[5], end[5]; /* Up to 5 100-character segments of text */
int sbegin[5], send[5]; /* The same 5 segments, sorted and coalesced */
char mask[256]; /* True if first character of a key */
char *zAll;
char *zPattern;
char *z;
/* Output is written into zOut[]. There are at most 5 exemplars of
** about 100 characters each - total 500 characters. But then we have
** to insert <b> and </b> around search terms and escape HTML characters
** such as < and > and &. The maximum expansion is "& " converted
** into "<b>&</b> " or less than 7-to-1. So the maximum output
** is 3500. Add a little extra space for the ellipses between
** exemplars. Plus some margin in case the calculation above is
** wrong in some way.
*/
char zOut[8000];
/*
** We must have a pattern and at least one text block to function.
*/
if( argc<2 || argv[0]==0 ) return;
zPattern = sqlite3_value_text(argv[0]);
if( zPattern[0]==0 ) return;
/*
** Make a copy of the pattern.
*/
zPattern = strdup(zPattern);
if( zPattern==0 ) return;
/*
** Concatenate all text to be analyzed.
*/
size = 0;
for(k=1; k<argc; k++){
if( argv[k]==0 ) continue;
z = (char*)sqlite3_value_text(argv[k]);
if( z==0 ) continue;
size += strlen(z)+1;
}
zAll = malloc( size );
if( zAll==0 ) return;
for(i=0, k=1; k<argc; k++){
if( argv[k]==0 ) continue;
z = (char*)sqlite3_value_text(argv[k]);
if( z==0 ) continue;
if( i>0 ){ zAll[i++] = '\n'; }
strcpy(&zAll[i], z);
i += strlen(&zAll[i]);
}
/*
** Find as many as 5 exemplar segments in the text with each segment
** as long as 100 characters.
*/
nKey = 0;
for(wn=i=0; wn<50 && zPattern[i];){
int score;
if( isspace(zPattern[i]) ){ i++; continue; }
for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
azKey[nKey] = &zPattern[i];
keySize[nKey++] = j;
score = score_word(&zPattern[i], zAll, j, &idx);
i += j;
if( zPattern[i] ) zPattern[i++] = 0;
if( score==0 ) continue;
begin[wn] = idx-50 > 0 ? idx-50 : 0;
end[wn] = begin[wn]+100 > size ? size : begin[wn]+100;
wn++;
}
/*
** Sort an coalesce the exemplars
*/
if( wn==0 ){
begin[0] = 0;
end[0] = size>100 ? 100 : size;
wn = 1;
}
for(i=0; i<5 && wn>0; i++){
int min = begin[0];
k = 0;
for(j=1; j<wn; j++){
if( begin[j]<min ){
k = j;
min = begin[j];
}
}
sbegin[i] = begin[k];
send[i] = end[k];
wn--;
begin[k] = begin[wn];
end[k] = end[wn];
}
wn = i;
begin[0] = sbegin[0];
end[0] = send[0];
for(i=j=1; i<wn; i++){
if( sbegin[i]>end[j-1]+1 ){
begin[j] = sbegin[i];
end[j] = send[i];
j++;
}else if( send[i]>end[j-1] ){
end[j-1] = send[i];
}
}
wn = j;
/*
** Initialize the mask[] array so that mask[x] has value 1 if x is the
** first character of any key pattern.
*/
memset(mask, 0, sizeof(mask));
for(i=0; i<nKey; i++){
mask[toupper(azKey[i][0])] = 1;
mask[tolower(azKey[i][0])] = 1;
}
/*
** Generate the output stream
*/
z = zOut;
for(i=0; i<wn; i++){
if( begin[i]>0 ){
if( i>0 ){ *(z++) = ' '; }
strcpy(z, "<b>...</b> ");
z += strlen(z);
}
for(j=begin[i]; j<end[i]; j++){
int c = zAll[j];
if( c=='&' ){
strcpy(z, "&");
z += 5;
}else if( c=='<' ){
strcpy(z, "<");
z += 4;
}else if( c=='>' ){
strcpy(z, ">");
z += 4;
}else if( mask[c] ){
for(k=0; k<nKey; k++){
int n;
if( tolower(c)!=tolower(azKey[k][0]) ) continue;
n = keySize[k];
if( sqlite3StrNICmp(&zAll[j],azKey[k],n)==0 ){
strcpy(z,"<b>");
z += 3;
while( n ){
c = zAll[j++];
if( c=='&' ){
strcpy(z, "&");
z += 5;
}else if( c=='<' ){
strcpy(z, "<");
z += 4;
}else if( c=='>' ){
strcpy(z, ">");
z += 4;
}else{
*(z++) = c;
}
n--;
}
j--;
strcpy(z,"</b>");
z += 4;
break;
}
}
if( k>=nKey ){
*(z++) = c;
}
}else{
*(z++) = c;
}
}
}
if( end[wn-1]<size ){
strcpy(z, " <b>...</b>");
}else{
*z = 0;
}
/*
** Report back the results
*/
sqlite3_result_text(context, zOut, -1, SQLITE_TRANSIENT);
free(zAll);
free(zPattern);
}
/*
** WEBPAGE: /search
*/
void search_page(void){
int srchTkt = PD("t","0")[0]!='0';
int srchCkin = PD("c","0")[0]!='0';
int srchWiki = PD("w","0")[0]!='0';
int srchFile = PD("f","0")[0]!='0';
int nPage = atoi(PD("p","0"));
const char *zPattern;
char **azResult = 0;
sqlite3 *db;
char zSql[10000];
login_check_credentials();
zPattern = P("s");
if( zPattern && zPattern[0] ){
zPattern = sqlite3_mprintf("%q",zPattern);
}
if( srchTkt+srchCkin+srchWiki+srchFile==0 ){
srchTkt = srchCkin = srchWiki = srchFile = 1;
}
if( !g.okRead ) srchTkt = 0;
if( !g.okCheckout ) srchFile = srchCkin = 0;
if( !g.okRdWiki ) srchWiki = 0;
if( srchTkt==0 && srchCkin==0 && srchWiki==0 && srchFile==0 ){
login_needed();
return;
}
if( zPattern && zPattern[0] && strlen(zPattern)<100 ){
int i = 0;
char *zConnect = " (";
db = db_open();
db_add_functions();
sqlite3_create_function(db, "highlight", -1, SQLITE_ANY, 0,
highlightFunc, 0, 0);
appendf(zSql,&i,sizeof(zSql),"SELECT score, url, title, body FROM");
if( srchTkt ){
appendf(zSql,&i,sizeof(zSql), "%s SELECT "
"search('%s',title,description,remarks) as score, "
"'tktview?tn=' || tn as url, "
"'Ticket #' || tn || ': ' || highlight('%s',title) as title, "
"highlight('%s',description,remarks) as body FROM ticket "
"WHERE score>0",
zConnect, zPattern, zPattern, zPattern);
zConnect = " UNION ALL";
}
if( srchCkin ){
appendf(zSql,&i,sizeof(zSql),"%s SELECT "
"search('%s',message) as score, "
"'chngview?cn=' || cn as url, "
"'Check-in [' || cn || ']' as title, "
"highlight('%s',message) as body FROM chng "
"WHERE score>0",
zConnect, zPattern, zPattern);
zConnect = " UNION ALL";
}
if( srchWiki ){
appendf(zSql,&i,sizeof(zSql),"%s SELECT "
"search('%s',name, text) as score, "
"'wiki?p=' || name as url, "
"'Wiki Page ' || highlight('%s',name) as title, "
"highlight('%s',text) as body "
"FROM (select name as nm, min(invtime) as tm FROM wiki GROUP BY name) "
" as x, wiki "
"WHERE score>0 AND x.nm=wiki.name AND x.tm=wiki.invtime",
zConnect, zPattern, zPattern, zPattern);
zConnect = " UNION ALL";
}
if( srchFile ){
appendf(zSql,&i,sizeof(zSql),"%s SELECT "
"search('%s', dir || '/' || base) as score, "
"'rlog?f=' || dir || '/' || base as url, "
"'File ' || dir || '/' || base as title, "
"highlight('%s',dir || '/' || base) as body FROM file "
"WHERE score>0",
zConnect, zPattern, zPattern);
zConnect = " UNION ALL";
appendf(zSql,&i,sizeof(zSql),"%s SELECT "
"search('%s', fname, description) as score, "
"'attach_get/' || atn as url, "
"'Attachment ' || fname as title, "
"highlight('%s',fname, description) as body FROM attachment "
"WHERE score>0",
zConnect, zPattern, zPattern);
zConnect = " UNION ALL";
}
appendf(zSql,&i,sizeof(zSql), ") ORDER BY score DESC LIMIT 30 OFFSET %d;",
nPage*30);
azResult = db_query("%s",zSql);
}
common_standard_menu("search", 0);
common_add_help_item("CvstracSearch");
common_header("Search");
@ <form action="search" method="GET">
@ Search:
@ <input type="text" size=40 name="s" value="%h(PD("s",""))">
@ <input type="submit" value="Go">
if( g.okRead + g.okCheckout + g.okRdWiki>1 ){
char *z;
@ <br>Look in:
if( g.okRead ){
z = srchTkt ? " checked " : "";
@ <label for="t"><input type="checkbox" name="t" id="t" value="1"%s(z)>
@ Tickets</label>
}
if( g.okCheckout ){
z = srchCkin ? " checked " : "";
@ <label for="c"><input type="checkbox" name="c" id="c" value="1"%s(z)>
@ Checkins</label>
}
if( g.okRdWiki ){
z = srchWiki ? " checked " : "";
@ <label for="w"><input type="checkbox" name="w" id="w" value="1"%s(z)>
@ Wiki Pages</label>
}
if( g.okCheckout ){
z = srchFile ? " checked " : "";
@ <label for="f"><input type="checkbox" name="f" id="f" value="1"%s(z)>
@ Filenames</label>
}
}
@ </form>
@ <div>
if( azResult && azResult[0] ){
int i, n;
@ <hr>
for(i=n=0; azResult[i]; i+=4, n++){
@ <p><a href="%s(azResult[i+1])">%s(azResult[i+2])</a><br>
@ %s(azResult[i+3])</p>
}
if( nPage || n>=30 ){
@ <hr><p align="center">
/* provide a history */
cgi_printf("Page: ");
for( i=0; i<nPage; i++ ){
cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">%d</a> ",
zPattern, srchTkt, srchCkin, srchWiki, srchFile, i, i+1);
}
cgi_printf("%d ", nPage+1);
if( n>=30 ){
cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">More</a>",
zPattern, srchTkt, srchCkin, srchWiki, srchFile, nPage+1);
}
@ </p>
}
}else if( zPattern && zPattern[0] ){
@ No results for <strong>%h(zPattern)</strong>
}
@ </div>
common_footer();
}
syntax highlighted by Code2HTML, v. 0.9.1