/* ** Copyright (c) 2002 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public ** License as published by the Free Software Foundation; either ** version 2 of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** General Public License for more details. ** ** You should have received a copy of the GNU General Public ** License along with this library; if not, write to the ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, ** Boston, MA 02111-1307, USA. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code used to read the CVSROOT/history file from ** the CVS archive and update the CHNG and FILECHNG tables according to ** the content of the history file. All the other CVS-specific stuff should also ** be found here. */ #include "config.h" #include #include #include #include #include #include #include #include #include /* for PATH_MAX */ #include "git.h" static void err_pipe(const char* zMsg,const char* zCmd){ int nErr = 0; error_init(&nErr); @ %h(zMsg) @
  • Unable to execute the following command: @

      @ %h(zCmd)
      @ 

  • @ %h(strerror(errno)) error_finish(nErr); } static int next_cnum(){ char *zResult = db_short_query("SELECT max(cn) FROM chng"); int next_cnum = zResult ? atoi(zResult)+1 : 0; if(zResult) free(zResult); return next_cnum; } /* ** If *nDate==0, it's usually because the commit wasn't correctly read. A NULL ** return code just means that the commit could be the root object. */ static char **git_read_commit( const char *zGitDir, const char *zObject, /* git sha1 object */ char *zAuthor, /* at least 100 bytes */ int *nDate, char *zComment, int nMaxComment ){ char *zCmd; FILE *in; char zLine[PATH_MAX*2]; int bInMsg = 0; char **azParents = 0; int nParent = 0; int nMaxParent = 0; int nComment = 0; char zCommitter[100]; assert(nDate); zCmd = mprintf("GIT_DIR='%s' git-cat-file commit '%s' 2>&1", zGitDir, zObject); if( zCmd==0 ) return 0; in = popen(zCmd, "r"); if( in==0 ){ err_pipe("Reading commit",zCmd); free(zCmd); return 0; } free(zCmd); if( zAuthor ) zAuthor[0] = 0; if( zComment ) zComment[0] = 0; zCommitter[0] = 0; *nDate = 0; while( !feof(in) && !ferror(in) ){ if( 0==fgets(zLine,sizeof(zLine),in) ) break; /* you'll get this if it was some other kind of object */ if( !strncmp(zLine,"error:",6) ) break; if( bInMsg==0 ){ if( zLine[0]=='\n' ){ bInMsg = 1; if( zComment==0 ) break; }else if( 0==strncmp(zLine,"parent ",7) ){ char zParent[100]; if( nParent+2 >= nMaxParent ){ nMaxParent = (nParent+2) * 2; azParents = realloc(azParents, sizeof(char*)*nMaxParent); if( azParents==0 ) common_err("%s",strerror(errno)); } sscanf(&zLine[7],"%50[0-9a-fA-F]",zParent); azParents[nParent++] = strdup(zParent); azParents[nParent] = 0; }else if( zAuthor!= 0 && 0==strncmp(zLine,"author ",7) ){ sscanf(&zLine[7],"%90[^<]%*[^>]>",zAuthor); }else if( 0==strncmp(zLine,"committer ",10) ){ sscanf(&zLine[10],"%90[^<]%*[^>]> %d",zCommitter,nDate); } }else{ int len = strlen(zLine); if( len+nComment >= nMaxComment ) break; strcpy(&zComment[nComment], zLine); nComment += len; } } pclose(in); if( *nDate==0 ){ if( azParents ) db_query_free(azParents); return NULL; } if( zComment && zComment[0]==0 && bInMsg ){ strncpy(zComment,"Empty log message",nMaxComment); nComment = strlen(zComment); } if( zCommitter[0] ){ char *zMsg = mprintf( "\n\nCommitter: %s", zCommitter); int len = strlen(zLine); if( len+nComment < nMaxComment ){ strcpy(&zComment[nComment], zMsg); nComment += len; } if( zAuthor!=0 && zAuthor[0]==0 ){ /* apparently GIT commits don't always have an author */ strcpy(zAuthor, zCommitter); } } return azParents; } static void git_ingest_commit_chng( const char *zGitDir, int cn, const char *zCommit, time_t nDate, const char *zAuthor, const char *zComment, const char *zPrevVers, int skipInsertFile ){ FILE *in = 0; char zLine[PATH_MAX*3]; if( zPrevVers[0]==0 ){ /* Initial commit, hence no parent(s) to compare against. That means just a ** straight tree list */ char *zCmd = mprintf("GIT_DIR='%s' git-ls-tree -r '%s'", zGitDir, zCommit); in = popen(zCmd,"r"); if( in==0 ){ err_pipe("Reading tree",zCmd); return; } free(zCmd); while( !feof(in) && !ferror(in) ){ char zMode[100], zType[100], zObject[100], zPath[PATH_MAX]; if( 0==fgets(zLine,sizeof(zLine),in) ) break; remove_newline(zLine); sscanf(zLine, "%8[0-9] %90s %50[0-9a-fA-F] %[^\t]", zMode, zType, zObject, zPath); if( !strcmp(zType,"blob") ){ int nIns = 0; int nDel = 0; db_execute( "INSERT INTO filechng(cn,filename,vers,prevvers,chngtype,nins,ndel) " "VALUES(%d,'%q','%s','',1,%d,%d)", cn, zPath, zCommit, nIns, nDel); if( !skipInsertFile ) insert_file(zPath, cn); } } }else{ /* Now get the list of changed files and turn them into FILE ** and FILECHNG records. git-diff-tree is disgustingly PERFECT for ** this. Compared to the hassles one has to go through with CVS or ** Subversion to find out what's in a change tree, it's just mind ** blowing how ideal this is. FIXME: we're not handling renames or ** copies right now. When/if we do, add in the "-C -M" flags. */ char *zCmd = mprintf("GIT_DIR='%s' git-diff-tree -r -t '%s' '%s'", zGitDir, zPrevVers, zCommit); in = popen(zCmd,"r"); if( in==0 ){ err_pipe("Reading tree",zCmd); return; } free(zCmd); while( !feof(in) && !ferror(in) ){ char zSrcMode[100], zDstMode[100], zSrcObject[100], zDstObject[100]; char cStatus, zPath[PATH_MAX]; if( 0==fgets(zLine,sizeof(zLine),in) ) break; remove_newline(zLine); sscanf(zLine, "%*c%8s %8s %50[0-9a-fA-F] %50[0-9a-fA-F] %c %[^\t]", zSrcMode, zDstMode, zSrcObject, zDstObject, &cStatus, zPath); if( zSrcMode[1]=='0' || zDstMode[1]=='0' ){ int nIns = 0; int nDel = 0; if( cStatus=='N' || cStatus=='A' ){ if( !skipInsertFile ) insert_file(zPath, cn); db_execute( "INSERT INTO " " filechng(cn,filename,vers,prevvers,chngtype,nins,ndel) " "VALUES(%d,'%q','%s','',1,%d,%d)", cn, zPath, zCommit, nIns, nDel); }else if( cStatus=='D' ){ db_execute( "INSERT INTO " " filechng(cn,filename,vers,prevvers,chngtype,nins,ndel) " "VALUES(%d,'%q','%s','%s',2,%d,%d)", cn, zPath, zCommit, zPrevVers, nIns, nDel); }else{ db_execute( "INSERT INTO " " filechng(cn,filename,vers,prevvers,chngtype,nins,ndel) " "VALUES(%d,'%q','%s','%s',0,%d,%d)", cn, zPath, zCommit, zPrevVers, nIns, nDel); } } } } assert(in); pclose(in); db_execute( "INSERT INTO chng(cn, date, branch, milestone, user, message) " "VALUES(%d,%d,'',0,'%q','%q')", cn, nDate, zAuthor, zComment ); xref_checkin_comment(cn, zComment); } /* ** Read in any commits in the tree into the ci table. To sanely deal with ** multi-parent merges, this may be a recursive function. Returns the ** number of _new_ commits. */ static int git_ingest_commit_tree(const char *zGitDir, const char *zCommit){ int i; char **azParents = 0; char zCur[50]; int nNew = 0; strncpy(zCur,zCommit,sizeof(zCur)); while( zCur[0]!=0 ){ if( db_exists("SELECT 1 FROM filechng WHERE vers='%s' " "UNION ALL " "SELECT 1 FROM ci WHERE vers='%s'", zCur, zCur)){ /* Seen this already, or it's already one of the commits we're going ** to ingest. */ break; } { /* Read the commit in a different scope so all the large static ** buffers aren't holding stack space when we recurse. */ char zComment[10000]; char zAuthor[100]; int nDate = 0; azParents = git_read_commit(zGitDir,zCur,zAuthor,&nDate, zComment,sizeof(zComment)); if( nDate==0 ) break; db_execute("INSERT INTO ci(vers,date,author,message,prevvers) " "VALUES('%s',%d,'%q','%q','%s');", zCur, nDate, zAuthor, zComment, azParents ? azParents[0] : ""); nNew ++; } /* we'll want to break out if we're at a root object */ zCur[0] = 0; if( azParents && azParents[0] ){ /* If there's more than one parent, recurse on the extras. Otherwise, ** just update our "current" counter. This minimizes actual recursions ** so multi-parent commits don't end up blowing our stack. */ for(i=1; azParents[i]; i++){ nNew += git_ingest_commit_tree(zGitDir,azParents[i]); } strncpy(zCur,azParents[0],sizeof(zCur)); db_query_free(azParents); } } return nNew; } /* ** Read in the git references of zType (either "heads" or "tags") and turn them ** into new CHNG records. If bTags is non-zero, also generate/update milestones ** for the references (i.e. for tags rather than heads). */ static int git_read_refs(const char *zGitDir,const char *zType){ DIR *dir; struct dirent *ent; int nCommits = 0; char *zFile = mprintf("%s/refs/%s", zGitDir, zType); dir = opendir( zFile ); free(zFile); if( dir==NULL ) return 0; while( 0!=(ent=readdir(dir)) ){ char zObject[100]; char *zContents; char *zFile; char **azRef; struct stat statbuf; int cn = 0; if( ent->d_name[0]=='.' ) continue; zFile = mprintf("%s/refs/%s/%s", zGitDir, zType, ent->d_name); if( zFile==0 ) continue; if( stat(zFile, &statbuf) ){ /* Can't read the file, skip */ free(zFile); continue; } azRef = db_query("SELECT object,cn,seen FROM %s WHERE name='%q'", zType, ent->d_name); if( azRef && azRef[0] && azRef[1] && azRef[2] ){ if( statbuf.st_mtime<=atoi(azRef[2]) ){ /* file hasn't been modified since last time we looked at it */ db_query_free(azRef); free(zFile); continue; } cn = atoi(azRef[1]); /* don't need to lose this... */ } zContents = common_readfile( zFile ); free(zFile); if(zContents==0) continue; zObject[0] = 0; sscanf(zContents,"%50[0-9a-fA-F]",zObject); free(zContents); if( zObject[0]==0 ) continue; /* update the seen time... We'll be updating cn later, after we actually ** read any new stuff */ db_execute("REPLACE INTO %s(name,cn,object,seen) VALUES('%q',%d,'%q',%d)", zType, ent->d_name, cn, zObject, statbuf.st_mtime); if( azRef && azRef[0] && !strcmp(zObject,azRef[0]) ){ /* contents of the ref haven't changed */ db_query_free(azRef); continue; } /* Fill out the temporary ci table with any changes. */ nCommits += git_ingest_commit_tree(zGitDir,zObject); db_query_free(azRef); } closedir(dir); return nCommits; } static void git_update_refs(const char* zType){ int i; char **azRefs; /* assumes a logical ordering for names... There's no other way to really do ** this, unfortunately. */ azRefs = db_query("SELECT name,object,seen,cn FROM %s ORDER by name", zType); for( i=0; azRefs[i]; i+=4 ){ const char *zName = azRefs[i]; const char *zObject = azRefs[i+1]; time_t nSeen = atoi(azRefs[i+2]); int cn = atoi(azRefs[i+3]); int chngcn = 0; char **azChng; char *zCn; /* Find the CHNG record for the commit. This isn't as nice as we'd like ** because CHNG records, being designed for CVS, don't actually store ** version numbers and, unlike Subversion, a cn doesn't directly map ** to a revision number. So we need to grab the cn from whatever FILECHNG ** record corresponds. Note that we _can_ do this in a single query, but ** it's quite, quite slow. Probably faster in SQLite 3... */ zCn = db_short_query( "SELECT cn FROM filechng WHERE vers='%s' LIMIT 1", zObject); if( zCn==0 ) continue; chngcn = atoi(zCn); free(zCn); if( chngcn==0 ) continue; azChng = db_query( "SELECT date, user FROM chng WHERE cn=%d", chngcn ); /* a FILECHNG without a corresponding CHNG? I think not... */ assert(azChng); if( cn==0 ) cn = next_cnum(); /* Create/update a milestone. The message text basically contains ** some information about the type of ref, the name, the commit object, ** and, most importantly, a reference to the actual CHNG which, at ** display time, should turn into a hyperlink. Note that in practice, ** the milestone will appear next to the commit in the timeline. But ** it serves as the only way to document that the commit itself is ** somehow special. At some point we should be able to add some concept ** of tag browsing. */ db_execute( "REPLACE INTO chng(cn,date,branch,milestone,user,directory,message) " "VALUES(%d,%d,'',2,'%q','','%q (%s, commit [%d], object %q)')", cn, atoi(azChng[0]), azChng[1], zName, zType, chngcn, zObject ); db_execute( "REPLACE INTO %s(name,cn,object,seen) VALUES('%q',%d,'%s',%d)", zType, zName, cn, zObject, nSeen ); db_query_free( azChng ); } db_query_free(azRefs); } /* ** Process recent activity in the GIT repository. ** ** If any errors occur, output an error page and exit. ** ** If the "isReread" flag is set, it means that the history file is being ** reread to pick up changes that we may have missed earlier. */ static int git_history_update(int isReread){ const char *zRoot; char **azResult; int cn; int nOldSize = 0; int i; int nNewRevs; db_execute("BEGIN"); /* Get the path to local repository and last revision number we have in db * If there's no repository defined, bail and wait until the admin sets one. */ zRoot = db_config("cvsroot",""); if( zRoot[0]==0 ) return 0; nOldSize = atoi(db_config("historysize","0")); /* When the historysize is zero, it means we're in for a fresh start or we've ** restarted at the beginning. In either case, we go with an empty HEADS ** and TAGS tables. */ if( nOldSize==0 ){ sqlite3 *pDb = db_open(); char *zErrMsg = 0; char *zHead; zHead = mprintf("%s/HEAD"); if( !access(zHead,R_OK) ){ /* no HEAD in the project directory means it's probably _not_ a GIT ** repository. */ int nErr = 0; error_init(&nErr); @ Error @
  • Unable to locate: @

          @ %h(zHead)
          @ 

  • @ Are you sure this is a GIT repository? error_finish(nErr); db_execute("COMMIT;"); return -1; } /* If it doesn't succeed, hope it's just a "already exists" error because we ** don't seem to have return codes accurate enough to determine if the table ** add failed. If the table already exists, it _will_ fail. */ sqlite3_exec(pDb,"CREATE TABLE " "heads(name text primary key, " " object text,cn,seen,UNIQUE(name));", 0, 0, &zErrMsg); sqlite3_exec(pDb, "CREATE TABLE " "tags(name text primary key, " " object text,cn,seen,UNIQUE(name));", 0, 0, &zErrMsg); /* Make sure they're empty. We're starting fresh */ db_execute("DELETE FROM heads; DELETE from tags"); } /* git has multiple "heads", each representing a different ** branch. Changes may occur in any of them and it's most efficient ** just to check each one separately for new commits, _then_ to combine ** everything into one merged linear sequence */ db_execute( "CREATE TEMP TABLE ci(vers,date,author,message,prevvers);"); nNewRevs = git_read_refs(zRoot,"heads"); nNewRevs += git_read_refs(zRoot,"tags"); if( nNewRevs==0 ) { git_update_refs("heads"); git_update_refs("tags"); db_execute("COMMIT"); return 0; } /* That filled the ci table, but we dont't actually generate any CHNG ** or FILECHNG entries because walking the revision tree from multiple ** leaf nodes isn't going to give us a nice ordering. In fact, the most ** recent changes would have lower change numbers than the oldest, among ** other things. ** ** Now we turn each revision into a list of files and generate the CHNG, ** FILE and FILECHNG records */ azResult = db_query("SELECT vers,date,author,message,prevvers " "FROM ci ORDER BY date"); assert(azResult); for(cn=next_cnum(), i=0; azResult[i]; cn++, i+=5){ git_ingest_commit_chng(zRoot, cn, azResult[i], atoi(azResult[i+1]), azResult[i+2], azResult[i+3], azResult[i+4], (nOldSize==0)?1:0); } db_query_free(azResult); /* We couldn't do this before since GIT tags are basically milestones ** that point at other CHNG entries and we may not have had all the CHNG ** records. We do heads here too. What this means is that each head is ** basically a moving milestone. Not sure how desirable this really is. */ git_update_refs("heads"); git_update_refs("tags"); /* ** Update the "historysize" entry. For GIT, it only matters that it's ** non-zero except when we need to re-read the database. */ db_execute("UPDATE config SET value=%d WHERE name='historysize';", nOldSize + nNewRevs ); db_config(0,0); /* We delayed populating FILE till now on initial scan */ if( nOldSize==0 ){ update_file_table_with_lastcn(); } /* Commit all changes to the database */ db_execute("COMMIT;"); return 0; } /* ** Diff two versions of a file, handling all exceptions. ** ** If oldVersion is NULL, then this function will output the ** text of version newVersion of the file instead of doing ** a diff. */ static int git_diff_versions( const char *oldVersion, const char *newVersion, const char *zFile ){ char *zCmd; FILE *in; zCmd = mprintf("GIT_DIR='%s' git-diff-tree -t -p -r '%s' '%s' 2>/dev/null", db_config("cvsroot",""), quotable_string(oldVersion), quotable_string(newVersion), quotable_string(zFile)); in = popen(zCmd, "r"); free(zCmd); if( in==0 ) return -1; output_pipe_as_html(in,1); pclose(in); return 0; } static char *git_get_blob( const char *zGitDir, const char *zTreeish, const char* zPath ){ FILE *in; char zLine[PATH_MAX*2]; char *zCmd; if( zTreeish==0 || zTreeish[0]==0 || zPath==0 || zPath[0]==0 ) return 0; zCmd = mprintf("GIT_DIR='%s' git-ls-tree -r '%s' '%s'", zGitDir, quotable_string(zTreeish), quotable_string(zPath)); in = popen(zCmd,"r"); if( in==0 ){ err_pipe("Reading tree",zCmd); return 0; } free(zCmd); while( !feof(in) && !ferror(in) ){ char zMode[100], zType[100], zObject[100]; if( 0==fgets(zLine,sizeof(zLine),in) ) break; sscanf(zLine, "%8s %90s %50[0-9a-fA-F]", zMode, zType, zObject); if( !strcmp(zType,"blob") ){ return strdup(zObject); } } return 0; } static int git_dump_version(const char *zVersion, const char *zFile,int bRaw){ int rc = -1; char *zCmd; const char *zRoot = db_config("cvsroot",""); const char *zBlob = git_get_blob(zRoot, zVersion, zFile); if( zBlob==0 ) return -1; zCmd = mprintf("GIT_DIR='%s' git-cat-file blob '%s' 2>/dev/null", zRoot, zBlob); rc = common_dumpfile( zCmd, zVersion, zFile, bRaw ); free(zCmd); return rc; } static int git_diff_chng(int cn, int bRaw){ char *zRev; char *zCmd; char zLine[2000]; FILE *in; zRev = db_short_query("SELECT vers FROM filechng WHERE cn=%d", cn); if( !zRev || !zRev[0] ) return -1; /* Invalid check-in number */ zCmd = mprintf("GIT_DIR='%s' git-diff-tree -t -p -r '%s' 2>/dev/null", db_config("cvsroot",""), quotable_string(zRev)); free(zRev); in = popen(zCmd, "r"); free(zCmd); if( in==0 ) return -1; if( bRaw ){ while( !feof(in) ){ int amt = fread(zLine, 1, sizeof(zLine), in); if( amt<=0 ) break; cgi_append_content(zLine, amt); } }else{ output_pipe_as_html(in,1); } pclose(in); return 0; } void init_git(){ g.scm.zSCM = "git"; g.scm.zName = "GIT"; g.scm.pxHistoryUpdate = git_history_update; g.scm.pxDiffVersions = git_diff_versions; g.scm.pxDiffChng = git_diff_chng; g.scm.pxIsFileAvailable = 0; /* use the database */ g.scm.pxDumpVersion = git_dump_version; }