/* * lcs.c : routines for creating an lcs * * ==================================================================== * Copyright (c) 2000-2004 CollabNet. All rights reserved. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms * are also available at http://subversion.tigris.org/license-1.html. * If newer versions of this license are posted there, you may use a * newer version instead, at your option. * * This software consists of voluntary contributions made by many * individuals. For exact contribution history, see the revision * history and logs, available at http://subversion.tigris.org/. * ==================================================================== */ #include #include #include #include "diff.h" /* * Calculate the Longest Common Subsequence between two datasources. * This function is what makes the diff code tick. * * The LCS algorithm implemented here is described by Sun Wu, * Udi Manber and Gene Meyers in "An O(NP) Sequence Comparison Algorithm" * */ typedef struct svn_diff__snake_t svn_diff__snake_t; struct svn_diff__snake_t { apr_off_t y; svn_diff__lcs_t *lcs; svn_diff__position_t *position[2]; }; static APR_INLINE void svn_diff__snake(apr_off_t k, svn_diff__snake_t *fp, int idx, svn_diff__lcs_t **freelist, apr_pool_t *pool) { svn_diff__position_t *start_position[2]; svn_diff__position_t *position[2]; svn_diff__lcs_t *lcs; svn_diff__lcs_t *previous_lcs; /* The previous entry at fp[k] is going to be replaced. See if we * can mark that lcs node for reuse, because the sequence up to this * point was a dead end. */ lcs = fp[k].lcs; while (lcs) { lcs->refcount--; if (lcs->refcount) break; previous_lcs = lcs->next; lcs->next = *freelist; *freelist = lcs; lcs = previous_lcs; } if (fp[k - 1].y + 1 > fp[k + 1].y) { start_position[0] = fp[k - 1].position[0]; start_position[1] = fp[k - 1].position[1]->next; previous_lcs = fp[k - 1].lcs; } else { start_position[0] = fp[k + 1].position[0]->next; start_position[1] = fp[k + 1].position[1]; previous_lcs = fp[k + 1].lcs; } /* ### Optimization, skip all positions that don't have matchpoints * ### anyway. Beware of the sentinel, don't skip it! */ position[0] = start_position[0]; position[1] = start_position[1]; while (position[0]->node == position[1]->node) { position[0] = position[0]->next; position[1] = position[1]->next; } if (position[1] != start_position[1]) { lcs = *freelist; if (lcs) { *freelist = lcs->next; } else { lcs = apr_palloc(pool, sizeof(*lcs)); } lcs->position[idx] = start_position[0]; lcs->position[abs(1 - idx)] = start_position[1]; lcs->length = position[1]->offset - start_position[1]->offset; lcs->next = previous_lcs; lcs->refcount = 1; fp[k].lcs = lcs; } else { fp[k].lcs = previous_lcs; } if (previous_lcs) { previous_lcs->refcount++; } fp[k].position[0] = position[0]; fp[k].position[1] = position[1]; fp[k].y = position[1]->offset; } static svn_diff__lcs_t * svn_diff__lcs_reverse(svn_diff__lcs_t *lcs) { svn_diff__lcs_t *next; svn_diff__lcs_t *prev; next = NULL; while (lcs != NULL) { prev = lcs->next; lcs->next = next; next = lcs; lcs = prev; } return next; } svn_diff__lcs_t * svn_diff__lcs(svn_diff__position_t *position_list1, /* pointer to tail (ring) */ svn_diff__position_t *position_list2, /* pointer to tail (ring) */ apr_pool_t *pool) { int idx; apr_off_t length[2]; svn_diff__snake_t *fp; apr_off_t d; apr_off_t k; apr_off_t p = 0; svn_diff__lcs_t *lcs, *lcs_freelist = NULL; svn_diff__position_t sentinel_position[2]; /* Since EOF is always a sync point we tack on an EOF link * with sentinel positions */ lcs = apr_palloc(pool, sizeof(*lcs)); lcs->position[0] = apr_pcalloc(pool, sizeof(*lcs->position[0])); lcs->position[0]->offset = position_list1 ? position_list1->offset + 1 : 1; lcs->position[1] = apr_pcalloc(pool, sizeof(*lcs->position[1])); lcs->position[1]->offset = position_list2 ? position_list2->offset + 1 : 1; lcs->length = 0; lcs->refcount = 1; lcs->next = NULL; if (position_list1 == NULL || position_list2 == NULL) return lcs; /* Calculate length of both sequences to be compared */ length[0] = position_list1->offset - position_list1->next->offset + 1; length[1] = position_list2->offset - position_list2->next->offset + 1; idx = length[0] > length[1] ? 1 : 0; /* strikerXXX: here we allocate the furthest point array, which is * strikerXXX: sized M + N + 3 (!) */ fp = apr_pcalloc(pool, sizeof(*fp) * (apr_size_t)(length[0] + length[1] + 3)); fp += length[idx] + 1; sentinel_position[idx].next = position_list1->next; position_list1->next = &sentinel_position[idx]; sentinel_position[idx].offset = position_list1->offset + 1; sentinel_position[abs(1 - idx)].next = position_list2->next; position_list2->next = &sentinel_position[abs(1 - idx)]; sentinel_position[abs(1 - idx)].offset = position_list2->offset + 1; /* These are never dereferenced, only compared by value, so we * can safely fake these up and the void* cast is OK. */ sentinel_position[0].node = (void*)&sentinel_position[0]; sentinel_position[1].node = (void*)&sentinel_position[1]; d = length[abs(1 - idx)] - length[idx]; /* k = -1 will be the first to be used to get previous * position information from, make sure it holds sane * data */ fp[-1].position[0] = sentinel_position[0].next; fp[-1].position[1] = &sentinel_position[1]; p = 0; do { /* Forward */ for (k = -p; k < d; k++) { svn_diff__snake(k, fp, idx, &lcs_freelist, pool); } for (k = d + p; k >= d; k--) { svn_diff__snake(k, fp, idx, &lcs_freelist, pool); } p++; } while (fp[d].position[1] != &sentinel_position[1]); lcs->next = fp[d].lcs; lcs = svn_diff__lcs_reverse(lcs); position_list1->next = sentinel_position[idx].next; position_list2->next = sentinel_position[abs(1 - idx)].next; return lcs; }