/*
	CVSNT binary delta generator
    Copyright (C) 2004 Tony Hoyle and March-Hare Software Ltd

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "config.h"

#include <stdlib.h>
#ifdef _WIN32
#include <malloc.h>
#endif
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "cvsdelta.h"

#if defined(_WIN32)
  #include <winsock2.h>
#else
 #ifdef HAVE_ARPA_INET_H
  #include <arpa/inet.h> 
 #elif defined(HAVE_NETINET_IN_H)
  #include <netinet/in.h> 
 #endif
#endif

#ifndef min
#define min(a,b) (((a)<(b))?(a):(b))
#endif

/* Certain systems don't allow nonaligned word comparisons, which is a real
   performance drag on the cvsdelta code as you have to do it a byte at a time */
#if defined(__sparc__) || defined(__hppa__)
#define USE_BYTE_COMPARES
#endif

static const unsigned short single_hash[256] =
{
  /* Random numbers generated using SLIB's pseudo-random number
   *    * generator. */
  0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
  0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
  0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
  0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
  0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
  0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
  0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
  0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
  0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
  0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
  0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
  0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
  0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
  0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
  0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
  0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
  0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
  0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
  0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
  0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
  0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
  0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
  0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
  0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
  0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
  0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
  0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
  0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
  0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
  0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
  0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
  0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
};

#define CHEW(x) (single_hash[(unsigned)x])

static const unsigned primes[] =
{
//  11, 19, 37, 73, 109, 163, 251, 367, 557, 823, 1237,
  1861, 2777, 4177, 6247, 9371, 14057, 21089, 31627,
  47431, 71143, 106721, 160073, 240101, 360163, 540217,
  810343, 1215497, 1823231, 2734867, 4102283, 6153409,
  9230113, 13845163
};

#define NUM_PRIMES (sizeof(primes)/sizeof(primes[0]))

void cvsdelta::calc_check(const Byte *data, checksum_t *check)
{
  int n;
  unsigned short low=0,high=0;

  check->next=0;
  check->base=data;
  check->new_data=false;

  for (n=0; n<block_size; n++)
  {
       low  += CHEW(*data++);
       high += low;
   }
  check->low=low;
  check->high=high;
}

void cvsdelta::slide_check(checksum_t *check)
{
   const Byte *p=check->base;
   unsigned short d0 = CHEW(*p);
   unsigned short d1 = CHEW(*(p+block_size));
   check->base++;
   check->low-=d0;
   check->high-=d0*block_size;
   check->low+=d1;
   check->high+=check->low;
}

unsigned cvsdelta::calc_hash(const checksum_t *check)
{
    const unsigned high = check->high;
    const unsigned low = check->low;
    const unsigned it = (high >> 2) + (low << 3) + (high << 16);

    return (it ^ high ^ low);
}

int cvsdelta::next_prime(unsigned int size)
{
  int n;

  for (n = 0; n < NUM_PRIMES; n++)
    if (primes[n]>size)
      return primes[n];

  return primes[NUM_PRIMES-1];
}

int cvsdelta::copy_buffer(const Byte *from, unsigned start, unsigned len, ByteArray& data_buffer)
{
	Byte *p;
	size_t count,n;
	size_t s = data_buffer.size();
	data_buffer.resize(s+len);
	memcpy((Byte*)&data_buffer[s],from+start,len);

	/* Merge the buffer into the calculated checksum data */
	count=len/block_size;
	for(n=0, p=(Byte*)&data_buffer[s]+(count-1)*block_size; n<count; n++, p-=block_size)
	{
		checksum_t *c = check2+n+check2_count;
		unsigned h;
		calc_check(p,c);
		h=calc_hash(c)%prime;
		c->next=hash[h];
		c->new_data=true;
		hash[h]=c;
	}
	check2_count+=n;

	return s;
}

unsigned cvsdelta::write_control(char region, unsigned start, unsigned len, const Byte *data, ByteArray& control_buffer, ByteArray& data_buffer, bool from_data)
{
  unsigned control=control_buffer.size();
  Byte *cp;
  unsigned char control_bit = region?0x80:0;

  control_buffer.resize(control_buffer.size()+32);

  cp = (Byte*)&control_buffer[control];

  if(region && !from_data)
    start = copy_buffer(data,start,len,data_buffer);

  if(len<0x20)
  {
    *(cp++)=(unsigned char)len|control_bit;
    control+=1;
  }
  else if(len<0x2000)
  {
    *(cp++)=(unsigned char)(len>>8)+0x20|control_bit;
    *(cp++)=(unsigned char)(len&0xFF);
    control+=2;
  }
  else if(len<0x200000)
  {
    *(cp++)=(unsigned char)(len>>16)+0x40|control_bit;
    *(cp++)=(unsigned char)((len&0xFF00)>>8);
    *(cp++)=(unsigned char)(len&0xFF);
    control+=3;
  }
  else if(len<0x20000000)
  {
    *(cp++)=(unsigned char)(len>>24)+0x60|control_bit;
    *(cp++)=(unsigned char)((len&0xFFFF00)>>16);
    *(cp++)=(unsigned char)((len&0xFF00)>>8);
    *(cp++)=(unsigned char)(len&0xFF);
    control+=4;
  }
  else assert(0); /* Max. chunksize limit 768MB */

  if(start<0x40)
  {
    *(cp++)=(unsigned char)start;
    control+=1;
  }
  else if(start<0x4000)
  {
    *(cp++)=(unsigned char)(start>>8)+0x40;
    *(cp++)=(unsigned char)(start&0xFF);
    control+=2;
  }
  else if(start<0x400000)
  {
    *(cp++)=(unsigned char)(start>>16)+0x80;
    *(cp++)=(unsigned char)((start&0xFF00)>>8);
    *(cp++)=(unsigned char)(start&0xFF);
    control+=3;
  }
  else if(start<0x40000000)
  {
    *(cp++)=(unsigned char)(start>>24)+0xC0;
    *(cp++)=(unsigned char)((start&0xFF0000)>>16);
    *(cp++)=(unsigned char)((start&0xFF00)>>8);
    *(cp++)=(unsigned char)(start&0xFF);
    control+=4;
  }
  else assert(0); /* Max chunkstart 1.6GB */

  control_buffer.resize(control);
  return control;
}

unsigned cvsdelta::match(const Byte *p1, const Byte *p2, const Byte **pbase, int max_back, int max_forward)
{
	const Byte *startp1=p1,*startp2=p2;
	const unsigned long *ul1=(const unsigned long *)p1,*ul2=(const unsigned long *)p2;
	const Byte *minp1 = p1 - max_back;
	const Byte *maxp1 = p1 + max_forward;
	const Byte *minp2 = p2 - max_back;
	const Byte *maxp2 = p2 + max_forward;

#ifndef USE_BYTE_COMPARES
	ul1=(const unsigned long *)p1;
	ul2=(const unsigned long *)p2;
	ul1--;
	ul2--;
	while(((const Byte *)ul1)>=minp1 && ((const Byte *)ul2)>=minp2 && *ul1==*ul2)
	{
		ul1--;
		ul2--;
	}
	ul1++;
	ul2++;

	p1=(const Byte *)ul1;
	p2=(const Byte *)ul2; 
#endif

	p1--;
	p2--;
	while(p1>=minp1 && p2>=minp2 && *p1==*p2)
	{
		p1--;
		p2--;
	}
	p1++;
	p2++;

	*pbase = p1;

	p1=startp1+block_size;
	p2=startp2+block_size;

#ifndef USE_BYTE_COMPARES
	ul1=(const unsigned long *)p1;
	ul2=(const unsigned long *)p2;
	while(((const Byte *)ul1)<=maxp1 && ((const Byte *)ul2)<=maxp2 && *ul1==*ul2)
	{
		ul1++;
		ul2++;
	}
	ul1--;
	ul2--;
#endif

	p1=(const Byte *)ul1;
	p2=(const Byte *)ul2; 
	while(p1<=maxp1 && p2<=maxp2 && *p1==*p2)
	{
		p1++;
		p2++;
	}
	p1--;
	p2--;

	return p1 - (*pbase);
}


bool cvsdelta::diff(const ByteArray& file1, const ByteArray& file2, ByteArray& output)
{
  unsigned n;
  const Byte *p;
  unsigned count1;
  unsigned count2;
  unsigned miss,hit,control;
  unsigned match_len;
  const Byte *match_start,*last_match;
  ByteArray data_buffer;
  ByteArray control_buffer;
  checksum_t c = {0};
  unsigned int cl,dl;
  unsigned real_size;

  if(file2.size()<=(256*1024))
	  block_size=8;
  else if(file2.size()<=(512*1024))
	  block_size=16;
  else if(file2.size()<=(1024*1024))
	  block_size=32;
  else if(file2.size()<=(2048*1024))
	  block_size=64;
  else
	  block_size=128;

  count1 = file1.size()/block_size;
  count2 = file2.size()/block_size;

  real_size = file2.size();

  if(file1.size()%block_size)
	count1++;

  if(file2.size()%block_size)
	count2++;

  prime = next_prime(count1);
  check1 = (checksum_t*)malloc(count1*sizeof(checksum_t));
  check2 = (checksum_t*)malloc(count2*sizeof(checksum_t));
  check2_count=0;
  hash = (checksum_t**)calloc(prime,sizeof(checksum_t*));
  data_buffer.clear();
  data_buffer.reserve(file2.size()); /* Reserve some memory */
  control_buffer.clear();
  control_buffer.reserve(1024); /* Reserve some memory */

  for(n=0, p=(&file1[0])+((count1-1)*block_size); n<count1; n++, p-=block_size)
  {
     checksum_t *c = check1+n;
     unsigned h;
     calc_check(p,c);
     h=calc_hash(c)%prime;
     c->next=hash[h];
     hash[h]=c;
  }

  miss=hit=control=0;
  last_match = &file2[0]; 
  for(n=0, p=&file2[0]; n<file2.size();)
  {
     checksum_t *c1;
     unsigned h;

     if(c.base==p-1)
       slide_check(&c);
     else 
       calc_check(p,&c);
     h=calc_hash(&c)%prime;
     c1=hash[h];
     while(c1 && (c.low!=c1->low || c.high!=c1->high || memcmp(c.base,c1->base,block_size)))
		c1=c1->next;
     if(c1)
	 {
		size_t f2s = file2.size();
		size_t f2off = (p-&file2[0]);
		size_t f1s = file1.size();
		size_t f1off = (c1->base-&file1[0]);
		match_len = match(c.base,c1->base, &match_start, p-last_match, min(f2s-f2off,f1s-f1off));
	 }
     else
		match_len = 0;
     if(match_len)
     {
		unsigned matched_minus = c.base-match_start;
		unsigned matched_plus = match_len - matched_minus;
		if(match_start>last_match)
		{
			/* insert */
			control+=write_control(1,last_match-&file2[0],match_start-last_match,&file2[0],control_buffer,data_buffer, 0);
			miss+=match_start-last_match;
		}
		/* copy */
		if(c1->new_data)
		{
			assert(((c1->base-matched_minus)-&data_buffer[0])<data_buffer.size());
			control+=write_control(1,(c1->base-matched_minus)-&data_buffer[0],match_len,NULL,control_buffer,data_buffer, 1);
		}
		else
		{
			assert(((c1->base-matched_minus)-&file1[0])<file1.size());
			control+=write_control(0,(c1->base-matched_minus)-&file1[0],match_len,NULL,control_buffer,data_buffer, 0);
		}

		if(!matched_plus)
			matched_plus++;
		p+=matched_plus;
		n+=matched_plus;
		last_match=p;
		hit+=match_len;
     }
     else
     {
		p++;
		n++;
     }
  }
  
  if(p>last_match)
  {
	control+=write_control(1,last_match-&file2[0],p-last_match,&file2[0],control_buffer,data_buffer, 0);
	miss+=p-last_match;
  }


  cl = control_buffer.size();
  dl = data_buffer.size();

  output.resize(cl+dl+sizeof(cvsd_header));
  cvsd_header *h = (cvsd_header*)&output[0];
  h->version=htons(1);
  h->control_length=htonl(cl);
  h->data_length=htonl(dl);
  h->block_size=htons(block_size);
  h->file_size=htonl(real_size);
  memcpy((Byte*)&output[0]+sizeof(cvsd_header),&control_buffer[0],cl);
  memcpy((Byte*)&output[0]+cl+sizeof(cvsd_header),&data_buffer[0],dl);
  free(check1);
  free(check2);
  free(hash);
  control_buffer.clear();
  data_buffer.clear();
  return true;
}

// file1 original file
// file2 patch
// file3 output buffer
bool cvsdelta::patch(const ByteArray& file1, const ByteArray& file2, ByteArray& output)
{
	const Byte *control_p,*data_p;
	Byte *dest_p;
	size_t control_l,data_l, total_size,blocksize;
	cvsd_header *h = (cvsd_header*)&file2[0];
	if(h->version!=htons(1))
		return false;
		
	control_p = &file2[0]+sizeof(cvsd_header);
	control_l = ntohl(h->control_length);
	data_p = control_p + control_l;
	data_l = ntohl(h->data_length);
	total_size = ntohl(h->file_size);
	blocksize = ntohl(h->block_size);

	output.resize(total_size);
	dest_p = &output[0];


	while(control_l)
	{
		int region = control_p[0]&0x80;
		size_t pos,len;

		if((control_p[0]&0x60)==0x60)
		{
			assert(control_l>=4);
			len = ((control_p[0]&0x1f)<<24)|(control_p[1]<<16)|(control_p[2]<<8)|control_p[3];
			control_p+=4;
			control_l-=4;
		} else if((control_p[0]&0x60)==0x40)
		{
			assert(control_l>=3);
			len = ((control_p[0]&0x1f)<<16)|(control_p[1]<<8)|control_p[2];
			control_p+=3;
			control_l-=3;
		} else if((control_p[0]&0x60)==0x20)
		{
			assert(control_l>=2);
			len = ((control_p[0]&0x1f)<<8)|control_p[1];
			control_p+=2;
			control_l-=2;
		}
		else
		{
			assert(control_l>=1);
			len = (control_p[0]&0x1f);
			control_p+=1;
			control_l-=1;
		}

		if((control_p[0]&0xC0)==0xC0)
		{
			assert(control_l>=4);
			pos = ((control_p[0]&0x3f)<<24)|(control_p[1]<<16)|(control_p[2]<<8)|control_p[3];
			control_p+=4;
			control_l-=4;
		} else if((control_p[0]&0xC0)==0x80)
		{
			assert(control_l>=3);
			pos = ((control_p[0]&0x3f)<<16)|(control_p[1]<<8)|control_p[2];
			control_p+=3;
			control_l-=3;
		} else if((control_p[0]&0xC0)==0x40)
		{
			assert(control_l>=2);
			pos = ((control_p[0]&0x3f)<<8)|control_p[1];
			control_p+=2;
			control_l-=2;
		}
		else
		{
			assert(control_l>=1);
			pos = (control_p[0]&0x3f);
			control_p+=1;
			control_l-=1;
		}
		
		if(control_l==0 && dest_p+len>&output[0]+total_size)
		{
			/* There can be a slight overflow due to block padding */
			/* This is normal.. fix it here */
			len=(&output[0]+total_size)-dest_p;
		}
		assert(dest_p+len<=&output[0]+total_size);
		if(region)
		{
			assert(data_p+pos+len<=data_p+data_l);
			if(len)
				memcpy(dest_p,data_p+pos,len);
		}
		else
		{	
			assert(pos+len<=file1.size());
			if(len)
				memcpy(dest_p,&file1[0]+pos,len);
		}
		dest_p+=len;
		assert(dest_p<=&output[0]+total_size);
	}
    
	return true;
}


syntax highlighted by Code2HTML, v. 0.9.1