/*
 *  Hashtest -- a small utility to test how  CRC32() and PJWHASH32()
 *  functions work with real-world user input.
 *
 *  Part of ZMailer -- test which mailbox hash function is best for you.
 *
 *  This should show you that 'hashtest -XX' produces the best two-
 *  level subdirectory hash with max 676 sub-buckets, and likely all
 *  bucket abundances are within 20-30 % of each other.
 *
 *  My test material (189438 userids from several systems pulled together)
 *  showed that  pjwhash32() suffers from some odd thing which always
 *  loses two low bits, and thus produces only 169 different hash buckets,
 *  while crc32() produces all 676 buckets.
 *
 *  Old "Pick two first letters of the username for subdir" approach
 *  produced 565 buckets, but the distribution was absolutely terribly
 *  skewed - 20 top-abundant buckets had over 50% of all hits.
 *  The 5 top-abundant buckets all had more than 7000 hits.
 *
 *  Runtime comparisons show that:
 *     -PP:  0.598 sec user space
 *     -XX:  0.592 sec user space
 *     -DD:  0.443 sec user space
 *
 *  from which we can probably safely say that  crc32() and pjwhash32()
 *  are absolutely equal in execution time, and likely represent only
 *  0.150 seconds of the test runtime.  ( Or 790 nanoseconds per user
 *  name -- yeah, Alpha rules ;) Guesstimate says each hash took some
 *  680 instruction cycles -- HOT caches! )
 *
 *  Matti Aarnio <mea@nic.funet.fi> 9-Sep-1999
 *
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Hash functions under test.  Both are defined outside this file and are
 * called below with a NUL-terminated user name.
 * NOTE(review): these prototypes are hand-written here rather than pulled
 * from a header -- confirm they match the actual definitions.
 */
extern long pjwhash32 (const void *);
extern long crc32     (const void *);

/*
 * Print the command-line synopsis and terminate the program.
 *
 * The message goes to stderr so that it never mixes into the bucket
 * names this tool writes to stdout (which are normally piped onward).
 * Exit status is 64, the value BSD <sysexits.h> names EX_USAGE.
 * This function does not return.
 */
static void
usage(void)
{
      fprintf(stderr, "hashtest -({P|D|X}+) < usernamefile\n");
      exit(64);
}


/*
 * Write `count` letters in the range 'A'..'Z' to stdout, one per
 * successive base-26 digit of `hash`, least significant digit first.
 * Taking the hash as unsigned guarantees every remainder is 0..25.
 */
static void
put_hash_letters(unsigned long hash, int count)
{
  int i;

  for (i = 0; i < count; ++i) {
    putc('A' + (int)(hash % 26), stdout);
    hash /= 26;
  }
}

/*
 * Read user names from stdin, one per line, and print one bucket name
 * per input line on stdout.
 *
 * Options (each may be repeated; the repeat count is the number of
 * characters in the bucket name):
 *   -D  use the leading characters of the name itself
 *   -P  use base-26 letters derived from pjwhash32()
 *   -X  use base-26 letters derived from crc32()
 * If several kinds are given, -D takes precedence over -P, and -P
 * over -X.  With no option at all, usage() is called.
 *
 * Input is read with fgets() into a fixed buffer, so a line longer
 * than the buffer is consumed in pieces, each handled as its own name.
 *
 * Returns 0 after end of input.
 */
int main(int argc, char *argv[])
{
  int c;
  int pjwhashes = 0, dirhashes = 0, crchashes = 0;
  char buf[2000];

  /* POSIX getopt() signals the end of options with -1. */
  while ((c = getopt(argc, argv, "?PDX")) != -1) {
    switch (c) {
    case 'P':
      ++pjwhashes;
      break;
    case 'D':
      ++dirhashes;
      break;
    case 'X':
      ++crchashes;
      break;
    default:
      usage();
      break;
    }
  }
  if (!pjwhashes && !dirhashes && !crchashes)
    usage();

  while (fgets(buf, sizeof(buf), stdin) != NULL) {

    char *s;
    int i;

    /* Strip the trailing newline, if this piece of input has one. */
    s = strchr(buf, '\n');
    if (s) *s = 0;

    if (dirhashes) {

      /* Stop at the end of the name: a name shorter than the requested
         prefix must not emit the terminator or stale buffer bytes. */
      for (i = 0, s = buf; i < dirhashes && *s != '\0'; ++i, ++s)
        putc(*s, stdout);

    } else if (pjwhashes) {

      /* Keep the low 32 bits as an unsigned value so that a wide or
         negative `long` result cannot produce a negative remainder. */
      put_hash_letters((unsigned long)pjwhash32(buf) & 0xFFFFFFFFUL,
                       pjwhashes);

    } else { /* CRChashes */

      put_hash_letters((unsigned long)crc32(buf) & 0xFFFFFFFFUL,
                       crchashes);

    }
    putc('\n', stdout);
  }

  return 0;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */