From 515a33d6bd2e41d69c04f28b8a24d9ea85618d42 Mon Sep 17 00:00:00 2001
From: Craig Tiller <ctiller@google.com>
Date: Thu, 10 Nov 2016 15:33:32 -0800
Subject: [PATCH] Perfect hashing code

---
 tools/codegen/core/perfect/.gitignore |    7 +
 tools/codegen/core/perfect/lookupa.c  |  240 +++++
 tools/codegen/core/perfect/lookupa.h  |   24 +
 tools/codegen/core/perfect/perfect.c  | 1367 +++++++++++++++++++++++++
 tools/codegen/core/perfect/perfect.h  |  132 +++
 tools/codegen/core/perfect/perfhex.c  | 1308 +++++++++++++++++++++++
 tools/codegen/core/perfect/recycle.c  |   87 ++
 tools/codegen/core/perfect/recycle.h  |   65 ++
 tools/codegen/core/perfect/run.sh     |    7 +
 tools/codegen/core/perfect/standard.h |   57 ++
 10 files changed, 3294 insertions(+)
 create mode 100644 tools/codegen/core/perfect/.gitignore
 create mode 100644 tools/codegen/core/perfect/lookupa.c
 create mode 100644 tools/codegen/core/perfect/lookupa.h
 create mode 100644 tools/codegen/core/perfect/perfect.c
 create mode 100644 tools/codegen/core/perfect/perfect.h
 create mode 100644 tools/codegen/core/perfect/perfhex.c
 create mode 100644 tools/codegen/core/perfect/recycle.c
 create mode 100644 tools/codegen/core/perfect/recycle.h
 create mode 100755 tools/codegen/core/perfect/run.sh
 create mode 100644 tools/codegen/core/perfect/standard.h

diff --git a/tools/codegen/core/perfect/.gitignore b/tools/codegen/core/perfect/.gitignore
new file mode 100644
index 0000000000..c1489f0819
--- /dev/null
+++ b/tools/codegen/core/perfect/.gitignore
@@ -0,0 +1,7 @@
+perfect
+*.o
+phash.h
+phash.c
+compile.txt
+hash.txt
+
diff --git a/tools/codegen/core/perfect/lookupa.c b/tools/codegen/core/perfect/lookupa.c
new file mode 100644
index 0000000000..c122c4f107
--- /dev/null
+++ b/tools/codegen/core/perfect/lookupa.c
@@ -0,0 +1,240 @@
+/*
+--------------------------------------------------------------------
+lookupa.c, by Bob Jenkins, December 1996.  Same as lookup2.c
+Use this code however you wish.  Public Domain.  No warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.c
+--------------------------------------------------------------------
+*/
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+
+/*
+--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly (a,b,c are updated in place).
+For every delta with one or two bits set, and the deltas of all three
+  high bits or all three low bits, whether the original value of a,b,c
+  is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+  have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a 
+  structure that could support 2x parallelism, like so:
+      a -= b; 
+      a -= c; x = (c>>13);
+      b -= c; a ^= x;
+      b -= a; x = (a<<8);
+      c -= a; b ^= x;
+      c -= b; x = (b>>13);
+      ...
+  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
+  of that parallelism.  They've also turned some of those single-cycle
+  latency instructions into multi-cycle latency instructions.  Still,
+  this is the fastest good hash I could find.  There were about 2^^68
+  to choose from.  I only looked at a billion or so.
+--------------------------------------------------------------------
+*/
+#define mix(a,b,c) /* do-while(0): expands to exactly one statement */ \
+do { \
+  a -= b; a -= c; a ^= (c>>13); \
+  b -= c; b -= a; b ^= (a<<8); \
+  c -= a; c -= b; c ^= (b>>13); \
+  a -= b; a -= c; a ^= (c>>12);  \
+  b -= c; b -= a; b ^= (a<<16); \
+  c -= a; c -= b; c ^= (b>>5); \
+  a -= b; a -= c; a ^= (c>>3);  \
+  b -= c; b -= a; b ^= (a<<10); \
+  c -= a; c -= b; c ^= (b>>15); \
+} while (0)
+
+/*
+--------------------------------------------------------------------
+lookup() -- hash a variable-length key into a 32-bit value
+  k      : the key (the unaligned variable-length array of bytes)
+  length : the length of the key, counting by bytes
+  level  : can be any 4-byte value
+Returns a 32-bit value.  Every bit of the key affects every bit of
+the return value.  Every 1-bit and 2-bit delta achieves avalanche.
+About 6len+35 instructions.
+
+The best hash table sizes are powers of 2.  There is no need to do
+mod a prime (mod is sooo slow!).  If you need less than 32 bits,
+use a bitmask.  For example, if you need only 10 bits, do
+  h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0, h=0; i<n; ++i) h = lookup( k[i], len[i], h);
+
+By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+code any way you wish, private, educational, or commercial.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable.  Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------
+*/
+
+ub4 lookup( k, length, level)
+register ub1 *k;        /* the key */
+register ub4  length;   /* the length of the key */
+register ub4  level;    /* the previous hash, or an arbitrary value */
+{
+   register ub4 a,b,c,len;
+
+   /* Set up the internal state */
+   len = length;        /* len counts the bytes still unread; length is kept */
+   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
+   c = level;           /* the previous hash value */
+
+   /*---------------------------------------- handle most of the key */
+   while (len >= 12)    /* 12 bytes per pass, read as 3 little-endian words */
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      mix(a,b,c);
+      k += 12; len -= 12;
+   }
+
+   /*------------------------------------- handle the last 11 bytes */
+   c += length;             /* the full key length, not the remainder */
+   switch(len)              /* all the case statements fall through */
+   {
+   case 11: c+=((ub4)k[10]<<24);
+   case 10: c+=((ub4)k[9]<<16);
+   case 9 : c+=((ub4)k[8]<<8);
+      /* the first byte of c is reserved for the length */
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+     /* case 0: nothing left to add */
+   }
+   mix(a,b,c);              /* final mix, done even when len was 0 */
+   /*-------------------------------------------- report the result */
+   return c;
+}
+
+
+/*
+--------------------------------------------------------------------
+mixc -- mix 8 4-byte values (a..h, updated in place) quickly and thoroughly.
+Repeating mixc() three times achieves avalanche.
+Repeating mixc() four times eliminates all funnels and all
+  characteristics stronger than 2^{-11}.
+--------------------------------------------------------------------
+*/
+#define mixc(a,b,c,d,e,f,g,h) /* do-while(0): expands to one statement */ \
+do { \
+   a^=b<<11; d+=a; b+=c; \
+   b^=c>>2;  e+=b; c+=d; \
+   c^=d<<8;  f+=c; d+=e; \
+   d^=e>>16; g+=d; e+=f; \
+   e^=f<<10; h+=e; f+=g; \
+   f^=g>>4;  a+=f; g+=h; \
+   g^=h<<8;  b+=g; h+=a; \
+   h^=a>>9;  c+=h; a+=b; \
+} while (0)
+
+/*
+--------------------------------------------------------------------
+checksum() -- hash a variable-length key into a 256-bit value
+  k     : the key (the unaligned variable-length array of bytes)
+  len   : the length of the key, counting by bytes
+  state : an array of CHECKSTATE 4-byte values (256 bits)
+The state is the checksum.  Every bit of the key affects every bit of
+the state.  There are no funnels.  About 112+6.875len instructions.
+
+If you are hashing n strings (ub1 **)k, do it like this:
+  for (i=0; i<8; ++i) state[i] = 0x9e3779b9;
+  for (i=0, h=0; i<n; ++i) checksum( k[i], len[i], state);
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use to detect changes between revisions of documents, assuming nobody
+is trying to cause collisions.  Do NOT use for cryptography.
+--------------------------------------------------------------------
+*/
+void  checksum( k, len, state)
+register ub1 *k;
+register ub4  len;
+register ub4 *state;
+{
+   register ub4 a,b,c,d,e,f,g,h,length;
+
+   /* Remember the full length; load the caller-seeded state into a..h. */
+   length = len;
+   a=state[0]; b=state[1]; c=state[2]; d=state[3];
+   e=state[4]; f=state[5]; g=state[6]; h=state[7];
+
+   /*-- handle most of the key; (ub4) casts keep every shift unsigned */
+   while (len >= 32)
+   {
+      a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
+      b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
+      c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
+      d += (k[12]+((ub4)k[13]<<8)+((ub4)k[14]<<16)+((ub4)k[15]<<24));
+      e += (k[16]+((ub4)k[17]<<8)+((ub4)k[18]<<16)+((ub4)k[19]<<24));
+      f += (k[20]+((ub4)k[21]<<8)+((ub4)k[22]<<16)+((ub4)k[23]<<24));
+      g += (k[24]+((ub4)k[25]<<8)+((ub4)k[26]<<16)+((ub4)k[27]<<24));
+      h += (k[28]+((ub4)k[29]<<8)+((ub4)k[30]<<16)+((ub4)k[31]<<24));
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      mixc(a,b,c,d,e,f,g,h);
+      k += 32; len -= 32;
+   }
+
+   /*------------------------------------- handle the last 31 bytes */
+   h += length;             /* the full key length, not the remainder */
+   switch(len)              /* all the case statements fall through */
+   {
+   case 31: h+=((ub4)k[30]<<24);
+   case 30: h+=((ub4)k[29]<<16);
+   case 29: h+=((ub4)k[28]<<8);
+   case 28: g+=((ub4)k[27]<<24);
+   case 27: g+=((ub4)k[26]<<16);
+   case 26: g+=((ub4)k[25]<<8);
+   case 25: g+=k[24];
+   case 24: f+=((ub4)k[23]<<24);
+   case 23: f+=((ub4)k[22]<<16);
+   case 22: f+=((ub4)k[21]<<8);
+   case 21: f+=k[20];
+   case 20: e+=((ub4)k[19]<<24);
+   case 19: e+=((ub4)k[18]<<16);
+   case 18: e+=((ub4)k[17]<<8);
+   case 17: e+=k[16];
+   case 16: d+=((ub4)k[15]<<24);
+   case 15: d+=((ub4)k[14]<<16);
+   case 14: d+=((ub4)k[13]<<8);
+   case 13: d+=k[12];
+   case 12: c+=((ub4)k[11]<<24);
+   case 11: c+=((ub4)k[10]<<16);
+   case 10: c+=((ub4)k[9]<<8);
+   case 9 : c+=k[8];
+   case 8 : b+=((ub4)k[7]<<24);
+   case 7 : b+=((ub4)k[6]<<16);
+   case 6 : b+=((ub4)k[5]<<8);
+   case 5 : b+=k[4];
+   case 4 : a+=((ub4)k[3]<<24);
+   case 3 : a+=((ub4)k[2]<<16);
+   case 2 : a+=((ub4)k[1]<<8);
+   case 1 : a+=k[0];
+   }
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+   mixc(a,b,c,d,e,f,g,h);
+
+   /*-------------------------------------------- report the result */
+   state[0]=a; state[1]=b; state[2]=c; state[3]=d;
+   state[4]=e; state[5]=f; state[6]=g; state[7]=h;
+}
diff --git a/tools/codegen/core/perfect/lookupa.h b/tools/codegen/core/perfect/lookupa.h
new file mode 100644
index 0000000000..0b27db680d
--- /dev/null
+++ b/tools/codegen/core/perfect/lookupa.h
@@ -0,0 +1,24 @@
+/*
+------------------------------------------------------------------------------
+By Bob Jenkins, September 1996.
+lookupa.h, a hash function for table lookup, same function as lookup.c.
+Use this code in any way you wish.  Public Domain.  It has no warranty.
+Source is http://burtleburtle.net/bob/c/lookupa.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef LOOKUPA
+#define LOOKUPA
+
+#define CHECKSTATE 8                /* number of ub4 words in checksum() state */
+#define hashsize(n) ((ub4)1<<(n))   /* 2^n, as a ub4 */
+#define hashmask(n) (hashsize(n)-1) /* mask that keeps the low n bits */
+
+ub4  lookup(/*_ ub1 *k, ub4 length, ub4 level _*/);      /* see lookupa.c */
+void checksum(/*_ ub1 *k, ub4 length, ub4 *state _*/);   /* see lookupa.c */
+
+#endif /* LOOKUPA */
diff --git a/tools/codegen/core/perfect/perfect.c b/tools/codegen/core/perfect/perfect.c
new file mode 100644
index 0000000000..67fd2fd262
--- /dev/null
+++ b/tools/codegen/core/perfect/perfect.c
@@ -0,0 +1,1367 @@
+/*
+------------------------------------------------------------------------------
+perfect.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996, December 1999
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.c
+
+This generates a minimal perfect hash function.  That means, given a
+set of n keys, this determines a hash function that maps each of
+those keys into a value in 0..n-1 with no collisions.
+
+The perfect hash function first uses a normal hash function on the key
+to determine (a,b) such that the pair (a,b) is distinct for all
+keys, then it computes a^scramble[tab[b]] to get the final perfect hash.
+tab[] is an array of 1-byte values and scramble[] is a 256-term array of 
+2-byte or 4-byte values.  If there are n keys, the length of tab[] is a 
+power of two between n/3 and n.
+
+I found the idea of computing distinct (a,b) values in "Practical minimal 
+perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, 
+Communications of the ACM, January 1992.  They found the idea in Cichelli 
+(CACM Jan 1980).  Beyond that, our methods differ.
+
+The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in
+0..*blen*-1.  A fast hash function determines both a and b
+simultaneously.  Any decent hash function is likely to produce
+hashes so that (a,b) is distinct for all pairs.  I try the hash
+using different values of *salt* until all pairs are distinct.
+
+The final hash is (a XOR scramble[tab[b]]).  *scramble* is a
+predetermined mapping of 0..255 into 0..smax-1.  *tab* is an
+array that we fill in in such a way as to make the hash perfect.
+
+First we fill in all values of *tab* that are used by more than one
+key.  We try all possible values for each position until one works.
+
+This leaves m unmapped keys and m values that something could hash to.
+If you treat unmapped keys as lefthand nodes and unused hash values
+as righthand nodes, and draw a line connecting each key to each hash
+value it could map to, you get a bipartite graph.  We attempt to
+find a perfect matching in this graph.  If we succeed, we have
+determined a perfect hash for the whole set of keys.
+
+*scramble* is used because (a^tab[i]) clusters keys around *a*.
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+#ifndef RECYCLE
+#include "recycle.h"
+#endif
+#ifndef PERFECT
+#include "perfect.h"
+#endif
+
+/*
+------------------------------------------------------------------------------
+Find the mapping that will produce a perfect hash
+------------------------------------------------------------------------------
+*/
+
+/* smallest i with (1<<i) >= val, i.e. ceil(log2(val)); gives 0 for val <= 1 */
+ub4  mylog2(val)
+ub4  val;
+{
+  ub4 bits = 0;
+  while (((ub4)1<<bits) < val)
+    ++bits;
+  return bits;
+}
+
+/* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */
+/* permute(0)=0.  This is intended and useful. */
+static ub4  permute(x, nbits)
+ub4 x;                                       /* input, a value in some range */
+ub4 nbits;                                 /* input, number of bits in range */
+{
+  int i;
+  ub4 mask   = ((ub4)1<<nbits)-1;       /* all ones; unsigned, like x itself */
+  int const2 = 1+nbits/2;
+  int const3 = 1+nbits/3;
+  int const4 = 1+nbits/4;
+  int const5 = 1+nbits/5;
+  for (i=0; i<20; ++i)                       /* 20 rounds of the same mixing */
+  {
+    x = (x+(x<<const2)) & mask;     /* left shift can leave the range: mask */
+    x = (x^(x>>const3));
+    x = (x+(x<<const4)) & mask;
+    x = (x^(x>>const5));
+  }
+  return x;
+}
+
+/* fill scramble[0..SCRAMBLE_LEN-1] with permute(i) over mylog2(smax) bits */
+static void scrambleinit(scramble, smax)
+ub4      *scramble;                            /* hash is a^scramble[tab[b]] */
+ub4       smax;                    /* scramble values should be in 0..smax-1 */
+{
+  ub4 i;
+  ub4 nbits = mylog2(smax);         /* loop-invariant, so computed only once */
+  /* fill scramble[] with distinct random integers in 0..smax-1 */
+  for (i=0; i<SCRAMBLE_LEN; ++i)
+  {
+    scramble[i] = permute(i, nbits);
+  }
+}
+
+/* 
+ * Report key1/key2 and exit if they are the same key for form->hashtype;
+ * return normally otherwise.  The caller already found their (a,b) equal.
+ */
+static void checkdup(key1, key2, form)
+key      *key1;
+key      *key2;
+hashform *form;
+{
+  switch(form->hashtype)
+  {
+  case STRING_HT:
+    if ((key1->len_k == key2->len_k) &&
+	!memcmp(key1->name_k, key2->name_k, (size_t)key1->len_k))
+    {
+      fprintf(stderr, "perfect.c: Duplicates keys!  %.*s\n",
+	      (int)key1->len_k, (char *)key1->name_k); /* '*' needs an int */
+      exit(SUCCESS);    /* NOTE(review): error path exits with SUCCESS */
+    }
+    break;
+  case INT_HT:
+    if (key1->hash_k == key2->hash_k)
+    {
+      fprintf(stderr, "perfect.c: Duplicate keys!  %.8lx\n", key1->hash_k);
+      exit(SUCCESS);
+    }
+    break;
+  case AB_HT:           /* the key is the (a,b) pair itself: always a dup */
+    fprintf(stderr, "perfect.c: Duplicate keys!  %.8lx %.8lx\n",
+	    key1->a_k, key1->b_k);
+    exit(SUCCESS);
+    break;
+  default:
+    fprintf(stderr, "perfect.c: Illegal hash type %ld\n", (ub4)form->hashtype);
+    exit(SUCCESS);
+    break;
+  }
+}
+
+
+/*
+ * File every key under tabb[key->b_k] and report whether the (a,b) pairs
+ * are all distinct: returns TRUE if they are, FALSE otherwise.
+ */
+static int inittab(tabb, blen, keys, form, complete)
+bstuff   *tabb;                     /* output, list of keys with b for (a,b) */
+ub4       blen;                                            /* length of tabb */
+key      *keys;                               /* list of keys already hashed */
+hashform *form;                                           /* user directives */
+int       complete;        /* TRUE means to complete init despite collisions */
+{
+  int  distinct = TRUE;                  /* cleared on the first (a,b) clash */
+  key *cur;
+
+  memset((void *)tabb, 0, (size_t)(sizeof(bstuff)*blen));
+
+  /* compare each key with the keys already filed under the same b */
+  for (cur = keys; cur != (key *)0; cur = cur->next_k)
+  {
+    bstuff *bucket = &tabb[cur->b_k];
+    key    *prev   = bucket->list_b;
+
+    while (prev != (key *)0)
+    {
+      if (prev->a_k == cur->a_k)
+      {
+        distinct = FALSE;
+        checkdup(cur, prev, form);
+        if (!complete)
+          return FALSE;
+      }
+      prev = prev->nextb_k;
+    }
+    /* push cur onto the front of its bucket's list */
+    cur->nextb_k   = bucket->list_b;
+    bucket->list_b = cur;
+    ++bucket->listlen_b;
+  }
+
+  return distinct;
+}
+
+
+/* Normal mode: set a_k/b_k for every key (lookup(), or checksum() when a and b need more than UB4BITS bits) and write the matching C code to final */
+static void initnorm(keys, alen, blen, smax, salt, final)
+key      *keys;                                          /* list of all keys */
+ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
+ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
+ub4       smax;                   /* maximum range of computable hash values */
+ub4       salt;                     /* used to initialize the hash function */
+gencode  *final;                          /* output, code for the final hash */
+{
+  key *mykey;
+  if (mylog2(alen)+mylog2(blen) > UB4BITS)
+  {
+    ub4 initlev = salt*0x9e3779b9;  /* the golden ratio; an arbitrary value */
+
+    for (mykey=keys; mykey; mykey=mykey->next_k)
+    {
+      ub4 i, state[CHECKSTATE];
+      for (i=0; i<CHECKSTATE; ++i) state[i] = initlev;
+      checksum( mykey->name_k, mykey->len_k, state);
+      mykey->a_k = state[0]&(alen-1);
+      mykey->b_k = state[1]&(blen-1);
+    }
+    final->used = 4;
+    sprintf(final->line[0], 
+	    "  ub4 i,state[CHECKSTATE],rsl;\n");
+    sprintf(final->line[1], 
+	    "  for (i=0; i<CHECKSTATE; ++i) state[i]=0x%lx;\n",initlev);
+    sprintf(final->line[2],
+	    "  checksum(key, len, state);\n");
+    sprintf(final->line[3],       /* ub4 masks use %lx, like initlev above */
+	    "  rsl = ((state[0]&0x%lx)^scramble[tab[state[1]&0x%lx]]);\n",
+	    alen-1, blen-1);
+  }
+  else
+  {
+    ub4 loga = mylog2(alen);                            /* log base 2 of alen */
+    ub4 initlev = salt*0x9e3779b9;  /* the golden ratio; an arbitrary value */
+
+    for (mykey=keys; mykey; mykey=mykey->next_k)
+    {
+      ub4 hash = lookup(mykey->name_k, mykey->len_k, initlev);
+      mykey->a_k = (loga > 0) ? hash>>(UB4BITS-loga) : 0;  /* a: top bits */
+      mykey->b_k = (blen > 1) ? hash&(blen-1) : 0;         /* b: low bits */
+    }
+    final->used = 2;
+    sprintf(final->line[0], 
+	    "  ub4 rsl, val = lookup(key, len, 0x%lx);\n", initlev);
+    if (smax <= 1)
+    {
+      sprintf(final->line[1], "  rsl = 0;\n");
+    }
+    else if (mylog2(alen) == 0)
+    {
+      sprintf(final->line[1], "  rsl = tab[val&0x%lx];\n", blen-1);
+    }
+    else if (blen < USE_SCRAMBLE)
+    {
+      sprintf(final->line[1], "  rsl = ((val>>%ld)^tab[val&0x%lx]);\n",
+	      UB4BITS-mylog2(alen), blen-1);
+    }
+    else
+    {
+      sprintf(final->line[1], "  rsl = ((val>>%ld)^scramble[tab[val&0x%lx]]);\n",
+	      UB4BITS-mylog2(alen), blen-1);
+    }
+  }
+}
+
+
+
+/* Inline mode: hash each key to (a,b); write the final-hash line to final */
+static void initinl(keys, alen, blen, smax, salt, final)
+key      *keys;                                          /* list of all keys */
+ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
+ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
+ub4       smax;                           /* range of computable hash values */
+ub4       salt;                     /* used to initialize the hash function */
+gencode  *final;                            /* generated code for final hash */
+{
+  ub4  seed  = salt*0x9e3779b9;      /* the golden ratio; an arbitrary value */
+  ub4  lowa  = alen-1;                        /* mask that selects a's bits */
+  ub4  bbits = mylog2(blen);             /* number of top hash bits used for b */
+  key *k;
+  /* a is taken from the low bits of the hash, b from its top bits */
+  for (k = keys;  k;  k = k->next_k)
+  {
+    ub4 h = seed, pos = 0;
+    while (pos < k->len_k)
+    {
+      ub4 rot = (h<<(UB4BITS-6))+(h>>6);
+      h = (k->name_k[pos] ^ h) + rot;
+      ++pos;
+    }
+    k->hash_k = h;
+    k->a_k = (alen > 1) ? (h & lowa) : 0;
+    k->b_k = (blen > 1) ? (h >> (UB4BITS-bbits)) : 0;
+  }
+  final->used = 1;
+  if (smax <= 1)
+  {
+    sprintf(final->line[0], "  ub4 rsl = 0;\n");
+  }
+  else if (blen >= USE_SCRAMBLE)
+  {
+    sprintf(final->line[0], "  ub4 rsl = ((val & 0x%lx) ^ scramble[tab[val >> %ld]]);\n",
+            lowa, UB4BITS-bbits);
+  }
+  else
+  {
+    sprintf(final->line[0], "  ub4 rsl = ((val & 0x%lx) ^ tab[val >> %ld]);\n",
+            lowa, UB4BITS-bbits);
+  }
+}
+
+
+/*
+ * Hash every key to its (a,b) pair with the scheme chosen by form->mode.
+ * Returns:
+ *   0: didn't find distinct (a,b) for all keys
+ *   1: found distinct (a,b) for all keys, put keys in tabb[]
+ *   2: found a perfect hash, no need to do any more work
+ */
+static ub4 initkey(keys, nkeys, tabb, alen, blen, smax, salt, form, final)
+key      *keys;                                          /* list of all keys */
+ub4       nkeys;                                     /* total number of keys */
+bstuff   *tabb;                                        /* stuff indexed by b */
+ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
+ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
+ub4       smax;                           /* range of computable hash values */
+ub4       salt;                     /* used to initialize the hash function */
+hashform *form;                                           /* user directives */
+gencode  *final;                                      /* code for final hash */
+{
+  ub4 done;
+
+  if (form->mode == NORMAL_HM)
+  {
+    initnorm(keys, alen, blen, smax, salt, final);
+  }
+  else if (form->mode == INLINE_HM)
+  {
+    initinl(keys, alen, blen, smax, salt, final);
+  }
+  else if (form->mode == HEX_HM || form->mode == DECIMAL_HM)
+  {
+    done = inithex(keys, nkeys, alen, blen, smax, salt, final, form);
+    if (done) return 2;
+  }
+  else
+  {
+    fprintf(stderr, "fatal error: illegal mode\n");
+    exit(1);
+  }
+
+  /* with at most one key the hash is the constant 0 */
+  if (nkeys <= 1)
+  {
+    final->used = 1;
+    sprintf(final->line[0], "  ub4 rsl = 0;\n");
+    return 2;
+  }
+  return inittab(tabb, blen, keys, form, FALSE);
+}
+
+/* Rebuild tabb[] completely, then run checkdup() on all keys sharing a b */
+static void duplicates(tabb, blen, keys, form)
+bstuff   *tabb;                    /* array of lists of keys with the same b */
+ub4       blen;                              /* length of tabb, a power of 2 */
+key      *keys;
+hashform *form;                                           /* user directives */
+{
+  ub4  b;
+  key *first, *second;
+
+  (void)inittab(tabb, blen, keys, form, TRUE);
+  /* compare every pair of keys that landed in the same tabb[] slot */
+  for (b = 0; b < blen; ++b)
+  {
+    for (first = tabb[b].list_b; first; first = first->nextb_k)
+      for (second = first->nextb_k; second; second = second->nextb_k)
+        checkdup(first, second, form);
+  }
+}
+
+
+/* Try to apply an augmenting path; TRUE on success, FALSE (after rolling back) on a collision */
+static int apply(tabb, tabh, tabq, blen, scramble, tail, rollback)
+bstuff *tabb;                           /* only passed on to the rollback call */
+hstuff *tabh;                             /* key currently owning each hash */
+qstuff *tabq;                /* queue entries; parent_q links form the path */
+ub4     blen;                           /* only passed on to the rollback call */
+ub4    *scramble;
+ub4     tail;                    /* the path starts at queue entry tail-1 */
+int     rollback;          /* FALSE applies augmenting path, TRUE rolls back */
+{
+  ub4     hash;
+  key    *mykey;
+  bstuff *pb;
+  ub4     child;
+  ub4     parent;
+  ub4     stabb;                                         /* scramble[tab[b]] */
+
+  /* walk from child to parent */
+  for (child=tail-1; child; child=parent)
+  {
+    parent = tabq[child].parent_q;                    /* find child's parent */
+    pb     = tabq[parent].b_q;             /* find parent's list of siblings */
+
+    /* erase old hash values */
+    stabb = scramble[pb->val_b];
+    for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+    {
+      hash = mykey->a_k^stabb;
+      if (mykey == tabh[hash].key_h)
+      {                            /* erase hash for all of child's siblings */
+	tabh[hash].key_h = (key *)0;
+      }
+    }
+
+    /* change pb->val_b, which will change the hashes of all parent siblings */
+    pb->val_b = (rollback ? tabq[child].oldval_q : tabq[child].newval_q);
+
+    /* set new hash values */
+    stabb = scramble[pb->val_b];
+    for (mykey=pb->list_b; mykey; mykey=mykey->nextb_k)
+    {
+      hash = mykey->a_k^stabb;
+      if (rollback)
+      {
+	if (parent == 0) continue;                  /* root never had a hash */
+      }
+      else if (tabh[hash].key_h)
+      {
+	/* very rare: roll back any changes; the result is deliberately ignored */
+	(void)apply(tabb, tabh, tabq, blen, scramble, tail, TRUE);
+	return FALSE;                                  /* failure, collision */
+      }
+      tabh[hash].key_h = mykey;
+    }
+  }
+  return TRUE;
+}
+
+
+/*
+-------------------------------------------------------------------------------
+augment(): Add item to the mapping.  Returns TRUE on success, else FALSE.
+
+Construct a spanning tree of *b*s with *item* as root, where each
+parent can have all its hashes changed (by some new val_b) with 
+at most one collision, and each child is the b of that collision.
+
+I got this from Tarjan's "Data Structures and Network Algorithms".  The
+path from *item* to a *b* that can be remapped with no collision is 
+an "augmenting path".  Change values of tab[b] along the path so that 
+the unmapped key gets mapped and the unused hash value gets used.
+
+Assuming 1 key per b, if m out of n hash values are still unused, 
+you should expect the transitive closure to cover n/m nodes before 
+an unused node is found.  Sum(i=1..n)(n/i) is about nlogn, so expect
+this approach to take about nlogn time to map all single-key b's.
+-------------------------------------------------------------------------------
+*/
+static int augment(tabb, tabh, tabq, blen, scramble, smax, item, nkeys, 
+		   highwater, form)
+bstuff   *tabb;                                        /* stuff indexed by b */
+hstuff   *tabh;  /* which key is associated with which hash, indexed by hash */
+qstuff   *tabq;            /* queue of *b* values, this is the spanning tree */
+ub4       blen;                                            /* length of tabb */
+ub4      *scramble;                      /* final hash is a^scramble[tab[b]] */
+ub4       smax;                                 /* highest value in scramble */
+bstuff   *item;                           /* &tabb[b] for the b to be mapped */
+ub4       nkeys;                         /* final hash must be in 0..nkeys-1 */
+ub4       highwater;        /* a value higher than any now in tabb[].water_b */
+hashform *form;              /* user directives; perfect and speed are read */
+{
+  ub4  q;                      /* current position walking through the queue */
+  ub4  tail;              /* tail of the queue.  0 is the head of the queue. */
+  ub4  limit=((blen < USE_SCRAMBLE) ? smax : UB1MAXVAL+1); /* val_b candidates */
+  ub4  highhash = ((form->perfect == MINIMAL_HP) ? nkeys : smax); /* hash bound */
+  int  trans = (form->speed == SLOW_HS || form->perfect == MINIMAL_HP);
+
+  /* initialize the root of the spanning tree */
+  tabq[0].b_q = item;
+  tail = 1;
+
+  /* construct the spanning tree by walking the queue, add children to tail */
+  for (q=0; q<tail; ++q)
+  {
+    bstuff *myb = tabq[q].b_q;                        /* the b for this node */
+    ub4     i;                              /* possible value for myb->val_b */
+
+    if (!trans && (q == 1)) 
+      break;                                  /* don't do transitive closure */
+
+    for (i=0; i<limit; ++i)
+    {
+      bstuff *childb = (bstuff *)0;             /* the b that this i maps to */
+      key    *mykey;                       /* for walking through myb's keys */
+
+      for (mykey = myb->list_b; mykey; mykey=mykey->nextb_k)
+      {
+	key    *childkey;
+	ub4 hash = mykey->a_k^scramble[i];
+
+	if (hash >= highhash) break;                        /* out of bounds */
+	childkey = tabh[hash].key_h;
+
+	if (childkey)
+	{
+	  bstuff *hitb = &tabb[childkey->b_k];
+
+	  if (childb)
+	  {
+	    if (childb != hitb) break;            /* hit at most one child b */
+	  }
+	  else
+	  {
+	    childb = hitb;                        /* remember this as childb */
+	    if (childb->water_b == highwater) break;     /* already explored */
+	  }
+	}
+      }
+      if (mykey) continue;     /* loop broke early: this i is unusable for myb */
+
+      /* add childb to the queue of reachable things */
+      if (childb) childb->water_b = highwater;
+      tabq[tail].b_q      = childb;
+      tabq[tail].newval_q = i;     /* how to make parent (myb) use this hash */
+      tabq[tail].oldval_q = myb->val_b;            /* need this for rollback */
+      tabq[tail].parent_q = q;
+      ++tail;
+
+      if (!childb)
+      {                                  /* found an *i* with no collisions? */
+	/* try to apply the augmenting path */
+	if (apply(tabb, tabh, tabq, blen, scramble, tail, FALSE))
+	  return TRUE;        /* success, item was added to the perfect hash */
+
+	--tail;                    /* don't know how to handle such a child! */
+      }
+    }
+  }
+  return FALSE;                         /* queue exhausted without success */
+}
+
+
+/* Map every b with augment(), largest key groups first; TRUE if all mapped */
+static int perfect(tabb, tabh, tabq, blen, smax, scramble, nkeys, form)
+bstuff   *tabb;
+hstuff   *tabh;
+qstuff   *tabq;
+ub4       blen;
+ub4       smax;
+ub4      *scramble;
+ub4       nkeys;
+hashform *form;
+{
+  ub4 hashes  = (form->perfect == MINIMAL_HP) ? nkeys : smax; /* tabh entries */
+  ub4 biggest = 0;                       /* maximum number of keys for any b */
+  ub4 size, b;
+
+  /* clear any state from previous attempts */
+  memset((void *)tabh, 0, (size_t)(sizeof(hstuff)*hashes));
+  memset((void *)tabq, 0, (size_t)(sizeof(qstuff)*(blen+1)));
+
+  for (b = 0; b < blen; ++b)
+    if (biggest < tabb[b].listlen_b)
+      biggest = tabb[b].listlen_b;
+
+  /* In descending order by number of keys, map all *b*s */
+  for (size = biggest; size > 0; --size)
+  {
+    for (b = 0; b < blen; ++b)
+    {
+      if (tabb[b].listlen_b != size)
+        continue;
+      if (augment(tabb, tabh, tabq, blen, scramble, smax, &tabb[b], nkeys, b+1, form))
+        continue;
+      printf("fail to map group of size %ld for tab size %ld\n", size, blen);
+      return FALSE;
+    }
+  }
+  return TRUE;       /* every key now has a distinct hash below the bound */
+}
+
+
+/*
+ * Simple case: user gave (a,b).  No more mixing, no guessing alen or blen. 
+ * This assumes a,b reside in (key->a_k, key->b_k) and form->hashtype == AB_HT.
+ */
+static void hash_ab(tabb, alen, blen, salt, final, 
+             scramble, smax, keys, nkeys, form)
+bstuff  **tabb;     /* output, tab[] of length *blen, later freed by driver() */
+ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
+ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
+ub4      *salt;                         /* output, initializes initial hash */
+gencode  *final;                                      /* code for final hash */
+ub4      *scramble;    /* handed to scrambleinit(); hash = a^scramble[tab[b]] */
+ub4      *smax;                          /* output, scramble[i] in 0..smax-1 */
+key      *keys;                                       /* input, keys to hash */
+ub4       nkeys;                       /* input, number of keys being hashed */
+hashform *form;                                           /* user directives */
+{
+  hstuff *tabh;
+  qstuff *tabq;
+  key    *mykey;
+  ub4     i;
+  int     used_tab;
+
+  /* initially make smax the smallest power of two >= nkeys (mylog2 is a ceiling) */
+  *smax = ((ub4)1<<mylog2(nkeys));
+  scrambleinit(scramble, *smax);
+
+  /* set *alen and *blen based on max A and B from user */
+  *alen = 1;
+  *blen = 1;
+  for (mykey = keys;  mykey != (key *)0;  mykey = mykey->next_k)
+  {
+    while (*alen <= mykey->a_k) *alen *= 2;
+    while (*blen <= mykey->b_k) *blen *= 2;
+  }
+  if (*alen > 2**smax)
+  {
+    fprintf(stderr,
+      "perfect.c: Can't deal with (A,B) having A bigger than twice \n");
+    fprintf(stderr,
+      "  the smallest power of two greater or equal to any legal hash.\n");
+    exit(SUCCESS);
+  }
+
+  /* allocate working memory; tabb goes through remalloc like tabq and tabh */
+  *tabb = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+  tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+                                             nkeys : *smax),
+                             "perfect.c, tabh");
+
+  /* check that (a,b) are distinct and put them in tabb indexed by b */
+  (void)inittab(*tabb, *blen, keys, form, FALSE);
+
+  /* try with smax */
+  if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+  {
+    if (form->perfect == MINIMAL_HP)
+    {
+      printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+      exit(SUCCESS);
+    }
+    else
+    {
+      /* try with 2*smax; tabh is sized by smax here, so it must be regrown */
+      free((void *)tabh);
+      *smax = *smax * 2;
+      scrambleinit(scramble, *smax);
+      tabh = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ?
+                                                nkeys : *smax),
+                                "perfect.c, tabh");
+      if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+      {
+        printf("fatal error: Cannot find perfect hash for user (A,B) pairs\n");
+        exit(SUCCESS);
+      }
+    }
+  }
+
+  /* check if tab[] was really needed */
+  for (i=0; i<*blen; ++i)
+  {
+    if ((*tabb)[i].val_b != 0) break;            /* assumes permute(0) == 0 */
+  }
+  used_tab = (i < *blen);
+
+  /* write the code for the perfect hash */
+  *salt = 1;
+  final->used = 1;
+  if (!used_tab)
+  {
+    sprintf(final->line[0], "  ub4 rsl = a;\n");
+  }
+  else if (*blen < USE_SCRAMBLE)
+  {
+    sprintf(final->line[0], "  ub4 rsl = (a ^ tab[b]);\n");
+  }
+  else
+  {
+    sprintf(final->line[0], "  ub4 rsl = (a ^ scramble[tab[b]]);\n");
+  }
+
+  printf("success, found a perfect hash\n");
+
+  free((void *)tabq);
+  free((void *)tabh);
+}
+
+
+/* guess initial values for alen and blen */
+static void initalen(alen, blen, smax, nkeys, form)
+ub4      *alen;                                      /* output, initial alen */
+ub4      *blen;                                      /* output, initial blen */
+ub4      *smax;   /* in/out, power of two >= max hash value; may be doubled */
+ub4       nkeys;                              /* number of keys being hashed */
+hashform *form;                                           /* user directives */
+{
+  /*
+   * Find initial *alen, *blen
+   * Initial alen and blen values were found empirically.  Some factors:
+   *
+   * If smax<256 there is no scramble, so tab[b] needs to cover 0..smax-1.
+   *
+   * alen and blen must be powers of 2 because the values in 0..alen-1 and
+   * 0..blen-1 are produced by applying a bitmask to the initial hash function.
+   *
+   * alen must be less than smax, in fact less than nkeys, because otherwise
+   * there would often be no i such that a^scramble[i] is in 0..nkeys-1 for
+   * all the *a*s associated with a given *b*, so there would be no legal
+   * value to assign to tab[b].  This only matters when we're doing a minimal
+   * perfect hash.
+   *
+   * It takes around 800 trials to find distinct (a,b) with nkey=smax*(5/8)
+   * and alen*blen = smax*smax/32.
+   *
+   * Values of blen less than smax/4 never work, and smax/2 always works.
+   *
+   * We want blen as small as possible because it is the number of bytes in
+   * the huge array we must create for the perfect hash.
+   *
+   * When nkey <= smax*(5/8), blen=smax/4 works much more often with 
+   * alen=smax/8 than with alen=smax/4.  Above smax*(5/8), blen=smax/4
+   * doesn't seem to care whether alen=smax/8 or alen=smax/4.  I think it
+   * has something to do with 5/8 = 1/8 * 5.  For example examine 80000, 
+   * 85000, and 90000 keys with different values of alen.  This only matters
+   * if we're doing a minimal perfect hash.
+   *
+   * When alen*blen <= 1<<UB4BITS, the initial hash must produce one integer.
+   * Bigger than that it must produce two integers, which increases the
+   * cost of the hash per character hashed.
+   */
+  if (form->perfect == NORMAL_HP)
+  {
+    if ((form->speed == FAST_HS) && (nkeys > *smax*0.8))
+    {
+      *smax = *smax * 2;
+    }
+
+    /* settle *blen first: *alen reads it, and the caller passes it in unset */
+    if ((form->hashtype == INT_HT) && *smax < 32)
+      *blen = *smax;                      /* go for function speed not space */
+    else if (*smax/4 <= (1<<14))
+      *blen = ((nkeys <= *smax*0.56) ? *smax/32 :
+               (nkeys <= *smax*0.74) ? *smax/16 : *smax/8);
+    else
+      *blen = ((nkeys <= *smax*0.6) ? *smax/16 : 
+               (nkeys <= *smax*0.8) ? *smax/8 : *smax/4);
+    if ((form->speed == FAST_HS) && (*blen < *smax/8))
+      *blen = *smax/8;
+    if (*blen < 1) *blen = 1;
+
+    *alen = ((form->hashtype==INT_HT) && *smax>131072) ? 
+      ((ub4)1<<(UB4BITS-mylog2(*blen))) :   /* distinct keys => distinct (A,B) */
+      *smax;                         /* no reason to restrict alen to smax/2 */
+    if (*alen < 1) *alen = 1;
+  }
+  else
+  {
+    switch(mylog2(*smax))
+    {
+    case 0:
+      *alen = 1;  *blen = 1;         /* smax is 1: a and b can only ever be 0 */
+      break;      /* without this the next case overwrote both with smax/2 == 0 */
+    case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
+      *alen = (form->perfect == NORMAL_HP) ? *smax : *smax/2;
+      *blen = *smax/2;
+      break;
+    case 9:
+    case 10:
+    case 11:
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+    case 16:
+    case 17:
+      if (form->speed == FAST_HS)
+      {
+        *alen = *smax/2;
+        *blen = *smax/4;
+      }
+      else if (*smax/4 < USE_SCRAMBLE)
+      {
+        *alen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+        *blen = ((nkeys <= *smax*0.52) ? *smax/8 : *smax/4);
+      }
+      else
+      {
+        *alen = ((nkeys <= *smax*(5.0/8.0)) ? *smax/8 : 
+                 (nkeys <= *smax*(3.0/4.0)) ? *smax/4 : *smax/2);
+        *blen = *smax/4;                /* always give the small size a shot */
+      }
+      break;
+    case 18:
+      if (form->speed == FAST_HS)
+      {
+        *alen = *smax/2;
+        *blen = *smax/2;
+      }
+      else
+      {
+        *alen = *smax/8;                 /* never require the multiword hash */
+        *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+      }
+      break;
+    case 19:
+    case 20:
+      *alen = (nkeys <= *smax*(5.0/8.0)) ? *smax/8 : *smax/2;
+      *blen = (nkeys <= *smax*(5.0/8.0)) ? *smax/4 : *smax/2;
+      break;
+    default:
+      *alen = *smax/2;              /* just find a hash as quick as possible */
+      *blen = *smax/2;     /* we'll be thrashing virtual memory at this size */
+      break;
+    }
+  }
+}
+
+/* 
+** Try to find a perfect hash function.  
+** On success *salt holds the initializer for the initial hash and *tabb the
+** table (driver() frees it).  Failure is fatal: a message is printed and we exit.
+*/
+void findhash(tabb, alen, blen, salt, final, 
+              scramble, smax, keys, nkeys, form)
+bstuff  **tabb;           /* output, tab[] of the perfect hash, length *blen */
+ub4      *alen;                 /* output, 0..alen-1 is range for a of (a,b) */
+ub4      *blen;                 /* output, 0..blen-1 is range for b of (a,b) */
+ub4      *salt;                         /* output, initializes initial hash */
+gencode  *final;                                      /* code for final hash */
+ub4      *scramble;    /* handed to scrambleinit(); hash = a^scramble[tab[b]] */
+ub4      *smax;                          /* output, scramble[i] in 0..smax-1 */
+key      *keys;                                       /* input, keys to hash */
+ub4       nkeys;                       /* input, number of keys being hashed */
+hashform *form;                                           /* user directives */
+{
+  ub4 bad_initkey;                       /* how many times did initkey fail? */
+  ub4 bad_perfect;                       /* how many times did perfect fail? */
+  ub4 trysalt;                        /* trial initializer for initial hash */
+  ub4 maxalen;
+  hstuff *tabh;                       /* table of keys indexed by hash value */
+  qstuff *tabq;    /* table of stuff indexed by queue value, used by augment */
+
+  /* The case of (A,B) supplied by the user is a special case */
+  if (form->hashtype == AB_HT)
+  {
+    hash_ab(tabb, alen, blen, salt, final, 
+            scramble, smax, keys, nkeys, form);
+    return;
+  }
+
+  /* guess initial values for smax, alen and blen */
+  *smax = ((ub4)1<<mylog2(nkeys));
+  initalen(alen, blen, smax, nkeys, form);
+
+  scrambleinit(scramble, *smax);
+
+  maxalen = (form->perfect == MINIMAL_HP) ? *smax/2 : *smax;
+
+  /* allocate working memory */
+  *tabb = (bstuff *)remalloc((size_t)(sizeof(bstuff)*(*blen)), 
+                             "perfect.c, tabb");
+  tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+  tabh  = (hstuff *)remalloc(sizeof(hstuff)*(form->perfect == MINIMAL_HP ? 
+                                             nkeys : *smax),
+                             "perfect.c, tabh");
+
+  /* Actually find the perfect hash */
+  *salt = 0;
+  bad_initkey = 0;
+  bad_perfect = 0;
+  for (trysalt=1; ; ++trysalt)
+  {
+    ub4 rslinit;
+    /* Try to find distinct (A,B) for all keys */
+    
+    rslinit = initkey(keys, nkeys, *tabb, *alen, *blen, *smax, trysalt,
+                      form, final);
+
+    if (rslinit == 2)
+    {      /* initkey actually found a perfect hash, not just distinct (a,b) */
+      *salt = 1;
+      *blen = 0;
+      break;
+    }
+    else if (rslinit == 0)
+    {
+      /* didn't find distinct (a,b) */
+      if (++bad_initkey >= RETRY_INITKEY)
+      {
+        /* Try to put more bits in (A,B) to make distinct (A,B) more likely */
+        if (*alen < maxalen)
+        {
+          *alen *= 2;
+        } 
+        else if (*blen < *smax)
+        {
+          *blen *= 2;           /* tabb and tabq are both sized by blen: regrow */
+          free(tabq);
+          free(*tabb);
+          *tabb = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+          tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+        }
+        else
+        {
+          duplicates(*tabb, *blen, keys, form);      /* check for duplicates */
+          printf("fatal error: Cannot perfect hash: cannot find distinct (A,B)\n");
+          exit(SUCCESS);
+        }
+        bad_initkey = 0;
+        bad_perfect = 0;
+      }
+      continue;                             /* two keys have same (a,b) pair */
+    }
+
+    printf("found distinct (A,B) on attempt %ld\n", trysalt);
+
+    /* Given distinct (A,B) for all keys, build a perfect hash */
+    if (!perfect(*tabb, tabh, tabq, *blen, *smax, scramble, nkeys, form))
+    {
+      if ((form->hashtype != INT_HT && ++bad_perfect >= RETRY_PERFECT) || 
+          (form->hashtype == INT_HT && ++bad_perfect >= RETRY_HEX))
+      {
+        if (*blen < *smax)
+        {
+          *blen *= 2;           /* tabb and tabq are both sized by blen: regrow */
+          free(*tabb);
+          free(tabq);
+          *tabb = (bstuff *)remalloc(sizeof(bstuff)*(*blen), "perfect.c, tabb");
+          tabq  = (qstuff *)remalloc(sizeof(qstuff)*(*blen+1), "perfect.c, tabq");
+          --trysalt;               /* we know this salt got distinct (A,B) */
+        }
+        else
+        {
+          printf("fatal error: Cannot perfect hash: cannot build tab[]\n");
+          exit(SUCCESS);
+        }
+        bad_perfect = 0;
+      }
+      continue;
+    }
+    
+    *salt = trysalt;
+    break;
+  }
+
+  printf("built perfect hash table of size %ld\n", *blen);
+
+  /* free working memory */
+  free((void *)tabh);
+  free((void *)tabq);
+}
+
+/*
+------------------------------------------------------------------------------
+Input/output type routines
+------------------------------------------------------------------------------
+*/
+
+/* read one key per line from stdin; the last key read ends up heading *keys */
+static void getkeys(keys, nkeys, textroot, keyroot, form)
+key      **keys;                                         /* list of all keys */
+ub4       *nkeys;                                          /* number of keys */
+reroot    *textroot;                          /* get space to store key text */
+reroot    *keyroot;                                    /* get space for keys */
+hashform  *form;                                          /* user directives */
+{
+  key  *mykey;
+  char *mytext = (char *)renew(textroot);  /* line buffer; string keys keep it */
+  *keys = 0;
+  *nkeys = 0;
+  while (fgets(mytext, MAXKEYLEN, stdin))
+  {
+    mykey = (key *)renew(keyroot);
+    if (form->mode == AB_HM)
+    {
+      sscanf(mytext, "%lx %lx ", &mykey->a_k, &mykey->b_k);
+    }
+    else if (form->mode == ABDEC_HM)
+    {
+      sscanf(mytext, "%ld %ld ", &mykey->a_k, &mykey->b_k);
+    }
+    else if (form->mode == HEX_HM)
+    {
+      sscanf(mytext, "%lx ", &mykey->hash_k);
+    }
+    else if (form->mode == DECIMAL_HM)
+    {
+      sscanf(mytext, "%ld ", &mykey->hash_k);
+    }
+    else
+    {
+      mykey->name_k = (ub1 *)mytext;
+      mykey->len_k  = (ub4)strlen(mytext);  /* fgets keeps '\n' only if it fit */
+      if (mykey->len_k > 0 && mytext[mykey->len_k-1] == '\n') --mykey->len_k;
+      mytext = (char *)renew(textroot);     /* the key owns the old buffer now */
+    }
+    mykey->next_k = *keys;
+    *keys = mykey;
+    ++*nkeys;
+  }
+  redel(textroot, mytext);
+}
+
+/* write phash.h: externs for tab[] and scramble[] plus the PHASH* constants */
+static void make_h(blen, smax, nkeys, salt)
+ub4  blen;
+ub4  smax;
+ub4  nkeys;
+ub4  salt;
+{
+  FILE *f;
+  f = fopen("phash.h", "w");
+  if (!f) { fprintf(stderr, "perfect.c: cannot write phash.h\n"); exit(1); }
+  fprintf(f, "/* Perfect hash definitions */\n");
+  fprintf(f, "#ifndef STANDARD\n");
+  fprintf(f, "#include \"standard.h\"\n");
+  fprintf(f, "#endif /* STANDARD */\n");
+  fprintf(f, "#ifndef PHASH\n");
+  fprintf(f, "#define PHASH\n");
+  fprintf(f, "\n");
+  if (blen > 0)
+  {
+    if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+      fprintf(f, "extern ub1 tab[];\n");
+    else
+      fprintf(f, "extern ub2 tab[];\n");
+    /* same test and types make_c() uses; was unreachable inside the else */
+    if (blen >= USE_SCRAMBLE)
+    {
+      if (smax <= UB2MAXVAL+1)
+        fprintf(f, "extern ub2 scramble[];\n");
+      else
+        fprintf(f, "extern ub4 scramble[];\n");
+    }
+    fprintf(f, "#define PHASHLEN 0x%lx  /* length of hash mapping table */\n",
+            blen);
+  }
+  fprintf(f, "#define PHASHNKEYS %ld  /* How many keys were hashed */\n",
+          nkeys);
+  fprintf(f, "#define PHASHRANGE %ld  /* Range any input might map to */\n",
+          smax);
+  fprintf(f, "#define PHASHSALT 0x%.8lx /* internal, initialize normal hash */\n",
+          salt*0x9e3779b9);
+  fprintf(f, "\n");
+  fprintf(f, "ub4 phash();\n");
+  fprintf(f, "\n");
+  fprintf(f, "#endif  /* PHASH */\n");
+  fprintf(f, "\n");
+  fclose(f);
+}
+
+/* write phash.c: scramble[] and tab[] when needed, then the phash() function */
+static void make_c(tab, smax, blen, scramble, final, form)
+bstuff   *tab;                                         /* table indexed by b */
+ub4       smax;                                       /* range of scramble[] */
+ub4       blen;                                /* b in 0..blen-1, power of 2 */
+ub4      *scramble;                                    /* used in final hash */
+gencode  *final;                                  /* code for the final hash */
+hashform *form;                                           /* user directives */
+{
+  ub4   i;
+  FILE *f;
+  f = fopen("phash.c", "w");
+  if (!f) { fprintf(stderr, "perfect.c: cannot write phash.c\n"); exit(1); }
+  fprintf(f, "/* table for the mapping for the perfect hash */\n");
+  fprintf(f, "#ifndef STANDARD\n");
+  fprintf(f, "#include \"standard.h\"\n");
+  fprintf(f, "#endif /* STANDARD */\n");
+  fprintf(f, "#ifndef PHASH\n");
+  fprintf(f, "#include \"phash.h\"\n");
+  fprintf(f, "#endif /* PHASH */\n");
+  fprintf(f, "#ifndef LOOKUPA\n");
+  fprintf(f, "#include \"lookupa.h\"\n");
+  fprintf(f, "#endif /* LOOKUPA */\n");
+  fprintf(f, "\n");
+  if (blen >= USE_SCRAMBLE)
+  {
+    fprintf(f, "/* A way to make the 1-byte values in tab bigger */\n");
+    if (smax > UB2MAXVAL+1)
+    {
+      fprintf(f, "ub4 scramble[] = {\n");
+      for (i=0; i<=UB1MAXVAL; i+=4)
+        fprintf(f, "0x%.8lx, 0x%.8lx, 0x%.8lx, 0x%.8lx,\n",
+                scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3]);
+    }
+    else
+    {
+      fprintf(f, "ub2 scramble[] = {\n");
+      for (i=0; i<=UB1MAXVAL; i+=8)
+        fprintf(f, 
+"0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx, 0x%.4lx,\n",
+                scramble[i+0], scramble[i+1], scramble[i+2], scramble[i+3],
+                scramble[i+4], scramble[i+5], scramble[i+6], scramble[i+7]);
+    }
+    fprintf(f, "};\n\n");
+  }
+  if (blen > 0)
+  {
+    fprintf(f, "/* small adjustments to _a_ to make values distinct */\n");
+
+    if (smax <= UB1MAXVAL+1 || blen >= USE_SCRAMBLE)
+      fprintf(f, "ub1 tab[] = {\n");
+    else
+      fprintf(f, "ub2 tab[] = {\n");
+
+    if (blen < 16)
+    {
+      for (i=0; i<blen; ++i) fprintf(f, "%3ld,", scramble[tab[i].val_b]);
+    }
+    else if (blen <= 1024)
+    {
+      for (i=0; i<blen; i+=16)
+        fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
+                scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
+                scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
+                scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
+                scramble[tab[i+6].val_b], scramble[tab[i+7].val_b], 
+                scramble[tab[i+8].val_b], scramble[tab[i+9].val_b], 
+                scramble[tab[i+10].val_b], scramble[tab[i+11].val_b], 
+                scramble[tab[i+12].val_b], scramble[tab[i+13].val_b], 
+                scramble[tab[i+14].val_b], scramble[tab[i+15].val_b]); 
+    }
+    else if (blen < USE_SCRAMBLE)
+    {
+      for (i=0; i<blen; i+=8)
+        fprintf(f, "%ld,%ld,%ld,%ld,%ld,%ld,%ld,%ld,\n",
+                scramble[tab[i+0].val_b], scramble[tab[i+1].val_b], 
+                scramble[tab[i+2].val_b], scramble[tab[i+3].val_b], 
+                scramble[tab[i+4].val_b], scramble[tab[i+5].val_b], 
+                scramble[tab[i+6].val_b], scramble[tab[i+7].val_b]); 
+    }
+    else 
+    {
+      for (i=0; i<blen; i+=16)       /* val_b is a ub2: it arrives as an int */
+        fprintf(f, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,\n",
+                tab[i+0].val_b, tab[i+1].val_b, 
+                tab[i+2].val_b, tab[i+3].val_b, 
+                tab[i+4].val_b, tab[i+5].val_b, 
+                tab[i+6].val_b, tab[i+7].val_b, 
+                tab[i+8].val_b, tab[i+9].val_b, 
+                tab[i+10].val_b, tab[i+11].val_b, 
+                tab[i+12].val_b, tab[i+13].val_b, 
+                tab[i+14].val_b, tab[i+15].val_b); 
+    }
+    fprintf(f, "};\n");
+    fprintf(f, "\n");
+  }
+  fprintf(f, "/* The hash function */\n");
+  switch(form->mode)
+  {
+  case NORMAL_HM:
+    fprintf(f, "ub4 phash(key, len)\n");
+    fprintf(f, "char *key;\n");
+    fprintf(f, "int   len;\n");
+    break;
+  case INLINE_HM:
+  case HEX_HM:
+  case DECIMAL_HM:
+    fprintf(f, "ub4 phash(val)\n");
+    fprintf(f, "ub4 val;\n");
+    break;
+  case AB_HM:
+  case ABDEC_HM:
+    fprintf(f, "ub4 phash(a,b)\n");
+    fprintf(f, "ub4 a;\n");
+    fprintf(f, "ub4 b;\n");
+    break;
+  }
+  fprintf(f, "{\n");
+  for (i=0; i<final->used; ++i)
+    fputs(final->line[i], f);    /* generated text is data, not a format string */
+  fprintf(f, "  return rsl;\n");
+  fprintf(f, "}\n");
+  fprintf(f, "\n");
+  fclose(f);
+}
+
+/*
+------------------------------------------------------------------------------
+Read in the keys, find the hash, and write the .c and .h files
+------------------------------------------------------------------------------
+*/
+static void driver(form)
+hashform *form;                                           /* user directives */
+{
+  char      text[10][80];                  /* storage for the generated code */
+  char     *lines[10];                           /* lines[n] points at text[n] */
+  gencode   final;                                    /* code for final hash */
+  ub4       scramble[SCRAMBLE_LEN];           /* used in final hash function */
+  reroot   *textroot;                      /* MAXKEYLEN-character text lines */
+  reroot   *keyroot;                                       /* source of keys */
+  key      *keys;                                    /* head of list of keys */
+  ub4       nkeys;                                         /* number of keys */
+  bstuff   *tab;                                       /* table indexed by b */
+  ub4       smax;            /* scramble[] values in 0..smax-1, a power of 2 */
+  ub4       alen;                            /* a in 0..alen-1, a power of 2 */
+  ub4       blen;                            /* b in 0..blen-1, a power of 2 */
+  ub4       salt;                       /* a parameter to the hash function */
+  ub4       n;
+
+  /* the final hash gets ten 80-byte lines, none of them used yet */
+  for (n=0; n<10; ++n) lines[n] = text[n];
+  final.line = lines;
+  final.len  = 10;
+  final.used = 0;
+
+  /* one memory source for key text, another for the key structs */
+  textroot = remkroot((size_t)MAXKEYLEN);
+  keyroot  = remkroot(sizeof(key));
+
+  /* the keywords arrive on stdin */
+  getkeys(&keys, &nkeys, textroot, keyroot, form);
+  printf("Read in %ld keys\n",nkeys);
+
+  /* search for the hash; findhash writes through every pointer passed here */
+  findhash(&tab, &alen, &blen, &salt, &final, 
+           scramble, &smax, keys, nkeys, form);
+
+  /* phash.h first ... */
+  make_h(blen, smax, nkeys, salt);
+  printf("Wrote phash.h\n");
+
+  /* ... then phash.c */
+  make_c(tab, smax, blen, scramble, &final, form);
+  printf("Wrote phash.c\n");
+
+  /* release the memory sources and the table findhash allocated */
+  refree(textroot);
+  refree(keyroot);
+  free((void *)tab);
+  printf("Cleaned up\n");
+}
+
+
+/* Print the command-line help on stdout, then exit; never returns */
+static void usage_error()
+{
+  printf("Usage: perfect [-{NnIiHhDdAaBb}{MmPp}{FfSs}] < key.txt \n");
+  printf("The input is a list of keys, one key per line.\n");
+  printf("Only one of NnIiHhDdAaBb, one of MmPp and one of FfSs may be specified.\n");
+  printf("  N,n: normal mode, key is any string (default).\n");
+  printf("  I,i: initial hash for ASCII char strings.\n");
+  printf("The initial hash must be\n");
+  printf("  hash = PHASHSALT;\n");
+  printf("  for (i=0; i<keylength; ++i) {\n");
+  printf("    hash = (hash ^ key[i]) + ((hash<<26)+(hash>>6));\n");
+  printf("  }\n");
+  printf("Note that this can be inlined in any user loop that walks\n");
+  printf("through the key anyways, eliminating the loop overhead.\n");
+  printf("  H,h: Keys are 4-byte integers in hex in this format:\n");
+  printf("ffffffff\n");
+  printf("This is good for optimizing switch statement compilation.\n");
+  printf("  D,d: Same as H,h, except in decimal not hexadecimal\n");
+  printf("  A,a: An (A,B) pair is supplied in hex in this format:\n");
+  printf("aaa bbb\n");
+  printf("  B,b: Same as A,a, except in decimal not hexadecimal\n");
+  printf("This mode does nothing but find the values of tab[].\n");
+  printf("*A* must be less than the total number of keys.\n");
+  printf("  M,m: Minimal perfect hash.  Hash will be in 0..nkeys-1 (default)\n");
+  printf("  P,p: Perfect hash.  Hash will be in 0..n-1, where n >= nkeys\n");
+  printf("and n is a power of 2.  Will probably use a smaller tab[].\n");
+  printf("  F,f: Fast mode.  Generate the perfect hash fast.\n");
+  printf("  S,s: Slow mode.  Spend time finding a good perfect hash.\n");
+
+  exit(SUCCESS);
+}
+
+
+/* Interpret arguments and call the driver */
+/* See usage_error for the expected arguments */
+int main(argc, argv)
+int    argc;
+char **argv;
+{
+  int      seen_mode = FALSE;                /* a key-format letter was given */
+  int      seen_perfect = FALSE;                      /* M/m or P/p was given */
+  int      seen_speed = FALSE;                        /* F/f or S/s was given */
+  hashform form;
+  char    *opt;
+
+  /* what the user gets when no letters are given */
+  form.mode = NORMAL_HM;
+  form.hashtype = STRING_HT;
+  form.perfect = MINIMAL_HP;
+  form.speed = SLOW_HS;
+
+  /* anything other than zero or one argument is a usage error */
+  if (argc != 1 && argc != 2)
+    usage_error();
+
+  if (argc == 2)
+  {
+    if (argv[1][0] != '-')
+    {
+      usage_error();
+    }
+    else
+    {
+      for (opt = &argv[1][1]; *opt != '\0'; ++opt)
+      {
+        switch (*opt)
+        {
+        /* key format: at most one of these letters */
+        case 'n': case 'N':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = NORMAL_HM;  form.hashtype = STRING_HT;
+          seen_mode = TRUE;
+          break;
+        case 'i': case 'I':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = INLINE_HM;  form.hashtype = STRING_HT;
+          seen_mode = TRUE;
+          break;
+        case 'h': case 'H':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = HEX_HM;     form.hashtype = INT_HT;
+          seen_mode = TRUE;
+          break;
+        case 'd': case 'D':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = DECIMAL_HM; form.hashtype = INT_HT;
+          seen_mode = TRUE;
+          break;
+        case 'a': case 'A':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = AB_HM;      form.hashtype = AB_HT;
+          seen_mode = TRUE;
+          break;
+        case 'b': case 'B':
+          if (seen_mode == TRUE) usage_error();
+          form.mode = ABDEC_HM;   form.hashtype = AB_HT;
+          seen_mode = TRUE;
+          break;
+        /* minimal or merely perfect: at most one of these letters */
+        case 'p': case 'P':
+          if (seen_perfect == TRUE) usage_error();
+          form.perfect = NORMAL_HP;  seen_perfect = TRUE;
+          break;
+        case 'm': case 'M':
+          if (seen_perfect == TRUE) usage_error();
+          form.perfect = MINIMAL_HP; seen_perfect = TRUE;
+          break;
+        /* search effort: at most one of these letters */
+        case 'f': case 'F':
+          if (seen_speed == TRUE) usage_error();
+          form.speed = FAST_HS;  seen_speed = TRUE;
+          break;
+        case 's': case 'S':
+          if (seen_speed == TRUE) usage_error();
+          form.speed = SLOW_HS;  seen_speed = TRUE;
+          break;
+        default:
+          usage_error();
+        }
+      }
+    }
+  }
+
+  /* Generate the [minimal] perfect hash */
+  driver(&form);
+
+  return SUCCESS;
+}
diff --git a/tools/codegen/core/perfect/perfect.h b/tools/codegen/core/perfect/perfect.h
new file mode 100644
index 0000000000..fed5296bb7
--- /dev/null
+++ b/tools/codegen/core/perfect/perfect.h
@@ -0,0 +1,132 @@
+/*
+------------------------------------------------------------------------------
+perfect.h: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, September 1996
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfect.h
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef PERFECT
+#define PERFECT
+
+#define MAXKEYLEN 30            /* size of the buffer each key line is read into */
+#define USE_SCRAMBLE  4096           /* use scramble if blen >= USE_SCRAMBLE */
+#define SCRAMBLE_LEN ((ub4)1<<16)                    /* length of *scramble* */
+#define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
+#define RETRY_PERFECT 1     /* number of times to try to make a perfect hash */
+#define RETRY_HEX     200   /* RETRY_PERFECT when keys are integers (INT_HT) */
+
+/* the generated code for the final hash, assumes initial hash is done */
+struct gencode
+{
+  char **line;          /* array of len text lines, 80 bytes apiece in driver() */
+  /*
+   * The code placed here must declare "ub4 rsl" 
+   * and assign it the value of the perfect hash using the function inputs.
+   * Later code will be tacked on which returns rsl or manipulates it according
+   * to the user directives.
+   *
+   * This code is at the top of the routine; it may and must declare any
+   * local variables it needs.
+   *
+   * Each way of filling in **line should be given a comment that is a unique
+   * tag.  A testcase named with that tag should also be found which tests
+   * the generated code.
+   */
+  ub4    len;                    /* number of lines available for final hash */
+  ub4    used;      /* number of lines used by final hash: line[0..used-1] */
+
+  ub4    lowbit;                          /* for HEX, lowest interesting bit */
+  ub4    highbit;                        /* for HEX, highest interesting bit */
+  ub4    diffbits;                         /* bits which differ for some key */
+  ub4    i,j,k,l,m,n,o;                      /* state machine used in hexn() */
+};
+typedef  struct gencode  gencode;
+
+/* user directives: perfect hash? minimal perfect hash? input is an int? */
+struct hashform
+{
+  enum {
+    NORMAL_HM,                                            /* key is a string */
+    INLINE_HM,    /* user will do initial hash, we must choose salt for them */
+    HEX_HM,              /* key to be hashed is a hexadecimal 4-byte integer */
+    DECIMAL_HM,              /* key to be hashed is a decimal 4-byte integer */
+    AB_HM,      /* key to be hashed is "A B", where A and B are (A,B) in hex */
+    ABDEC_HM                                   /* like AB_HM, but in decimal */
+  } mode;
+  enum {
+    STRING_HT,                                            /* key is a string */
+    INT_HT,                                             /* key is an integer */
+    AB_HT             /* dunno what key is, but input is distinct (A,B) pair */
+  } hashtype;
+  enum {
+    NORMAL_HP,                                   /* just find a perfect hash */
+    MINIMAL_HP                                /* find a minimal perfect hash */
+  } perfect;
+  enum {
+    FAST_HS,                                                    /* fast mode */
+    SLOW_HS                                                     /* slow mode */
+  } speed;
+};
+typedef  struct hashform  hashform;
+
+/* representation of a key; keys form a singly linked list through next_k */
+struct key
+{
+  ub1        *name_k;                                      /* the actual key */
+  ub4         len_k;          /* length of name_k, not counting the newline */
+  ub4         hash_k;  /* initial hash value; read from input for int keys */
+  struct key *next_k;                                            /* next key */
+/* beyond this point is mapping-dependent */
+  ub4         a_k;                            /* a, of the key maps to (a,b) */
+  ub4         b_k;                            /* b, of the key maps to (a,b) */
+  struct key *nextb_k;                               /* next key with this b */
+};
+typedef  struct key  key;
+
+/* things indexed by b of original (a,b) pair */
+struct bstuff
+{
+  ub2  val_b;               /* index into scramble[]: hash=a^scramble[val_b] */
+  key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
+  ub4  listlen_b;                                        /* length of list_b */
+  ub4  water_b;           /* high watermark of who has visited this map node */
+};
+typedef  struct bstuff  bstuff;
+
+/* things indexed by final hash value; perfect() zeroes tabh[] on every attempt */
+struct hstuff
+{
+  key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
+};
+typedef  struct hstuff hstuff;
+
+/* things indexed by queue position; tabq[] is allocated with blen+1 entries */
+struct qstuff
+{
+  bstuff *b_q;                        /* b that currently occupies this hash */
+  ub4     parent_q;     /* queue position of parent that could use this hash */
+  ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
+  ub2     oldval_q;                              /* original value of tab[b] */
+};
+typedef  struct qstuff  qstuff;
+
+/* return ceiling(log based 2 of x) */
+ub4 mylog2(/*_ ub4 x _*/);
+
+/* Given the keys and hash mode, find the perfect hash; sets *smax, scramble[] */
+void findhash(/*_ bstuff **tabb, ub4 *alen, ub4 *blen, ub4 *salt,
+		gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys, 
+		hashform *form _*/);
+
+/* private, but in a different file because it's excessively verbose */
+int inithex(/*_ key *keys, ub4 *alen, ub4 *blen, ub4 smax, ub4 nkeys, 
+	      ub4 salt, gencode *final, gencode *form _*/);
+
+#endif /* PERFECT */
diff --git a/tools/codegen/core/perfect/perfhex.c b/tools/codegen/core/perfect/perfhex.c
new file mode 100644
index 0000000000..9c28dc734b
--- /dev/null
+++ b/tools/codegen/core/perfect/perfhex.c
@@ -0,0 +1,1308 @@
+/*
+------------------------------------------------------------------------------
+perfhex.c: code to generate code for a hash for perfect hashing.
+(c) Bob Jenkins, December 31 1999
+You may use this code in any way you wish, and it is free.  No warranty.
+I hereby place this in the public domain.
+Source is http://burtleburtle.net/bob/c/perfhex.c
+
+The task of this file is to do the minimal amount of mixing needed to
+find distinct (a,b) for each key when each key is a distinct ub4.  That
+means trying all possible ways to mix starting with the fastest.  The
+output is those (a,b) pairs and code in the *final* structure for producing
+those pairs.
+------------------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+#ifndef LOOKUPA
+#include "lookupa.h"
+#endif
+#ifndef RECYCLE
+#include "recycle.h"
+#endif
+#ifndef PERFECT
+#include "perfect.h"
+#endif
+
+/*
+ * Perfect hash for a single key: no instructions are needed at all,
+ * because the lone key is simply given the hash value 0.
+ */
+static void hexone(key *keys, gencode *final)
+{
+  /* emit the single generated line, then record that one line is in use */
+  sprintf(final->line[0], "  ub4 rsl = 0;\n");                    /* h1a: 37 */
+  final->used = 1;
+
+  /* the (a,b) pair of the only key is (0,0) */
+  keys->b_k = 0;
+  keys->a_k = 0;
+}
+
+
+
+/*
+ * Find a perfect hash when there are only two keys.  Max 2 instructions.
+ * Distinct keys differ in at least one bit; the hash is that bit of val,
+ * so it is 0 or 1.  A perfect hash of 2 keys is automatically minimal.
+ */
+static void hextwo(keys, final)
+key     *keys;
+gencode *final;
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 i;
+  
+  if (a == b)
+  {
+    printf("fatal error: duplicate keys\n");
+    exit(SUCCESS);
+  }
+
+  final->used = 1;
+  
+  /* one instruction */
+  if ((a&1) != (b&1))
+  {
+    sprintf(final->line[0], "  ub4 rsl = (val & 1);\n");         /* h2a: 3,4 */
+    return;
+  }
+
+  /* two instructions: find the lowest differing bit i and move it to bit 0 */
+  for (i=0; i<UB4BITS; ++i)
+  {
+    if ((a&((ub4)1<<i)) != (b&((ub4)1<<i))) break;
+  }
+  /* h2b: 4,6.  Must shift right: ((val << i) & 1) is 0 for every i > 0 */
+  sprintf(final->line[0], "  ub4 rsl = ((val >> %ld) & 1);\n", i);
+}
+
+
+
+/*
+ * Return the constant that, xored into each of a, b and c, keeps all three
+ * away from 3.  Callers pass three distinct values drawn from (0,1,2,3).
+ */
+static ub4 find_adder(ub4 a, ub4 b, ub4 c)
+{
+  ub4 adder = 3;
+  adder ^= a ^ b;
+  adder ^= c;
+  return adder;
+}
+
+
+
+/*
+ * Find a perfect hash when there are only three keys.  Max 6 instructions.
+ *
+ * keys a,b,c.  
+ * There exists bit i such that a[i] != b[i].
+ * Either c[i] != a[i] or c[i] != b[i], assume c[i] != a[i].
+ * There exists bit j such that b[j] != c[j].  Note i != j.
+ * Final hash should be no longer than val[i]^val[j].
+ *
+ * A minimal perfect hash needs to xor one of 0,1,2,3 afterwards to cause
+ * the hole to land on 3.  find_adder() finds that constant.
+ */
+static void hexthree(keys, final, form)
+key      *keys;
+gencode  *final;
+hashform *form;
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 c = keys->next_k->next_k->hash_k;
+  ub4 i,j,x,y,z;
+  
+  final->used = 1;
+
+  if (a == b || a == c || b == c)
+  {
+    printf("fatal error: duplicate keys\n");
+    exit(SUCCESS);
+  }
+  
+  /* one instruction: the low two bits already separate the keys */
+  x = a&3; 
+  y = b&3;
+  z = c&3;
+  if (x != y && x != z && y != z)
+  {
+    if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+    {
+      /* h3a: 0,1,2 */
+      sprintf(final->line[0], "  ub4 rsl = (val & 3);\n");
+    }
+    else
+    {
+      /* h3b: 0,3,2.  find_adder() returns ub4: print it with %ld, not %d */
+      sprintf(final->line[0], "  ub4 rsl = ((val & 3) ^ %ld);\n",
+	      find_adder(x,y,z));
+    }
+    return;
+  }
+
+  x = a>>(UB4BITS-2); 
+  y = b>>(UB4BITS-2); 
+  z = c>>(UB4BITS-2); 
+  if (x != y && x != z && y != z)
+  {
+    if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3)) 
+    {
+      /* h3c: 3fffffff, 7fffffff, bfffffff */
+      sprintf(final->line[0], "  ub4 rsl = (val >> %ld);\n", (ub4)(UB4BITS-2));
+    }
+    else
+    {
+      /* h3d: 7fffffff, bfffffff, ffffffff */
+      sprintf(final->line[0], "  ub4 rsl = ((val >> %ld) ^ %ld);\n",
+	      (ub4)(UB4BITS-2), find_adder(x,y,z));
+    }
+    return;
+  }
+
+  /* two instructions: some two adjacent bits separate the keys */
+  for (i=0; i<final->highbit; ++i)
+  {
+    x = (a>>i)&3;
+    y = (b>>i)&3;
+    z = (c>>i)&3;
+    if (x != y && x != z && y != z)
+    {
+      if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+      {
+	/* h3e: ffff3fff, ffff7fff, ffffbfff */
+	sprintf(final->line[0], "  ub4 rsl = ((val >> %ld) & 3);\n", i);
+      }
+      else
+      {
+	/* h3f: ffff7fff, ffffbfff, ffffffff */
+	sprintf(final->line[0], "  ub4 rsl = (((val >> %ld) & 3) ^ %ld);\n", i,
+		find_adder(x,y,z));
+      }
+      return;
+    }
+  }
+
+  /* three instructions */
+  for (i=0; i<=final->highbit; ++i)
+  {
+    x = (a+(a>>i))&3;
+    y = (b+(b>>i))&3;
+    z = (c+(c>>i))&3;
+    if (x != y && x != z && y != z)
+    {
+      if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+      {
+	/* h3g: 0x000, 0x001, 0x100 */
+	sprintf(final->line[0], "  ub4 rsl = ((val+(val>>%ld))&3);\n", i);
+      }
+      else
+      {
+	/* h3h: 0x001, 0x100, 0x101 */
+	sprintf(final->line[0], "  ub4 rsl = (((val+(val>>%ld))&3)^%ld);\n", i,
+		find_adder(x,y,z));
+      }
+      return;
+    }
+  }
+
+  /*
+   * Four instructions: I can prove this will always work.
+   *
+   * If the three values are distinct, there are two bits which 
+   * distinguish them.  Choose the two such bits that are closest together.
+   * If those bits are values 001 and 100 for those three values,
+   * then there either aren't any bits in between
+   * or the in-between bits aren't valued 001, 110, 100, 011, 010, or 101,
+   * because that would violate the closest-together assumption.
+   * So any in-between bits must be 000 or 111, and of 000 and 111 with
+   * the distinguishing bits won't cause them to stop being distinguishing.
+   */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i; j<=final->highbit; ++j)
+    {
+      x = ((a>>i)^(a>>j))&3;
+      y = ((b>>i)^(b>>j))&3;
+      z = ((c>>i)^(c>>j))&3;
+      if (x != y && x != z && y != z)
+      {
+	if (form->perfect == NORMAL_HP || (x != 3 && y != 3 && z != 3))
+	{
+	  /* h3i: 0x00, 0x04, 0x10 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val>>%ld) ^ (val>>%ld)) & 3);\n", i, j);
+	}
+	else
+	{
+	  /* h3j: 0x04, 0x10, 0x14 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((((val>>%ld) ^ (val>>%ld)) & 3) ^ %ld);\n",
+		  i, j, find_adder(x,y,z));
+	}
+	return;
+      }
+    }
+  }
+
+  printf("fatal error: hexthree\n");
+  exit(SUCCESS);
+}
+
+
+
+/*
+ * Return nonzero iff a,b,c,d are some permutation of 0,1,2,3.
+ * Each value is assumed to be less than 32 so that 1<<value is usable.
+ */
+static int testfour(ub4 a, ub4 b, ub4 c, ub4 d)
+{
+  ub4 seen = 0;                /* bit v is toggled once per input equal to v */
+  seen ^= (1<<a);
+  seen ^= (1<<b);
+  seen ^= (1<<c);
+  seen ^= (1<<d);
+  return (seen == 0xf);
+}
+
+
+
+/*
+ * Find a perfect hash when there are only four keys.  Max 10 instructions.
+ * Formulas are tried cheapest first; 4 keys into 0..3 is always minimal.
+ */
+static void hexfour(keys, final)
+key     *keys;
+gencode *final;
+{
+  ub4 a = keys->hash_k;
+  ub4 b = keys->next_k->hash_k;
+  ub4 c = keys->next_k->next_k->hash_k;
+  ub4 d = keys->next_k->next_k->next_k->hash_k;
+  ub4 w,x,y,z;
+  ub4 i,j,k;
+
+  if (a==b || a==c || a==d || b==c || b==d || c==d)
+  {
+    printf("fatal error: Duplicate keys\n");
+    exit(SUCCESS);
+  }
+
+  final->used = 1;
+
+  /* one instruction */
+  if ((final->diffbits & 3) == 3)
+  {
+    w = a&3;
+    x = b&3;
+    y = c&3;
+    z = d&3;
+    if (testfour(w,x,y,z))
+    {
+      sprintf(final->line[0], "  ub4 rsl = (val & 3);\n");   /* h4a: 0,1,2,3 */
+      return;
+    }
+  }
+
+  if (((final->diffbits >> (UB4BITS-2)) & 3) == 3)
+  {
+    w = a>>(UB4BITS-2);
+    x = b>>(UB4BITS-2);
+    y = c>>(UB4BITS-2);
+    z = d>>(UB4BITS-2);
+    if (testfour(w,x,y,z))
+    {                         /* h4b: 0fffffff, 4fffffff, 8fffffff, cfffffff */
+      sprintf(final->line[0], "  ub4 rsl = (val >> %ld);\n", (ub4)(UB4BITS-2));
+      return;
+    }
+  }
+
+  /* two instructions */
+  for (i=final->lowbit; i<final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 3) == 3)
+    {
+      w = (a>>i)&3;
+      x = (b>>i)&3;
+      y = (c>>i)&3;
+      z = (d>>i)&3;
+      if (testfour(w,x,y,z))
+      {                                                      /* h4c: 0,2,4,6 */
+	sprintf(final->line[0], "  ub4 rsl = ((val >> %ld) & 3);\n", i);
+	return;
+      }
+    }
+  }
+
+  /* three instructions (linear with the number of diffbits) */
+  if ((final->diffbits & 3) != 0)
+  {
+    for (i=final->lowbit; i<=final->highbit; ++i)
+    {
+      if (((final->diffbits >> i) & 3) != 0)
+      {
+	w = (a+(a>>i))&3;
+	x = (b+(b>>i))&3;
+	y = (c+(c>>i))&3;
+	z = (d+(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4d: 0,1,2,4 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val + (val >> %ld)) & 3);\n", i);
+	  return;
+	}
+
+	w = (a-(a>>i))&3;
+	x = (b-(b>>i))&3;
+	y = (c-(c>>i))&3;
+	z = (d-(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4e: 0,1,3,5 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val - (val >> %ld)) & 3);\n", i);
+	  return;
+	}
+
+	/* h4f: ((val>>k)-val)&3: redundant with h4e */
+
+	w = (a^(a>>i))&3;
+	x = (b^(b>>i))&3;
+	y = (c^(c>>i))&3;
+	z = (d^(d>>i))&3;
+	if (testfour(w,x,y,z))
+	{                                                    /* h4g: 3,4,5,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val ^ (val >> %ld)) & 3);\n", i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /* four instructions (linear with the number of diffbits) */
+  if ((final->diffbits & 3) != 0)
+  {
+    for (i=final->lowbit; i<=final->highbit; ++i)
+    {
+      if ((((final->diffbits >> i) & 1) != 0) &&
+	  ((final->diffbits & 2) != 0))
+      {
+	w = (a&3)^((a>>i)&1);
+	x = (b&3)^((b>>i)&1);
+	y = (c&3)^((c>>i)&1);
+	z = (d&3)^((d>>i)&1);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4h: 1,2,6,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 3) ^ ((val >> %ld) & 1));\n", i);
+	  return;
+	}
+
+	w = (a&2)^((a>>i)&1);
+	x = (b&2)^((b>>i)&1);
+	y = (c&2)^((c>>i)&1);
+	z = (d&2)^((d>>i)&1);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4i: 1,2,8,a */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 2) ^ ((val >> %ld) & 1));\n", i);
+	  return;
+	}
+      }
+
+      if ((((final->diffbits >> i) & 2) != 0) &&
+	  ((final->diffbits & 1) != 0))
+      {
+	w = (a&3)^((a>>i)&2);
+	x = (b&3)^((b>>i)&2);
+	y = (c&3)^((c>>i)&2);
+	z = (d&3)^((d>>i)&2);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4j: 0,1,3,4 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 3) ^ ((val >> %ld) & 2));\n", i);
+	  return;
+	}
+
+	w = (a&1)^((a>>i)&2);
+	x = (b&1)^((b>>i)&2);
+	y = (c&1)^((c>>i)&2);
+	z = (d&1)^((d>>i)&2);
+	if (testfour(w,x,y,z))
+	{                                                    /* h4k: 1,4,7,8 */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val & 1) ^ ((val >> %ld) & 2));\n", i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /* four instructions (quadratic in the number of diffbits) */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 1) == 1)
+    {
+      for (j=final->lowbit; j<=final->highbit; ++j)
+      {
+	if (((final->diffbits >> j) & 3) != 0)
+	{
+	  /* test +: each key uses its own value in both shifts, as emitted */
+	  w = ((a>>i)+(a>>j))&3;
+	  x = ((b>>i)+(b>>j))&3;
+	  y = ((c>>i)+(c>>j))&3;
+	  z = ((d>>i)+(d>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4l: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) + (val >> %ld)) & 3);\n", 
+		    i, j);
+	    return;
+	  }
+
+	  /* test - */
+	  w = ((a>>i)-(a>>j))&3;
+	  x = ((b>>i)-(b>>j))&3;
+	  y = ((c>>i)-(c>>j))&3;
+	  z = ((d>>i)-(d>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4m: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) - (val >> %ld)) & 3);\n",
+		    i, j);
+	    return;
+	  }
+
+	  /* test ^ */
+	  w = ((a>>i)^(a>>j))&3;
+	  x = ((b>>i)^(b>>j))&3;
+	  y = ((c>>i)^(c>>j))&3;
+	  z = ((d>>i)^(d>>j))&3;
+	  if (testfour(w,x,y,z))
+	  {                                                /* h4n: testcase? */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) ^ (val >> %ld)) & 3);\n",
+		    i, j);
+	    return;
+	  }
+	}
+      }
+    }
+  }
+
+  /* five instructions (quadratic in the number of diffbits) */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    if (((final->diffbits >> i) & 1) != 0)
+    {
+      for (j=final->lowbit; j<=final->highbit; ++j)
+      {
+	if (((final->diffbits >> j) & 3) != 0)
+	{
+	  w = ((a>>j)&3)^((a>>i)&1);
+	  x = ((b>>j)&3)^((b>>i)&1);
+	  y = ((c>>j)&3)^((c>>i)&1);
+	  z = ((d>>j)&3)^((d>>i)&1);
+	  if (testfour(w,x,y,z))
+	  {                                                  /* h4o: 0,4,8,a */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 1));\n", 
+		    j, i);
+	    return;
+	  }
+	  
+	  w = ((a>>j)&2)^((a>>i)&1);
+	  x = ((b>>j)&2)^((b>>i)&1);
+	  y = ((c>>j)&2)^((c>>i)&1);
+	  z = ((d>>j)&2)^((d>>i)&1);
+	  if (testfour(w,x,y,z))
+	  {                                   /* h4p: 0x04, 0x08, 0x10, 0x14 */
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) & 2) ^ ((val >> %ld) & 1));\n", 
+		    j, i);
+	    return;
+	  }
+	}
+	
+	if (i==0)
+	{
+	  w = ((a>>j)^(a<<1))&3;
+	  x = ((b>>j)^(b<<1))&3;
+	  y = ((c>>j)^(c<<1))&3;
+	  z = ((d>>j)^(d<<1))&3;
+	}
+	else
+	{
+	  w = ((a>>j)&3)^((a>>(i-1))&2);
+	  x = ((b>>j)&3)^((b>>(i-1))&2);
+	  y = ((c>>j)&3)^((c>>(i-1))&2);
+	  z = ((d>>j)&3)^((d>>(i-1))&2);
+	}
+	if (testfour(w,x,y,z))
+	{
+	  if (i==0)                                          /* h4q: 0,4,5,8 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) ^ (val << 1)) & 3);\n",
+		    j);
+	  }
+	  else if (i==1)                         /* h4r: 0x01,0x09,0x0b,0x10 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) & 3) ^ (val & 2));\n",
+		    j);
+	  }
+	  else                                               /* h4s: 0,2,6,8 */
+	  {
+	    sprintf(final->line[0], 
+		    "  ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 2));\n",
+		    j, (i-1));
+	  }
+	  return;
+	}
+	  
+	w = ((a>>j)&1)^((a>>i)&2);
+	x = ((b>>j)&1)^((b>>i)&2);
+	y = ((c>>j)&1)^((c>>i)&2);
+	z = ((d>>j)&1)^((d>>i)&2);
+	if (testfour(w,x,y,z))                   /* h4t: 0x20,0x14,0x10,0x06 */
+	{                   
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %ld) & 1) ^ ((val >> %ld) & 2));\n",
+		  j, i);
+	  return;
+	}
+      }
+    }
+  }
+
+  /*
+   * OK, bring out the big guns.
+   * There exist three bits i,j,k which distinguish a,b,c,d.
+   * i^(j<<1)^(k*q) is guaranteed to work for some q in {0,1,2,3},
+   *   proven by exhaustive search of all (8 choose 4) cases.
+   * Find three such bits and try the 4 cases.
+   * Linear with the number of diffbits.
+   * Some cases below may duplicate some cases above.  I did it that way
+   *   so that what is below is guaranteed to work, no matter what was
+   *   attempted above.
+   * The generated hash is at most 10 instructions.
+   */
+  for (i=final->lowbit; i<UB4BITS; ++i)
+  {
+    y = (c>>i)&1;
+    z = (d>>i)&1;
+    if (y != z)
+      break;
+  }
+
+  for (j=final->lowbit; j<UB4BITS; ++j)
+  {
+    x = ((b>>i)&1)^(((b>>j)&1)<<1);
+    y = ((c>>i)&1)^(((c>>j)&1)<<1);
+    z = ((d>>i)&1)^(((d>>j)&1)<<1);
+    if (x != y && x != z && y != z)
+      break;
+  }
+
+  for (k=final->lowbit; k<UB4BITS; ++k)
+  {
+    w = ((a>>i)&1)^(((a>>j)&1)<<1)^(((a>>k)&1)<<2);
+    x = ((b>>i)&1)^(((b>>j)&1)<<1)^(((b>>k)&1)<<2);
+    y = ((c>>i)&1)^(((c>>j)&1)<<1)^(((c>>k)&1)<<2);
+    z = ((d>>i)&1)^(((d>>j)&1)<<1)^(((d>>k)&1)<<2);
+    if (w != x && w != y && w != z && x != y && x != z && y != z)
+      break;
+  }
+
+  /* Assert: bits i,j,k were found which distinguish a,b,c,d */
+  if (i==UB4BITS || j==UB4BITS || k==UB4BITS)
+  {
+    printf("Fatal error: hexfour(), i %ld j %ld k %ld\n", i,j,k);
+    exit(SUCCESS);
+  }
+
+  /* now try the four cases */
+  {
+    ub4 m,n,o,p;
+    
+    /* if any bit has two 1s and two 0s, make that bit o */
+    if (((a>>i)&1)+((b>>i)&1)+((c>>i)&1)+((d>>i)&1) != 2)
+      { m=j; n=k; o=i; }
+    else if (((a>>j)&1)+((b>>j)&1)+((c>>j)&1)+((d>>j)&1) != 2)
+      { m=i; n=k; o=j; }
+    else
+      { m=i; n=j; o=k; }
+    if (m > n) {p=m; m=n; n=p; }                          /* guarantee m < n */
+
+    /* printf("m %ld n %ld o %ld  %ld %ld %ld %ld\n", m, n, o, w,x,y,z); */
+
+    /* seven instructions, multiply bit o by 1 */
+    w = (((a>>m)^(a>>o))&1)^((a>>(n-1))&2);
+    x = (((b>>m)^(b>>o))&1)^((b>>(n-1))&2);
+    y = (((c>>m)^(c>>o))&1)^((c>>(n-1))&2);
+    z = (((d>>m)^(d>>o))&1)^((d>>(n-1))&2);
+    if (testfour(w,x,y,z))
+    {
+      if (m>o) {p=m; m=o; o=p;}                 /* make sure m < o and m < n */
+
+      if (m==0)                                                   /* 0,2,8,9 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val^(val>>%ld))&1)^((val>>%ld)&2));\n", o, n-1);
+      }
+      else                                            /* 0x00,0x04,0x10,0x12 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = ((((val>>%ld) ^ (val>>%ld)) & 1) ^ ((val>>%ld) & 2));\n",
+		m, o, n-1);
+      }
+      return;
+    }
+    
+    /* six to seven instructions, multiply bit o by 2 */
+    w = ((a>>m)&1)^((((a>>n)^(a>>o))&1)<<1);
+    x = ((b>>m)&1)^((((b>>n)^(b>>o))&1)<<1);
+    y = ((c>>m)&1)^((((c>>n)^(c>>o))&1)<<1);
+    z = ((d>>m)&1)^((((d>>n)^(d>>o))&1)<<1);
+    if (testfour(w,x,y,z))
+    {
+      if (m==o-1) {p=n; n=o; o=p;}                /* make m==n-1 if possible */
+
+      if (m==0)                                                   /* 0,1,5,8 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = ((val & 1) ^ (((val>>%ld) ^ (val>>%ld)) & 2));\n",
+		n-1, o-1);
+      }
+      else if (o==0)                                  /* 0x00,0x04,0x05,0x10 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val>>%ld) & 2) ^ (((val>>%ld) ^ val) & 1));\n",
+		m-1, n);
+      }
+      else                                            /* 0x00,0x02,0x0a,0x10 */
+      {
+	sprintf(final->line[0], 
+		"  ub4 rsl = (((val>>%ld) & 1) ^ (((val>>%ld) ^ (val>>%ld)) & 2));\n",
+		m, n-1, o-1);
+      }
+      return;
+    }
+    
+    /* multiplying by 3 is a pain: seven or eight instructions */
+    w = (((a>>m)&1)^((a>>(n-1))&2))^((a>>o)&1)^(((a>>o)&1)<<1);
+    x = (((b>>m)&1)^((b>>(n-1))&2))^((b>>o)&1)^(((b>>o)&1)<<1);
+    y = (((c>>m)&1)^((c>>(n-1))&2))^((c>>o)&1)^(((c>>o)&1)<<1);
+    z = (((d>>m)&1)^((d>>(n-1))&2))^((d>>o)&1)^(((d>>o)&1)<<1);
+    if (testfour(w,x,y,z))
+    {
+      final->used = 2;
+      sprintf(final->line[0], "  ub4 b = (val >> %ld) & 1;\n", o);
+      if (m==o-1 && m==0)                             /* 0x02,0x10,0x11,0x18 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 3) ^ ((val >> %ld) & 2) ^ b);\n", n-1);
+      }
+      else if (m==o-1)                                            /* 0,4,6,c */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %ld) & 3) ^ ((val >> %ld) & 2) ^ b);\n",
+		m, n-1);
+      }
+      else if (m==n-1 && m==0)                                /* 02,0a,0b,18 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 3) ^ b ^ (b << 1));\n");
+      }
+      else if (m==n-1)                                            /* 0,2,4,8 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %ld) & 3) ^ b ^ (b << 1));\n", m);
+      }
+      else if (o==n-1 && m==0)                          /* h4am: not reached */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = ((val & 1) ^ ((val >> %ld) & 3) ^ (b <<1 ));\n",
+		o);
+      }
+      else if (o==n-1)                                /* 0x00,0x02,0x08,0x10 */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val >> %ld) & 1) ^ ((val >> %ld) & 3) ^ (b << 1));\n",
+		m, o);
+      }
+      else if ((m != o-1) && (m != n-1) && (o != m-1) && (o != n-1))
+      {
+	final->used = 3;
+	sprintf(final->line[0], "  ub4 newval = val & 0x%lx;\n", 
+		(((ub4)1<<m)^((ub4)1<<n)^((ub4)1<<o)));
+	if (o==0)                                     /* 0x00,0x01,0x04,0x10 */
+	{
+	  sprintf(final->line[1], "  ub4 b = -newval;\n");
+	}
+	else                                          /* 0x00,0x04,0x09,0x10 */
+	{
+	  sprintf(final->line[1], "  ub4 b = -(newval >> %ld);\n", o);
+	}
+	if (m==0)                                     /* 0x00,0x04,0x09,0x10 */
+	{
+	  sprintf(final->line[2], 
+		  "  ub4 rsl = ((newval ^ (newval>>%ld) ^ b) & 3);\n", n-1);
+	}
+	else                                          /* 0x00,0x03,0x04,0x10 */
+	{
+	  sprintf(final->line[2], 
+		  "  ub4 rsl = (((newval>>%ld) ^ (newval>>%ld) ^ b) & 3);\n",
+		  m, n-1);
+	}
+      }
+      else if (o == m-1)
+      {
+	if (o==0)                                     /* 0x02,0x03,0x0a,0x10 */
+	{
+	  sprintf(final->line[0], "  ub4 b = (val<<1) & 2;\n");
+	}
+	else if (o==1)                                /* 0x00,0x02,0x04,0x10 */
+	{
+	  sprintf(final->line[0], "  ub4 b = val & 2;\n");
+	}
+	else                                          /* 0x00,0x04,0x08,0x20 */
+	{
+	  sprintf(final->line[0], "  ub4 b = (val>>%ld) & 2;\n", o-1);
+	}
+
+	if (o==0)                                     /* 0x02,0x03,0x0a,0x10 */
+	{
+	  sprintf(final->line[1],
+		  "  ub4 rsl = ((val & 3) ^ ((val>>%ld) & 1) ^ b);\n",
+		  n);
+	}
+	else                                          /* 0x00,0x02,0x04,0x10 */
+	{
+	  sprintf(final->line[1],
+		  "  ub4 rsl = (((val>>%ld) & 3) ^ ((val>>%ld) & 1) ^ b);\n",
+		  o, n);
+	}
+      }
+      else                         /* h4ax: 10 instructions, but not reached */
+      {
+	sprintf(final->line[1], 
+		"  ub4 rsl = (((val>>%ld) & 1) ^ ((val>>%ld) & 2) ^ b ^ (b<<1));\n",
+		m, n-1);
+      }
+
+      return;
+    }
+
+    /* five instructions, multiply bit o by 0, covered before the big guns */
+    w = ((a>>m)&1)^(a>>(n-1)&2);
+    x = ((b>>m)&1)^(b>>(n-1)&2);
+    y = ((c>>m)&1)^(c>>(n-1)&2);
+    z = ((d>>m)&1)^(d>>(n-1)&2);
+    if (testfour(w,x,y,z))
+    {                                                    /* h4v, not reached */
+      sprintf(final->line[0], 
+	      "  ub4 rsl = (((val>>%ld) & 1) ^ ((val>>%ld) & 2));\n", m, n-1);
+      return;
+    }
+  }
+
+  printf("fatal error: bug in hexfour!\n");
+  exit(SUCCESS);
+  return;
+}
+
+
+/* TRUE iff no key's a_k repeats and none selects a bit preset in badmask */
+static int testeight(keys, badmask)
+key      *keys;                                         /* keys being hashed */
+ub1       badmask;                       /* used for minimal perfect hashing */
+{
+  ub1  seen = badmask;                /* one bit per a_k value met so far */
+  key *cur = keys;
+  while (cur)
+  {
+    if (bit(seen, 1<<cur->a_k)) return FALSE;
+    bis(seen, 1<<cur->a_k);
+    cur = cur->next_k;
+  }
+  return TRUE;
+}
+
+
+
+/*
+ * Try to find a perfect hash into 0..7 when there are five to eight keys.
+ * Each candidate formula is stored into every key's a_k and checked with
+ * testeight(); the first one that passes is written to final->line[0].
+ * Success is not guaranteed: returns TRUE if a formula was found, else FALSE.
+ */
+static int hexeight(keys, nkeys, final, form)
+key      *keys;
+ub4       nkeys;
+gencode  *final;
+hashform *form;
+{
+  key *mykey;                                       /* walk through the keys */
+  ub4  i,j,k;
+  ub1  badmask;
+
+  printf("hexeight\n");
+
+  /* minimal hashing: mark hash values nkeys..7 in badmask as unusable */
+  badmask = 0;
+  if (form->perfect == MINIMAL_HP)
+  {
+    for (i=nkeys; i<8; ++i)
+      bis(badmask,(1<<i));
+  }
+
+  /* one instruction: the low three bits of the key */
+  for (mykey=keys; mykey; mykey=mykey->next_k)
+    mykey->a_k = mykey->hash_k & 7;
+  if (testeight(keys, badmask))
+  {                                                                   /* h8a */
+    final->used = 1;
+    sprintf(final->line[0], "  ub4 rsl = (val & 7);\n");
+    return TRUE;
+  }
+
+  /* two instructions: a three-bit window starting at bit i */
+  for (i=final->lowbit; i<=final->highbit-2; ++i)
+  {
+    for (mykey=keys; mykey; mykey=mykey->next_k)
+      mykey->a_k = (mykey->hash_k >> i) & 7;
+    if (testeight(keys, badmask))
+    {                                                                 /* h8b */
+      final->used = 1;
+      sprintf(final->line[0], "  ub4 rsl = ((val >> %ld) & 7);\n", i);
+      return TRUE;
+    }
+  }
+
+  /* four instructions (one shift is dropped when i == 0): two shifts, +/^/- */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i+1; j<=final->highbit; ++j)
+    {
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)+(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8c */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val + (val >> %ld)) & 7);\n", j);
+	else                                                          /* h8d */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %ld) + (val >> %ld)) & 7);\n", i, j);
+	return TRUE;
+      }
+
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)^(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8e */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val ^ (val >> %ld)) & 7);\n", j);
+	else                                                          /* h8f */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %ld) ^ (val >> %ld)) & 7);\n", i, j);
+
+	return TRUE;
+      }
+
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+	mykey->a_k = ((mykey->hash_k >> i)-(mykey->hash_k >> j)) & 7;
+      if (testeight(keys, badmask))
+      {
+	final->used = 1;
+	if (i == 0)                                                   /* h8g */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = ((val - (val >> %ld)) & 7);\n", j);
+	else                                                          /* h8h */
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %ld) - (val >> %ld)) & 7);\n", i, j);
+
+	return TRUE;
+      }
+    }
+  }
+
+
+  /* six instructions: the sum of three shifts, with i < j < k */
+  for (i=final->lowbit; i<=final->highbit; ++i)
+  {
+    for (j=i+1; j<=final->highbit; ++j)
+    {
+      for (k=j+1; k<=final->highbit; ++k)
+      {
+	for (mykey=keys; mykey; mykey=mykey->next_k)
+	  mykey->a_k  = ((mykey->hash_k >> i) +
+			 (mykey->hash_k >> j) +
+			 (mykey->hash_k >> k)) & 7;
+	if (testeight(keys, badmask))
+	{                                                             /* h8i */
+	  final->used = 1;
+	  sprintf(final->line[0], 
+		  "  ub4 rsl = (((val >> %ld) + (val >> %ld) + (val >> %ld)) & 7);\n", 
+		  i, j, k);
+	  return TRUE;
+	}
+      }
+    }
+  }
+
+
+  return FALSE;                   /* no candidate formula separated the keys */
+}
+
+
+
+/*
+ * Guns aren't enough.  Bring out the Bomb.  Use tab[].
+ * This finds the initial (a,b) when we need to use tab[].
+ *
+ * We need to produce a different (a,b) every time this is called.  Try all
+ * reasonable cases, fastest first.
+ *
+ * The initial mix (which this determines) can be filled into final starting
+ * at line[1].  val is set and a,b are declared.  The final hash (at line[7])
+ * is a^tab[b] or a^scramble[tab[b]].
+ *
+ * The code will probably look like this, minus some stuff:
+ *     val += CONSTANT;
+ *     val ^= (val<<16);
+ *     val += (val>>8);
+ *     val ^= (val<<4);
+ *     b = (val >> l) & 7;
+ *     a = (val + (val<<m)) >> 29;
+ *     return a^scramble[tab[b]];
+ * Note that *a* and tab[b] will be computed in parallel by most modern chips.
+ *
+ * final->i is the current state of the state machine.
+ * final->j and final->k are counters in the loops the states simulate.
+ */
+static void hexn(keys, salt, alen, blen, final)
+key     *keys;
+ub4      salt;
+ub4      alen;
+ub4      blen;
+gencode *final;
+{
+  key *mykey;
+  ub4  highbit = final->highbit;
+  ub4  lowbit = final->lowbit;
+  ub4  alog = mylog2(alen);
+  ub4  blog = mylog2(blen);
+
+  for (;;)
+  {
+    switch(final->i)
+    {
+    case 1:
+      /* a = val>>30; b=val&3 */
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->a_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-alog);
+	mykey->b_k = (mykey->hash_k >> lowbit) & (blen-1);
+      }
+      if (lowbit == 0)                                                /* hna */
+	sprintf(final->line[5], "  b = (val & 0x%lx);\n", 
+		blen-1);
+      else                                                            /* hnb */
+	sprintf(final->line[5], "  b = ((val >> %ld) & 0x%lx);\n", 
+		lowbit, blen-1);
+      if (highbit+1 == UB4BITS)                                       /* hnc */
+	sprintf(final->line[6], "  a = (val >> %ld);\n",
+		UB4BITS-alog);
+      else                                                            /* hnd */
+	sprintf(final->line[6], "  a = ((val << %ld ) >> %ld);\n",
+		UB4BITS-(highbit+1), UB4BITS-alog);
+  
+      ++final->i;
+      return;
+
+    case 2:
+      /* a = val&3; b=val>>30 */
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->a_k = (mykey->hash_k >> lowbit) & (alen-1);
+	mykey->b_k = (mykey->hash_k << (UB4BITS-(highbit+1)))>>(UB4BITS-blog);
+      }
+      if (highbit+1 == UB4BITS)                                       /* hne */
+	sprintf(final->line[5], "  b = (val >> %ld);\n",
+		UB4BITS-blog);
+      else                                                            /* hnf */
+	sprintf(final->line[5], "  b = ((val << %ld ) >> %ld);\n",
+		UB4BITS-(highbit+1), UB4BITS-blog);
+      if (lowbit == 0)                                                /* hng */
+	sprintf(final->line[6], "  a = (val & 0x%lx);\n", 
+		alen-1);
+      else                                                            /* hnh */
+	sprintf(final->line[6], "  a = ((val >> %ld) & 0x%lx);\n", 
+		lowbit, alen-1);
+  
+      ++final->i;
+      return;
+
+    case 3:
+      /*
+       * cases 3,4,5:
+       * for (k=lowbit; k<=highbit; ++k)
+       *   for (j=lowbit; j<=highbit; ++j)
+       *     b = (val>>j)&3;
+       *     a = (val<<k)>>30;
+       */
+      final->k = lowbit;
+      final->j = lowbit;
+      ++final->i;
+      break;
+
+    case 4:
+      if (!(final->j < highbit))
+      {
+	++final->i;
+	break;
+      }
+      for (mykey=keys; mykey; mykey=mykey->next_k)
+      {
+	mykey->b_k = (mykey->hash_k >> (final->j)) & (blen-1);
+	mykey->a_k = (mykey->hash_k << (UB4BITS-final->k-1)) >> (UB4BITS-alog);
+      }
+      if (final->j == 0)                                              /* hni */
+	sprintf(final->line[5], "  b = val & 0x%lx;\n",
+		blen-1);
+      else if (blog+final->j == UB4BITS)                             /* hnja */
+	sprintf(final->line[5], "  b = val >> %ld;\n",
+		final->j);
+      else
+	sprintf(final->line[5], "  b = (val >> %ld) & 0x%lx;\n",      /* hnj */
+		final->j, blen-1);
+      if (UB4BITS-final->k-1 == 0)                                    /* hnk */
+	sprintf(final->line[6], "  a = (val >> %ld);\n",
+		UB4BITS-alog);
+      else                                                            /* hnl */
+	sprintf(final->line[6], "  a = ((val << %ld) >> %ld);\n",
+		UB4BITS-final->k-1, UB4BITS-alog);
+      while (++final->j < highbit)
+      {
+	if (((final->diffbits>>(final->j)) & (blen-1)) > 2)
+	  break;
+      }
+      return;
+
+    case 5:
+      while (++final->k < highbit)
+      {
+	if ((((final->diffbits<<(UB4BITS-final->k-1))>>alog) & (alen-1)) > 0)
+	  break;
+      }
+      if (!(final->k < highbit))
+      {
+	++final->i;
+	break;
+      }
+      final->j = lowbit;
+      final->i = 4;
+      break;
+
+
+    case 6:
+      /*
+       * cases 6,7,8:
+       * for (k=0; k<UB4BITS-alog; ++k)
+       *   for (j=0; j<UB4BITS-blog; ++j)
+       *     val = val+f(salt);
+       *     val ^= (val >> 16);
+       *     val += (val << 8);
+       *     val ^= (val >> 4);
+       *     b = (val >> j) & 3;
+       *     a = (val + (val << k)) >> 30;
+       */
+      final->k = 0;
+      final->j = 0;
+      ++final->i;
+      break;
+
+    case 7:
+      /* Just do something that will surely work */
+      {
+	ub4 addk = 0x9e3779b9*salt;
+
+	if (!(final->j <= UB4BITS-blog))
+	{
+	  ++final->i;
+	  break;
+	}
+	for (mykey=keys; mykey; mykey=mykey->next_k)
+	{
+	  ub4 val = mykey->hash_k + addk;
+	  if (final->highbit+1 - final->lowbit > 16)
+	    val ^= (val >> 16);
+	  if (final->highbit+1 - final->lowbit > 8)
+	    val += (val << 8);
+	  val ^= (val >> 4);
+	  mykey->b_k = (val >> final->j) & (blen-1);
+	  if (final->k == 0)
+	    mykey->a_k = val >> (UB4BITS-alog);
+	  else
+	    mykey->a_k = (val + (val << final->k)) >> (UB4BITS-alog);
+	}
+	sprintf(final->line[1], "  val += 0x%lx;\n", addk);
+	if (final->highbit+1 - final->lowbit > 16)                    /* hnm */
+	  sprintf(final->line[2], "  val ^= (val >> 16);\n");
+	if (final->highbit+1 - final->lowbit > 8)                     /* hnn */
+	  sprintf(final->line[3], "  val += (val << 8);\n");
+	sprintf(final->line[4], "  val ^= (val >> 4);\n");
+	if (final->j == 0)              /* hno: don't know how to reach this */
+	  sprintf(final->line[5], "  b = val & 0x%lx;\n", blen-1);
+	else                                                          /* hnp */
+	  sprintf(final->line[5], "  b = (val >> %ld) & 0x%lx;\n",
+		  final->j, blen-1);
+	if (final->k == 0)                                            /* hnq */
+	  sprintf(final->line[6], "  a = val >> %ld;\n", UB4BITS-alog);
+	else                                                          /* hnr */
+	  sprintf(final->line[6], "  a = (val + (val << %ld)) >> %ld;\n",
+		  final->k, UB4BITS-alog);
+
+	++final->j;
+	return;
+      }
+
+    case 8:
+      ++final->k;
+      if (!(final->k <= UB4BITS-alog))
+      {
+	++final->i;
+	break;
+      }
+      final->j = 0;
+      final->i = 7;
+      break;
+
+    case 9:
+      final->i = 6;
+      break;
+    }
+  }
+}
+
+
+
+/* find the highest and lowest bit where any key differs */
+static void setlow(keys, final)
+key     *keys;    /* list of all keys; may be empty */
+gencode *final;   /* receives diffbits, lowbit and highbit */
+{
+  key *cur = keys;
+  ub4  pos;
+  ub4  base = keys ? keys->hash_k : (ub4)0;    /* hash of the first key */
+
+  /* final->diffbits collects every bit in which some key differs from base */
+  final->diffbits = (ub4)0;
+  while (cur != (key *)0)
+  {
+    final->diffbits |= (base ^ cur->hash_k);
+    cur = cur->next_k;
+  }
+
+  /* lowest differing bit: scan up from bit 0; UB4BITS if nothing differs */
+  pos = 0;
+  while (pos < UB4BITS && !(final->diffbits & (((ub4)1)<<pos)))
+    ++pos;
+  final->lowbit = pos;
+
+  /* highest differing bit: scan down from the top; bit 0 is the fallback */
+  pos = UB4BITS-1;
+  while (pos != 0 && !(final->diffbits & (((ub4)1)<<pos)))
+    --pos;
+  final->highbit = pos;
+}
+
+/* 
+ * Initialize (a,b) when keys are integers.
+ *
+ * Normally there's an initial hash which produces a number.  That hash takes
+ * an initializer.  Changing the initializer causes the initial hash to 
+ * produce a different (uniformly distributed) number without any extra work.
+ *
+ * Well, here we start with a number.  There's no initial hash.  Any mixing
+ * costs extra work.  So we go through a lot of special cases to minimize the
+ * mixing needed to get distinct (a,b).  For small sets of keys, it's often
+ * fastest to skip the final hash and produce the perfect hash from the number
+ * directly.
+ *
+ * The target user for this is switch statement optimization.  The common case
+ * is 3 to 16 keys, and instruction counts matter.  The competition is a 
+ * binary tree of branches.
+ *
+ * Return TRUE if we found a perfect hash and no more work is needed.
+ * Return FALSE if we just did an initial hash and more work is needed.
+ */
+int inithex(keys, nkeys, alen, blen, smax, salt, final, form)
+key      *keys;                                          /* list of all keys */
+ub4       nkeys;                                   /* number of keys to hash */
+ub4       alen;                    /* (a,b) has a in 0..alen-1, a power of 2 */
+ub4       blen;                    /* (a,b) has b in 0..blen-1, a power of 2 */
+ub4       smax;        /* maximum range of computable hash values (not read) */
+ub4       salt;                     /* used to initialize the hash function */
+gencode  *final;                          /* output, code for the final hash */
+hashform *form;                                           /* user directives */
+{
+  int row;                                     /* index into final->line[] */
+
+  setlow(keys, final);
+  /* one to four keys: each count has its own routine; no more work needed */
+  if (nkeys == 1)
+  {
+    hexone(keys, final);
+    return TRUE;
+  }
+  if (nkeys == 2)
+  {
+    hextwo(keys, final);
+    return TRUE;
+  }
+  if (nkeys == 3)
+  {
+    hexthree(keys, final, form);
+    return TRUE;
+  }
+  if (nkeys == 4)
+  {
+    hexfour(keys, final);
+    return TRUE;
+  }
+
+  /* five to eight keys: first time through only, maybe tab[] is not needed */
+  if (nkeys >= 5 && nkeys <= 8 && salt == 1 &&
+      hexeight(keys, nkeys, final, form))
+    return TRUE;
+
+  /* general case: first time through, lay out the eight-line code skeleton */
+  if (salt == 1)
+  {
+    final->used = 8;
+    final->i = 1;
+    final->j = final->k = final->l = final->m = final->n = final->o = 0;
+    sprintf(final->line[0], "  ub4 a, b, rsl;\n");
+    for (row = 1; row <= 6; ++row)          /* lines 1..6 start out blank */
+      sprintf(final->line[row], "\n");
+    if (blen < USE_SCRAMBLE)
+      sprintf(final->line[7], "  rsl = (a^tab[b]);\n");               /* hns */
+    else
+      sprintf(final->line[7], "  rsl = (a^scramble[tab[b]]);\n");     /* hnt */
+  }
+  hexn(keys, salt, alen, blen, final);   /* initial hash only; caller goes on */
+  return FALSE;
+}
diff --git a/tools/codegen/core/perfect/recycle.c b/tools/codegen/core/perfect/recycle.c
new file mode 100644
index 0000000000..3f857cba7d
--- /dev/null
+++ b/tools/codegen/core/perfect/recycle.c
@@ -0,0 +1,87 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.c
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+# include "standard.h"
+#endif
+#ifndef RECYCLE
+# include "recycle.h"
+#endif
+
+reroot *remkroot(size)       /* make an empty root for items of size bytes */
+size_t  size;
+{
+   reroot *root = (reroot *)remalloc(sizeof(reroot), "recycle.c, root");
+   root->size    = align(size);        /* item size, rounded up by align() */
+   root->logsize = RESTART;            /* log_2 of items in the first block */
+   root->numleft = 0;                  /* no block has been allocated yet */
+   root->trash   = (recycle *)0;
+   root->list    = (recycle *)0;
+   return root;
+}
+
+void  refree(r)            /* free every block on r->list, then r itself */
+struct reroot *r;
+{
+   recycle *block = r->list;
+   recycle *following;
+   for (; block; block = following)
+   {  /* read the link before the block that holds it is freed */
+      following = block->next;
+      free((char *)block);
+      r->list = following;
+   }
+   free((char *)r);
+}
+
+/* slow path of renew: reuse a deleted item or start a new block; call via renew only */
+char  *renewx(r)
+struct reroot *r;
+{
+   recycle *temp;
+   if (r->trash)
+   {  /* pull a node off the trash heap and clear it for reuse */
+      temp = r->trash;
+      r->trash = temp->next;
+      (void)memset((void *)temp, 0, r->size);
+   }
+   else
+   {  /* allocate a new block: one recycle link header followed by the items */
+      r->numleft = r->size*((ub4)1<<r->logsize);
+      if (r->numleft < REMAX) ++r->logsize;  /* next block holds twice as many */
+      temp = (recycle *)remalloc(sizeof(recycle) + r->numleft, "recycle.c, data");
+      (void)memset((void *)(temp+1), 0, r->numleft);  /* fresh items start cleared too */
+      temp->next = r->list;
+      r->list = temp;
+      r->numleft-=r->size;                   /* the last item is handed out first */
+      temp = (recycle *)((char *)(r->list+1)+r->numleft);
+   }
+   return (char *)temp;
+}
+
+char   *remalloc(len, purpose)  /* malloc len bytes, or report and exit */
+size_t  len;                    /* number of bytes wanted */
+char   *purpose;                /* caller's label, printed if malloc fails */
+{
+  char *x = (char *)malloc(len);
+  if (!x)
+  {
+    /* len is a size_t: print it as unsigned long, since "%d" expects an int */
+    fprintf(stderr, "malloc of %lu failed for %s\n", (unsigned long)len, purpose);
+    exit(SUCCESS);   /* NOTE(review): the exit status is SUCCESS even on failure */
+  }
+  return x;
+}
+
diff --git a/tools/codegen/core/perfect/recycle.h b/tools/codegen/core/perfect/recycle.h
new file mode 100644
index 0000000000..7472495e84
--- /dev/null
+++ b/tools/codegen/core/perfect/recycle.h
@@ -0,0 +1,65 @@
+/*
+--------------------------------------------------------------------
+By Bob Jenkins, September 1996.  recycle.h
+You may use this code in any way you wish, and it is free.  No warranty.
+
+This manages memory for commonly-allocated structures.
+It allocates RESTART to REMAX items at a time.
+Timings have shown that, if malloc is used for every new structure,
+  malloc will consume about 90% of the time in a program.  This
+  module cuts down the number of mallocs by an order of magnitude.
+This also decreases memory fragmentation, and freeing all structures
+  only requires freeing the root.
+--------------------------------------------------------------------
+*/
+
+#ifndef STANDARD
+#include "standard.h"
+#endif
+
+#ifndef RECYCLE
+#define RECYCLE
+
+#define RESTART    0          /* initial logsize: the first block holds 1<<RESTART items */
+#define REMAX      32000      /* logsize stops growing once a block reaches this many bytes */
+
+struct recycle              /* link header: heads each malloced block, and */
+{                           /* overlays each deleted item on the trash list */
+   struct recycle *next;
+};
+typedef  struct recycle  recycle;
+
+struct reroot
+{
+   struct recycle *list;     /* list of malloced blocks, newest first */
+   struct recycle *trash;    /* list of deleted items */
+   size_t          size;     /* size of an item in bytes, as rounded by align() */
+   size_t          logsize;  /* log_2 of number of items in the next block */
+   word            numleft;  /* number of bytes left in the newest block */
+};
+typedef  struct reroot  reroot;
+
+/* make a new recycling root */
+reroot  *remkroot(/*_ size_t mysize _*/);
+
+/* free a recycling root and all the items it has made */
+void     refree(/*_ struct reroot *r _*/);
+
+/* get a new (cleared) item from the root; r is evaluated more than once */
+#define renew(r) ((r)->numleft ? \
+   (((char *)((r)->list+1))+((r)->numleft-=(r)->size)) : renewx(r))
+
+char    *renewx(/*_ struct reroot *r _*/);
+
+/* delete an item; let the root recycle it (root and item are each evaluated twice) */
+/* void     redel(/o_ struct reroot *r, struct recycle *item _o/); */
+#define redel(root,item) { \
+   ((recycle *)(item))->next=(root)->trash; \
+   (root)->trash=(recycle *)(item); \
+}
+
+/* malloc, but complain to stderr and exit program if no joy */
+/* use plain free() to free memory allocated by remalloc() */
+char    *remalloc(/*_ size_t len, char *purpose _*/);
+
+#endif  /* RECYCLE */
diff --git a/tools/codegen/core/perfect/run.sh b/tools/codegen/core/perfect/run.sh
new file mode 100755
index 0000000000..8dc5911cbd
--- /dev/null
+++ b/tools/codegen/core/perfect/run.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -e                          # abort on the first failing command
+cd "$(dirname "$0")"            # build and run from this script's directory
+gcc -o perfect perfect.c recycle.c lookupa.c perfhex.c 2> compile.txt
+fn=$1                           # first argument: file fed to perfect on stdin
+shift                           # remaining arguments go to perfect unchanged
+./perfect "$@" < "$fn" &> hash.txt
diff --git a/tools/codegen/core/perfect/standard.h b/tools/codegen/core/perfect/standard.h
new file mode 100644
index 0000000000..202a5d658c
--- /dev/null
+++ b/tools/codegen/core/perfect/standard.h
@@ -0,0 +1,57 @@
+/*
+------------------------------------------------------------------------------
+Standard definitions and types, Bob Jenkins
+------------------------------------------------------------------------------
+*/
+#ifndef STANDARD
+# define STANDARD
+# ifndef STDIO
+#  include <stdio.h>
+#  define STDIO
+# endif
+# ifndef STDDEF
+#  include <stddef.h>
+#  define STDDEF
+# endif
+typedef  unsigned long long  ub8;
+#define UB8MAXVAL 0xffffffffffffffffLL
+#define UB8BITS 64
+typedef    signed long long  sb8;
+#define SB8MAXVAL 0x7fffffffffffffffLL
+typedef  unsigned long  int  ub4;   /* meant as 4 bytes; NOTE(review): unsigned long is 8 bytes on LP64 targets */
+#define UB4MAXVAL 0xffffffff
+typedef    signed long  int  sb4;
+#define UB4BITS 32                  /* nominal bit width of ub4 */
+#define SB4MAXVAL 0x7fffffff
+typedef  unsigned short int  ub2;
+#define UB2MAXVAL 0xffff
+#define UB2BITS 16
+typedef    signed short int  sb2;
+#define SB2MAXVAL 0x7fff
+typedef  unsigned       char ub1;
+#define UB1MAXVAL 0xff
+#define UB1BITS 8
+typedef    signed       char sb1;   /* signed 1-byte quantities */
+#define SB1MAXVAL 0x7f
+typedef                 int  word;  /* fastest type available */
+
+#define bis(target,mask)  ((target) |=  (mask))   /* set the mask bits in target */
+#define bic(target,mask)  ((target) &= ~(mask))   /* clear the mask bits in target */
+#define bit(target,mask)  ((target) &   (mask))   /* the mask bits that are set in target */
+#ifndef min
+# define min(a,b) (((a)<(b)) ? (a) : (b))         /* one argument is evaluated twice */
+#endif /* min */
+#ifndef max
+# define max(a,b) (((a)<(b)) ? (b) : (a))         /* one argument is evaluated twice */
+#endif /* max */
+#ifndef align   /* round a up to the next multiple of sizeof(void *) */
+# define align(a) (((ub4)(a)+(sizeof(void *)-1))&(~(sizeof(void *)-1)))
+#endif /* align */
+#ifndef abs
+# define abs(a)   (((a)>0) ? (a) : -(a))          /* a is evaluated twice */
+#endif
+#define TRUE  1
+#define FALSE 0
+#define SUCCESS 0  /* value passed to exit(); 1 on VAX */
+
+#endif /* STANDARD */
-- 
GitLab