/*
 * Released under GNU General Public Licence as of version ppsearch15.c (Jan 2003)
 * http://www.gnu.org/copyleft/gpl.html
 */
//
// ppsearch.c - primitive polynomial search and primitivity test program
//
//    This program tests a binary polynomial for primitivity by taking a
//    primitive element and raising it to the power 2^n, modulo the
//    polynomial. If the original primitive element is not returned, 
//    the polynomial fails the primitivity test. An alternate test is to
//    raise the candidate primitive to 2^n-1. Any primitive element raised
//    to 2^n-1 returns One. This test uses the 2^n form of the test because
//    it is slightly faster. If 2^n-1 is not prime, additional tests are required
//    because the sequence of polynomials could have repeated 2^n-1 / (prime)
//    times. For each prime factor, the primitive element is raised to the
//    power 2^n-1 / (prime). If one is returned for any of these additional tests,
//    the polynomial is not primitive.
//
//    The polynomial exponentiation algorithm is standard square and multiply.
//    A small optimization is made by choosing the primitive element as a 
//    "right shift" of the primitive polynomial. This makes the multiply part
//    of multiply and square relatively insignificant.
//    
//
//    Note: no attempt at portability has been made. Your processor must
//    support the Pentium III instruction set. The code has been tested
//    only Microsoft Windows 2000, but other windows versions will probably
//    work. Two compilers have been tested. Microsoft Visual C6 with the
//    current service pack and the processor pack. As of late 2000, the
//    processor pack is required because without it there is no SIMD support.
//    Intel's compiler is the other choice and gives better performance
//    when optimized for Pentium III. The Intel compiler version is Version 5.0
//    Beta 2 000712 (Package ID: C5.0B2-B5.1), but other versions may work.
//
//            scott.duplichan#hp.com
//
/* ppsearch15.c
 * Modified 2003, Jean-Luc Cooke
 * jlcooke -a- certainkey -point- com
 *
 * 1) Now includes LaTeX output mode.
 * 2) Now compiles in LINUX with this architecture tune-able Makeflie:
CCC	=gcc
ARCH	=i686 # specify architecture here
VER	=16
OPTS	=-O3 -s -Wall -Werror -march=$(ARCH) -DLINUX
all:	ppsearch$(VER)
clean:
	rm -f ppsearch$(VER)
ppsearch$(VER): ppsearch$(VER).c
	$(CCC) $(OPTS) $@.c -o $@
 */
/* ppsearch 16.c
 * From Scott:
 * Also, I found a version of the program with a small improvement. It is
 * not on the web, but I will attach it. The improvement is that for small
 * polynomials, it can run successfully without the factorization files. It
 * just uses trial division for the small factorizations.
 */


#include <stdio.h>
#include <string.h>
#include <time.h>
#include <math.h>
#if defined WIN64
 #include <ia64intrin.h>
 #include <windows.h>
#elif defined WIN32
 #include <xmmintrin.h>
 #include <windows.h>
#elif defined LINUX // GCC22
#else
 #error No architecture specified WIN32 WIN64 or LINUX
#endif

//#define PENTIUMII

// set MAXBITS to the size of the largest extended integer. This needs to be
// at least one bigger than the highest degree primitive polynomial search or
// test. Shift and xor operations become slower as MAXBITS is increased.

#ifndef MAXBITS
 #define MAXBITS 512 // (this is now set by a make file)
#endif

#if defined (WIN64) || defined (PENTIUMII)
#  define BIG_INT_BITS 64
#  if MAXBITS & 0x3F
#     error MAXBITS must be a multiple of 64
#  endif
#else
#  define BIG_INT_BITS 128
#  if MAXBITS & 0x7F
#     error MAXBITS must be a multiple of 128
#  endif
#endif

#define BIGINT_COUNT ((MAXBITS + BIG_INT_BITS - 1) / BIG_INT_BITS)  // padded out to a multiple of BIG_INT_BITS bits
#define UINT64_COUNT  (BIGINT_COUNT * BIG_INT_BITS / 64)
#define UINT32_COUNT  (BIGINT_COUNT * BIG_INT_BITS / 32)
#define UINT8_COUNT   (BIGINT_COUNT * BIG_INT_BITS / 8)
#define XORINT_COUNT  (BIGINT_COUNT * BIG_INT_BITS / XORINT_BITS)
#define UINTN_COUNT   (BIGINT_COUNT * BIG_INT_BITS / UINTN_BITS)

//----------------------------------------------------------------------------
// compiler and platform dependencies
//
#define INTN int            // longest native integer
#define UINTN unsigned INTN // unsigned version
#define UINTN_BITS 32       // avoid sizeof() here because of C preprocessor rule
#if defined (WIN64)
 #define _mm_slli_si64(arg,index) (arg << index)
 #define _mm_srli_si64(arg,index) (arg >> index)
 #define _mm_or_si64(arg1,arg2) (arg1 | arg2)
 #define _mm_xor_ps(arg1,arg2) (arg1 ^ arg2)
 typedef UINT64 SHIFT64;
 typedef UINT64 XORINT;
 #define XORINT_BITS 64
#elif defined PENTIUMII // WIN32, PENTIUMII
 typedef __m64 SHIFT64;
 typedef __m64 XORINT;
 #define XORINT_BITS 64
 #define _mm_xor_ps(arg1,arg2) _mm_xor_si64(arg1, arg2)
#elif defined WIN32 // WIN32, PENTIUMIII
 typedef __m64 SHIFT64;
 typedef __m128 XORINT;
 #define XORINT_BITS 128
#else // LINUX GCC22
 // tolower()
 #include <ctype.h>

 // calloc, malloc, realloc, ...
 #include <stdlib.h>

 // nice()
 #include <unistd.h>

 // int types
 #include <stdint.h>
 typedef uint8_t UINT8;
 typedef uint32_t UINT32;
 typedef uint64_t UINT64;
 typedef uint64_t __m64;

 #define __forceinline inline

 // va_list
 #include <ncurses/curses.h>
 #include <ncurses/eti.h>

 // windows -izum
 #define strnicmp strncasecmp

 // used to XOR poly
 #define _mm_xor_ps(A,B) ( ((XORINT)A) ^ ((XORINT)B) )

 // used to shift poly
 #define _mm_slli_si64(A,B) ( ((SHIFT64)A) << (B) )
 #define _mm_srli_si64(A,B) ( ((SHIFT64)A) >> (B) )
 #define _mm_or_si64(A,B)   ( ((XORINT)A) | ((XORINT)B) )
 #define min(A,B)           ( ((A) < (B)) ? (A) : (B) )

 // from WIN32 & P2
 typedef __m64 SHIFT64;
 typedef __m64 XORINT;
 #define XORINT_BITS 64
#endif

//----------------------------------------------------------------------------
// large integer structure
//
typedef union
   {
   UINTN   uintn   [UINTN_COUNT];
   UINT8   uint8   [UINT8_COUNT];
   UINT32  uint32  [UINT32_COUNT];
   UINT64  uint64  [UINT64_COUNT];  // 64-bit c language integer
   SHIFT64 shift64 [UINT64_COUNT];  // best 64-bit shift, SIMD (MMX) for IA-32, __int64 for IA-64
   __m64   m64     [UINT64_COUNT];  // 64-bit type for macros
   XORINT  xorint  [XORINT_COUNT];  // best type for fast XOR
   }
INTEGER;

//----------------------------------------------------------------------------
// structure to keep numbers 2^n-1 / (a single prime) in
//
typedef struct
   {
   INTEGER  *divisor;
   unsigned count;
   }
DIVISOR_LIST;

// structure for accessing list of irreducable polynomials kept in disk file
typedef struct
   {
   UINT32  *list;
   unsigned initialized, count;
#if defined WIN32 || defined WIN64
   HANDLE   fileHandle, objectHandle;
#else // LINUX
   FILE *fileHandle, *objectHandle;
   #define INVALID_HANDLE_VALUE NULL
#endif
   }
IRREDUCABLEINFO;

//----------------------------------------------------------------------------
// extended integer constants
static INTEGER IntegerZero;
static INTEGER IntegerOne;
static INTEGER IntegerTwo;

//----------------------------------------------------------------------------
// 
// functions for sequentially returning the binomial coefficients (x choose m)
// quantity of different ways to choose m of x items. Caller calls firstCombination
// with m value and an array of size m. For all the additional combinations,
// nextCombination is called. Array is returned with elements selected from [0, m-1].
// If an array element other than array [m-1] is changed, the global multipleBitUpdate
// is incremented to flag the event. when nextCombination returns < 0, all x choose m
//  combinations have been generated.
//
__forceinline signed firstCombination (unsigned m, unsigned *array)
   {
   unsigned index;

   for (index = 0; index < m; index++)
      array [index] = index;
   return m - 1;
   }

//----------------------------------------------------------------------------

__forceinline signed nextCombination (unsigned x, unsigned m, signed walkingIndex, unsigned *array)
   {
   unsigned index;
   if (++array [walkingIndex] > x - m + walkingIndex)
      {
      for (;;)
         {
         if (--walkingIndex < 0)
            return walkingIndex;
         if (++array [walkingIndex] <= x - m + walkingIndex)
            break;
         }
      for (index = walkingIndex + 1; index < m; index++)
         array [index] = array [walkingIndex] + index - walkingIndex;
      walkingIndex = m - 1;
      }
   return walkingIndex;
   }

//----------------------------------------------------------------------------
//
// findHammingWeight
//
unsigned findHammingWeight (INTEGER *arg)
   {
   unsigned index, bit, weight = 0;

   for (index = 0; index < UINTN_COUNT; index++)
      for (bit = 0; bit < UINTN_BITS; bit++)
         weight += ((arg->uintn [index] >> bit) & 1);
   return weight;
   }

//----------------------------------------------------------------------------
// 
// logError - printf message to stdout and exit
//

void logError (char *message,...)
   {
   va_list Marker;
   char    buffer [100];

   va_start (Marker, message);
   vsprintf(buffer, message, Marker);
   va_end(Marker);
   fprintf (stderr, "\n%s\n", buffer);
   exit (1);
   }

//----------------------------------------------------------------------------
//
// setbit - set a bit in an extended integer
//
__forceinline void setbit (INTEGER *data, unsigned bitnumber)
   {
   #if defined (_DEBUG)
   if (bitnumber >= MAXBITS)
      logError ("need to recompile with more MAXBITS\n");
   #endif

   data->uintn [bitnumber / UINTN_BITS] |= (UINTN) 1 << (bitnumber % UINTN_BITS);
   }

//----------------------------------------------------------------------------
//
// clearbit - clear a bit in an extended integer
//
__forceinline void clearbit (INTEGER *data, unsigned bitnumber)
   {
   #if defined (_DEBUG)
   if (bitnumber >= MAXBITS)
      logError ("need to recompile with more MAXBITS\n");
   #endif

   data->uintn [bitnumber / UINTN_BITS] &= ~((UINTN) 1 << (bitnumber % UINTN_BITS));
   }

//----------------------------------------------------------------------------
//
//  highestSetBit - finds highest set bit in extended integer. x86 optimized
//

#if defined WIN64
__forceinline signed highestSetBit (INTEGER *data)
   {
   signed  bitno = MAXBITS - 1;
   signed  index = UINTN_COUNT - 1;
   INTN    temp;

   while (index > 0)
      {
      if (data->uintn [index]) break;
      index--;
      bitno -= UINTN_BITS;
      }
   
   temp = data->uintn [index];
   for (;;)
      {
      if (temp < 0) break;
      temp <<= 1;
      if (--bitno < 0) break;
      }

   return bitno;
   }

#else // WIN32 or LINUX

__forceinline signed highestSetBit (INTEGER *data)
   {
   signed  bitno = MAXBITS - 1;
   signed  index = UINTN_COUNT - 1;
#if defined WIN32
   static unsigned fail = 0xFFFFFFFF;
#else // LINUX
#endif
   INTN    temp;

   while (index > 0)
      {
      if (data->uintn [index]) break;
      index--;
      bitno -= UINTN_BITS;
      }
   
   temp = data->uintn [index];
#if defined WIN32
   __asm mov ebx, temp
   __asm bsr eax, ebx
   __asm sub eax, 31
   __asm add eax, bitno
   __asm or ebx, ebx
   __asm cmove eax, fail
   __asm mov bitno, eax
#else // LINUX
   for (;;)
      {
      if (temp < 0) break;
      temp <<= 1;
      if (--bitno < 0) break;
      }
#endif
   return bitno;
   }

#endif
//----------------------------------------------------------------------------
//
// pullbit - return the value of a selected bit from a extended integer
//
__forceinline unsigned pullbit (INTEGER *data, unsigned bitNumber)
   {
   return (unsigned) ((data->uintn [bitNumber / UINTN_BITS] >> (bitNumber % UINTN_BITS)) & 1);
   }

//----------------------------------------------------------------------------
//
// shiftLeft - shift left extended integer
//
__forceinline void shiftLeft (INTEGER *source, INTEGER *dest, unsigned shiftCount)
   {
   unsigned sourceIndex = UINTN_COUNT - 1 - shiftCount / UINTN_BITS;
   unsigned destIndex   = UINTN_COUNT - 1;
   unsigned leftShift = shiftCount % UINTN_BITS;
   unsigned rightShift = UINTN_BITS - leftShift;

   #if defined (_DEBUG)
   if (shiftCount == 0)
      logError ("error, shift count is zero\n");
   #endif

   if (!leftShift)
      while (sourceIndex > 0)
         {
         dest->uintn [destIndex] = source->uintn [sourceIndex];
         sourceIndex--;
         destIndex--;
         }
   else
      while (sourceIndex > 0)
         {
         dest->uintn [destIndex] = (source->uintn [sourceIndex - 0] << leftShift) |
                                   (source->uintn [sourceIndex - 1] >> rightShift);
         sourceIndex--;
         destIndex--;
         }

   dest->uintn [destIndex] = source->uintn [0] << leftShift;
   while (destIndex > 0)
      dest->uintn [--destIndex] = 0;
   }

//----------------------------------------------------------------------------
//
// shiftLeftOnce - shift left extended integer once. Optimized for single shift
//
#define shiftSection()                                                                             \
   current = next;                                                                                 \
   sourceIndex--;                                                                                  \
   next = source->shift64 [sourceIndex - 1];                                                       \
   dest->shift64 [sourceIndex] = _mm_or_si64 (_mm_slli_si64 (current, 1), _mm_srli_si64 (next, 63))

__forceinline void shiftLeftOnce (INTEGER *source, INTEGER *dest)
   {
   signed sourceIndex = UINT64_COUNT;
   SHIFT64 current, next;

   next = source->shift64 [sourceIndex - 1];
   
   #if UINT64_COUNT > 1
   shiftSection();
   #endif
   #if UINT64_COUNT > 2
   shiftSection();
   #endif
   #if UINT64_COUNT > 3
   shiftSection();
   #endif
   #if UINT64_COUNT > 4
   shiftSection();
   #endif
   #if UINT64_COUNT > 5
   shiftSection();
   #endif
   #if UINT64_COUNT > 6
   shiftSection();
   #endif
   #if UINT64_COUNT > 7
   shiftSection();
   #endif
   #if UINT64_COUNT > 8
   shiftSection();
   #endif
   #if UINT64_COUNT > 9
   shiftSection();
   #endif
   #if UINT64_COUNT > 10
   shiftSection();
   #endif
   #if UINT64_COUNT > 11
   shiftSection();
   #endif
   #if UINT64_COUNT > 12
   shiftSection();
   #endif
   #if UINT64_COUNT > 13
   shiftSection();
   #endif
   #if UINT64_COUNT > 14
   shiftSection();
   #endif
   #if UINT64_COUNT > 15
   shiftSection();
   #endif
   #if UINT64_COUNT > 16
   shiftSection();
   #endif
   #if UINT64_COUNT > 17
   shiftSection();
   #endif
   #if UINT64_COUNT > 18
   shiftSection();
   #endif
   #if UINT64_COUNT > 19
   shiftSection();
   #endif
   #if UINT64_COUNT > 20
   shiftSection();
   #endif
   #if UINT64_COUNT > 21
   shiftSection();
   #endif
   #if UINT64_COUNT > 22
   shiftSection();
   #endif
   #if UINT64_COUNT > 23
   shiftSection();
   #endif
   #if UINT64_COUNT > 24
   shiftSection();
   #endif
   #if UINT64_COUNT > 25
   shiftSection();
   #endif
   #if UINT64_COUNT > 26
   shiftSection();
   #endif
   #if UINT64_COUNT > 27
   shiftSection();
   #endif
   #if UINT64_COUNT > 28
   shiftSection();
   #endif
   #if UINT64_COUNT > 29
   shiftSection();
   #endif
   #if UINT64_COUNT > 30
   shiftSection();
   #endif
   #if UINT64_COUNT > 31
   shiftSection();
   #endif
   #if UINT64_COUNT > 32
   shiftSection();
   #endif
   #if UINT64_COUNT > 33
   shiftSection();
   #endif
   #if UINT64_COUNT > 34
   shiftSection();
   #endif
   #if UINT64_COUNT > 35
   shiftSection();
   #endif
   #if UINT64_COUNT > 36
   shiftSection();
   #endif
   #if UINT64_COUNT > 37
   shiftSection();
   #endif
   #if UINT64_COUNT > 38
   shiftSection();
   #endif
   #if UINT64_COUNT > 39
   shiftSection();
   #endif
   #if UINT64_COUNT > 40
   shiftSection();
   #endif
   #if UINT64_COUNT > 41
   shiftSection();
   #endif
   #if UINT64_COUNT > 42
   shiftSection();
   #endif
   #if UINT64_COUNT > 43
   shiftSection();
   #endif
   #if UINT64_COUNT > 44
   shiftSection();
   #endif
   #if UINT64_COUNT > 45
   shiftSection();
   #endif
   #if UINT64_COUNT > 46
   shiftSection();
   #endif
   #if UINT64_COUNT > 47
   shiftSection();
   #endif
   #if UINT64_COUNT > 48
   shiftSection();
   #endif
   #if UINT64_COUNT > 49
   shiftSection();
   #endif
   #if UINT64_COUNT > 50
   shiftSection();
   #endif
   #if UINT64_COUNT > 51
   shiftSection();
   #endif
   #if UINT64_COUNT > 52
   shiftSection();
   #endif
   #if UINT64_COUNT > 53
   shiftSection();
   #endif
   #if UINT64_COUNT > 54
   shiftSection();
   #endif
   #if UINT64_COUNT > 55
   shiftSection();
   #endif
   #if UINT64_COUNT > 56
   shiftSection();
   #endif
   #if UINT64_COUNT > 57
   shiftSection();
   #endif
   #if UINT64_COUNT > 58
   shiftSection();
   #endif
   #if UINT64_COUNT > 59
   shiftSection();
   #endif
   #if UINT64_COUNT > 60
   shiftSection();
   #endif
   #if UINT64_COUNT > 61
   shiftSection();
   #endif
   #if UINT64_COUNT > 62
   shiftSection();
   #endif
   #if UINT64_COUNT > 63
   shiftSection();
   #endif
   #if UINT64_COUNT > 64
   shiftSection();
   #endif
   #if UINT64_COUNT > 65
   shiftSection();
   #endif
   #if UINT64_COUNT > 66
   shiftSection();
   #endif
   #if UINT64_COUNT > 67
   shiftSection();
   #endif
   #if UINT64_COUNT > 68
   shiftSection();
   #endif
   #if UINT64_COUNT > 69
   shiftSection();
   #endif
   #if UINT64_COUNT > 70
   shiftSection();
   #endif
   #if UINT64_COUNT > 71
   shiftSection();
   #endif
   #if UINT64_COUNT > 72
   shiftSection();
   #endif
   #if UINT64_COUNT > 73
   shiftSection();
   #endif
   #if UINT64_COUNT > 74
   shiftSection();
   #endif
   #if UINT64_COUNT > 75
   shiftSection();
   #endif
   #if UINT64_COUNT > 76
   shiftSection();
   #endif
   #if UINT64_COUNT > 77
   shiftSection();
   #endif
   #if UINT64_COUNT > 78
   shiftSection();
   #endif
   #if UINT64_COUNT > 79
   shiftSection();
   #endif
   #if UINT64_COUNT > 80
   shiftSection();
   #endif
   #if UINT64_COUNT > 81
   shiftSection();
   #endif
   #if UINT64_COUNT > 82
   shiftSection();
   #endif
   #if UINT64_COUNT > 83
   shiftSection();
   #endif
   #if UINT64_COUNT > 84
   shiftSection();
   #endif
   #if UINT64_COUNT > 85
   shiftSection();
   #endif
   #if UINT64_COUNT > 86
   shiftSection();
   #endif
   #if UINT64_COUNT > 87
   shiftSection();
   #endif
   #if UINT64_COUNT > 88
   shiftSection();
   #endif
   #if UINT64_COUNT > 89
   shiftSection();
   #endif
   #if UINT64_COUNT > 90
   shiftSection();
   #endif
   #if UINT64_COUNT > 91
   shiftSection();
   #endif
   #if UINT64_COUNT > 92
   shiftSection();
   #endif
   #if UINT64_COUNT > 93
   shiftSection();
   #endif
   #if UINT64_COUNT > 94
   shiftSection();
   #endif
   #if UINT64_COUNT > 95
   shiftSection();
   #endif
   #if UINT64_COUNT > 96
   shiftSection();
   #endif
   #if UINT64_COUNT > 97
   shiftSection();
   #endif
   #if UINT64_COUNT > 98
   shiftSection();
   #endif
   #if UINT64_COUNT > 99
   shiftSection();
   #endif
   #if UINT64_COUNT > 100
   #error add code!
   #endif

   // the final shift zero fills
   dest->shift64 [0] = _mm_slli_si64 (next, 1);
   }


//----------------------------------------------------------------------------
//
// addInteger - add extended integers
//

void addInteger (INTEGER *value1, INTEGER *value2, INTEGER *result)
   {
   unsigned index, carry = 0;
   INTEGER total;
   for (index = 0; index < UINT32_COUNT; index++)
      {
      total.uint64 [0] = (UINT64) value1->uint32 [index] + value2->uint32 [index] + carry;
      result->uint32 [index] = total.uint32 [0];
      carry = total.uint32 [1];
      }
   }

//----------------------------------------------------------------------------
// 
// multiplyInteger - multiply extended integers
//

void multiplyInteger (INTEGER *value1, INTEGER *value2, INTEGER *result)
   {
   unsigned index;
   INTEGER  temp, total = IntegerZero;

   if (pullbit (value1, 0))
      addInteger (&total, value2, &total);

   for (index = 1; index < MAXBITS; index++)
      {
      if (!pullbit (value1, index)) continue;
      shiftLeft (value2, &temp, index);
      addInteger (&total, &temp, &total);
      }
   *result = total;
   }

//----------------------------------------------------------------------------
//
// mul10 - multiply extended integer by 10
//

void mul10 (INTEGER *value)
   {
   INTEGER x8, x2;

   shiftLeft (value, &x8, 3);
   shiftLeft (value, &x2, 1);
   addInteger (&x8, &x2, value);
   }

//----------------------------------------------------------------------------
//
// allOnes - returns extended integer with ls bits set to all ones
//

INTEGER allOnes (unsigned bits)
   {
   static INTEGER result;
   unsigned index;
   result = IntegerZero;
   for (index = 0; index < bits; index++)
      setbit (&result, index);
   return result;
   }

//----------------------------------------------------------------------------

char *skipWhiteSpace (char *position)
   {
   while (*position == ' ' || *position == '\t') position++;
   return position;
   }

//----------------------------------------------------------------------------
// 
// findBase - looks at ascii buffer and determines the base. A leadinf 0x forces hex.
//            A trailing b, o, d forces binary, octal, or decimal, respectively.
//            If no prefix or suffix is present, a best guess is made by scanning the digits.
//
unsigned findBase (char *position)
   {
   int  base = 0, maxCharacter = '0';
   char *endOfNumber;
   
   if (position [0] == '0' && tolower (position [1]) == 'x') return 16;

   endOfNumber = position + strspn (position, "0123456789");
   if (tolower (*endOfNumber) == 'b') base = 2;
   if (tolower (*endOfNumber) == 'o') base = 8;
   if (tolower (*endOfNumber) == 'd') base = 10;

   // no suffix, look at digits
   while (position != endOfNumber)
      {
      if (maxCharacter < *position)
         maxCharacter = *position;
      position++;
      }

   if (base)
      {
      if (maxCharacter >= '0' + base)
         logError ("digit %c conflicts with base suffix %c", maxCharacter, *endOfNumber);
      }
   else
      {
      if (maxCharacter <= '1') base = 2; 
      else if (maxCharacter <= '7') base = 8; 
      else if (maxCharacter <= '9') base = 10;
      }
   return base;
   }

//----------------------------------------------------------------------------
//
// scanDecimalDigits - read a extended integer from an ascii buffer of decimal digits.
//
void scanDecimalDigits (char *buffer, INTEGER *value)
   {
   char     *position = buffer;
   INTEGER  dvalue = IntegerZero;

   *value = IntegerZero;

   for (;;)
      {
      unsigned digit = *position++;

      if (!isdigit (digit)) break;
      mul10 (value);
      dvalue.uintn [0] = digit - '0';
      addInteger (value, &dvalue, value);
      }
   }

//----------------------------------------------------------------------------
//
// scanDigits - read a extended integer from an ascii buffer on digits of a selectable base.
//
void scanDigits (char *buffer, INTEGER *integer, unsigned base)
   {
   static int digitBits [] = {0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4};
   char *position, *endOfNumber, *startOfNumber = skipWhiteSpace (buffer);
   unsigned bitno, index;
   unsigned bitsPerDigit;

   if (base == 0) base = findBase (startOfNumber);
   if (base == 10)
      {
      scanDecimalDigits (startOfNumber, integer);
      return;
      }

   if (startOfNumber [0] == '0' && tolower (startOfNumber [1]) == 'x') startOfNumber += 2;
   if (base == 16)
      endOfNumber = startOfNumber + strspn (startOfNumber, "0123456789abcdefABCDEF");
   else if (base == 8)
      endOfNumber = startOfNumber + strspn (startOfNumber, "01234567");
   else
      endOfNumber = startOfNumber + strspn (startOfNumber, "01");
   bitsPerDigit = digitBits [base];
   position = endOfNumber - 1;
   bitno = 0;
   *integer = IntegerZero;

   while (position >= startOfNumber)
      {
      unsigned value;
      value = toupper (*position);
      if (value >= 'A') value = 10 + (value - 'A');
      else value -= '0';
      if (value >= base) logError ("invalid base %u digit (%c)", base, *position);
      for (index = 0; index < bitsPerDigit; index++)
         {
         if (value & 1)
            setbit (integer, bitno);
         bitno++;
         value >>= 1;
         }
      position--;
      }
   }

//----------------------------------------------------------------------------

INTEGER *computeFactors (unsigned *numberOfFactors, unsigned order)
   {
   UINT64 n, p, end, f;
   unsigned factorCount = 0;
   INTEGER *factorList = NULL;

   p = allOnes (order).uint64 [0];
   n = p;
   end = sqrt (n);
   
   for (f = 3; f <= end; )
      {
      if (n % f == 0)
         {
         n /= f;
         end = sqrt(n);
         factorList = realloc (factorList, sizeof (INTEGER) * (factorCount + 1));
         factorList [factorCount] = IntegerZero;
         factorList [factorCount].uint64 [0] = f;
         factorCount++;
         }
      else
         f += 2;
      }

   factorList = realloc (factorList, sizeof (INTEGER) * (factorCount + 1));
   factorList [factorCount] = IntegerZero;
   factorList [factorCount].uint64 [0] = n;
   factorCount++;
   *numberOfFactors = factorCount;
   return factorList;
   }

/*-----------------------------------------------------------------------------
 *
 * Find all the factors of 2^polynomialDegree-1 by reading them from a disk file
 * Return a list of 2^polynomialDegree-1 / (each single prime)
 * 
 */

unsigned findFactors (unsigned polynomialDegree, DIVISOR_LIST *divisorList, unsigned verbose)
   {
   FILE      *fp;
   INTEGER   factor, *factorList = NULL, result, total = IntegerOne, org = allOnes (polynomialDegree);
   unsigned  numberOfFactors = 0, index1, index2;
   char      filename [100];
   char      *buffer = calloc (1, 10000);

   divisorList->count = 0;
   divisorList->divisor = 0;

   sprintf (filename, "factor2n-1/%u.txt", polynomialDegree);
   fp = fopen (filename, "r");
   if (!fp)
      {
      if (verbose) printf ("\nfailed to open %s\n", filename);
      if (polynomialDegree > 64)
         {
         printf ("need factor file for more that 64 bits\n");
         return 0;
         }
      factorList = computeFactors (&numberOfFactors, polynomialDegree);
      total = org; // suppress sanity check
      }
   else
      {
      while (!feof (fp))
         {
         if (!fgets (buffer, 10000, fp)) break;
         scanDecimalDigits (buffer, &factor);
         multiplyInteger (&total, &factor, &result);
         total = result;
         factorList = realloc (factorList, sizeof (INTEGER) * (numberOfFactors + 1));
         factorList [numberOfFactors++] = factor;
         }
      fclose (fp);
      }
   free (buffer);

   // do a sanity check on the factorization
   if (memcmp (&org, &total, sizeof (INTEGER)) != 0)
      {
      if (verbose) printf ("factor file for 2^%u - 1 is missing or incorrect\n", polynomialDegree);
      free (factorList);
      return 0;
      }
   for (index1 = 0; index1 < numberOfFactors; index1++)
      {
      INTEGER result, total = IntegerOne;
      for (index2 = 0; index2 < numberOfFactors; index2++)
         {
         if (index2 == index1) continue;
         multiplyInteger (&total, &factorList [index2], &result);
         total = result;
         }
      if (divisorList->count)
         if (memcmp (&total, &divisorList->divisor [divisorList->count - 1], sizeof (INTEGER)) == 0) continue;
      divisorList->divisor = realloc (divisorList->divisor, sizeof (INTEGER) * (divisorList->count + 1));
      divisorList->divisor [divisorList->count++] = total;
      }
   free (factorList);
   return 1;
   }

//----------------------------------------------------------------------------
//
// xorInteger - exclusive-or two extended integers
//              uses SIMD 128-bit xor, unrolled and inline
//
#define xorPair(result,arg1,arg2,index) (result->xorint [index] = _mm_xor_ps (arg1->xorint [index], arg2->xorint [index]))

__forceinline INTEGER *xorInteger (INTEGER *arg1, INTEGER *arg2, INTEGER *result)
   {
#if (XORINT_COUNT > 0)
      xorPair (result, arg1, arg2, 0);
#endif
#if (XORINT_COUNT > 1)
      xorPair (result, arg1, arg2, 1);
#endif
#if (XORINT_COUNT > 2)
      xorPair (result, arg1, arg2, 2);
#endif
#if (XORINT_COUNT > 3)
      xorPair (result, arg1, arg2, 3);
#endif
#if (XORINT_COUNT > 4)
      xorPair (result, arg1, arg2, 4);
#endif
#if (XORINT_COUNT > 5)
      xorPair (result, arg1, arg2, 5);
#endif
#if (XORINT_COUNT > 6)
      xorPair (result, arg1, arg2, 6);
#endif
#if (XORINT_COUNT > 7)
      xorPair (result, arg1, arg2, 7);
#endif
#if (XORINT_COUNT > 8)
      xorPair (result, arg1, arg2, 8);
#endif
#if (XORINT_COUNT > 9)
      xorPair (result, arg1, arg2, 9);
#endif
#if (XORINT_COUNT > 10)
      xorPair (result, arg1, arg2, 10);
#endif
#if (XORINT_COUNT > 11)
      xorPair (result, arg1, arg2, 11);
#endif
#if (XORINT_COUNT > 12)
      xorPair (result, arg1, arg2, 12);
#endif
#if (XORINT_COUNT > 13)
      xorPair (result, arg1, arg2, 13);
#endif
#if (XORINT_COUNT > 14)
      xorPair (result, arg1, arg2, 14);
#endif
#if (XORINT_COUNT > 15)
      xorPair (result, arg1, arg2, 15);
#endif
#if (XORINT_COUNT > 16)
      xorPair (result, arg1, arg2, 16);
#endif
#if (XORINT_COUNT > 17)
      xorPair (result, arg1, arg2, 17);
#endif
#if (XORINT_COUNT > 18)
      xorPair (result, arg1, arg2, 18);
#endif
#if (XORINT_COUNT > 19)
      xorPair (result, arg1, arg2, 19);
#endif
#if (XORINT_COUNT > 20)
      xorPair (result, arg1, arg2, 20);
#endif
#if (XORINT_COUNT > 21)
      xorPair (result, arg1, arg2, 21);
#endif
#if (XORINT_COUNT > 22)
      xorPair (result, arg1, arg2, 22);
#endif
#if (XORINT_COUNT > 23)
      xorPair (result, arg1, arg2, 23);
#endif
#if (XORINT_COUNT > 24)
      xorPair (result, arg1, arg2, 24);
#endif
#if (XORINT_COUNT > 25)
      xorPair (result, arg1, arg2, 25);
#endif
#if (XORINT_COUNT > 26)
      xorPair (result, arg1, arg2, 26);
#endif
#if (XORINT_COUNT > 27)
      xorPair (result, arg1, arg2, 27);
#endif
#if (XORINT_COUNT > 28)
      xorPair (result, arg1, arg2, 28);
#endif
#if (XORINT_COUNT > 29)
      xorPair (result, arg1, arg2, 29);
#endif
#if (XORINT_COUNT > 30)
      xorPair (result, arg1, arg2, 30);
#endif
#if (XORINT_COUNT > 31)
      xorPair (result, arg1, arg2, 31);
#endif
#if (XORINT_COUNT > 32)
      xorPair (result, arg1, arg2, 32);
#endif
#if (XORINT_COUNT > 33)
      xorPair (result, arg1, arg2, 33);
#endif
#if (XORINT_COUNT > 34)
      xorPair (result, arg1, arg2, 34);
#endif
#if (XORINT_COUNT > 35)
      xorPair (result, arg1, arg2, 35);
#endif
#if (XORINT_COUNT > 36)
      xorPair (result, arg1, arg2, 36);
#endif
#if (XORINT_COUNT > 37)
      xorPair (result, arg1, arg2, 37);
#endif
#if (XORINT_COUNT > 38)
      xorPair (result, arg1, arg2, 38);
#endif
#if (XORINT_COUNT > 39)
      xorPair (result, arg1, arg2, 39);
#endif
#if (XORINT_COUNT > 40)
      xorPair (result, arg1, arg2, 40);
#endif
#if (XORINT_COUNT > 41)
      xorPair (result, arg1, arg2, 41);
#endif
#if (XORINT_COUNT > 42)
      xorPair (result, arg1, arg2, 42);
#endif
#if (XORINT_COUNT > 43)
      xorPair (result, arg1, arg2, 43);
#endif
#if (XORINT_COUNT > 44)
      xorPair (result, arg1, arg2, 44);
#endif
#if (XORINT_COUNT > 45)
      xorPair (result, arg1, arg2, 45);
#endif
#if (XORINT_COUNT > 46)
      xorPair (result, arg1, arg2, 46);
#endif
#if (XORINT_COUNT > 47)
      xorPair (result, arg1, arg2, 47);
#endif
#if (XORINT_COUNT > 48)
      xorPair (result, arg1, arg2, 48);
#endif
#if (XORINT_COUNT > 49)
      xorPair (result, arg1, arg2, 49);
#endif
#if (XORINT_COUNT > 50)
      xorPair (result, arg1, arg2, 50);
#endif
#if (XORINT_COUNT > 51)
      xorPair (result, arg1, arg2, 51);
#endif
#if (XORINT_COUNT > 52)
      xorPair (result, arg1, arg2, 52);
#endif
#if (XORINT_COUNT > 53)
      xorPair (result, arg1, arg2, 53);
#endif
#if (XORINT_COUNT > 54)
      xorPair (result, arg1, arg2, 54);
#endif
#if (XORINT_COUNT > 55)
      xorPair (result, arg1, arg2, 55);
#endif
#if (XORINT_COUNT > 56)
      xorPair (result, arg1, arg2, 56);
#endif
#if (XORINT_COUNT > 57)
      xorPair (result, arg1, arg2, 57);
#endif
#if (XORINT_COUNT > 58)
      xorPair (result, arg1, arg2, 58);
#endif
#if (XORINT_COUNT > 59)
      xorPair (result, arg1, arg2, 59);
#endif
#if (XORINT_COUNT > 60)
      xorPair (result, arg1, arg2, 60);
#endif
#if (XORINT_COUNT > 61)
      xorPair (result, arg1, arg2, 61);
#endif
#if (XORINT_COUNT > 62)
      xorPair (result, arg1, arg2, 62);
#endif
#if (XORINT_COUNT > 63)
      xorPair (result, arg1, arg2, 63);
#endif
#if (XORINT_COUNT > 64)
      xorPair (result, arg1, arg2, 64);
#endif
#if (XORINT_COUNT > 65)
      xorPair (result, arg1, arg2, 65);
#endif
#if (XORINT_COUNT > 66)
      xorPair (result, arg1, arg2, 66);
#endif
#if (XORINT_COUNT > 67)
      xorPair (result, arg1, arg2, 67);
#endif
#if (XORINT_COUNT > 68)
      xorPair (result, arg1, arg2, 68);
#endif
#if (XORINT_COUNT > 69)
      xorPair (result, arg1, arg2, 69);
#endif
#if (XORINT_COUNT > 70)
      xorPair (result, arg1, arg2, 70);
#endif
#if (XORINT_COUNT > 71)
      xorPair (result, arg1, arg2, 71);
#endif
#if (XORINT_COUNT > 72)
      xorPair (result, arg1, arg2, 72);
#endif
#if (XORINT_COUNT > 73)
      xorPair (result, arg1, arg2, 73);
#endif
#if (XORINT_COUNT > 74)
      xorPair (result, arg1, arg2, 74);
#endif
#if (XORINT_COUNT > 75)
      xorPair (result, arg1, arg2, 75);
#endif
#if (XORINT_COUNT > 76)
      xorPair (result, arg1, arg2, 76);
#endif
#if (XORINT_COUNT > 77)
      xorPair (result, arg1, arg2, 77);
#endif
#if (XORINT_COUNT > 78)
      xorPair (result, arg1, arg2, 78);
#endif
#if (XORINT_COUNT > 79)
      xorPair (result, arg1, arg2, 79);
#endif
#if (XORINT_COUNT > 80)
      xorPair (result, arg1, arg2, 80);
#endif
#if (XORINT_COUNT > 81)
      xorPair (result, arg1, arg2, 81);
#endif
#if (XORINT_COUNT > 82)
      xorPair (result, arg1, arg2, 82);
#endif
#if (XORINT_COUNT > 83)
      xorPair (result, arg1, arg2, 83);
#endif
#if (XORINT_COUNT > 84)
      xorPair (result, arg1, arg2, 84);
#endif
#if (XORINT_COUNT > 85)
      xorPair (result, arg1, arg2, 85);
#endif
#if (XORINT_COUNT > 86)
      xorPair (result, arg1, arg2, 86);
#endif
#if (XORINT_COUNT > 87)
      xorPair (result, arg1, arg2, 87);
#endif
#if (XORINT_COUNT > 88)
      xorPair (result, arg1, arg2, 88);
#endif
#if (XORINT_COUNT > 89)
      xorPair (result, arg1, arg2, 89);
#endif
#if (XORINT_COUNT > 90)
      xorPair (result, arg1, arg2, 90);
#endif
#if (XORINT_COUNT > 91)
      xorPair (result, arg1, arg2, 91);
#endif
#if (XORINT_COUNT > 92)
      xorPair (result, arg1, arg2, 92);
#endif
#if (XORINT_COUNT > 93)
      xorPair (result, arg1, arg2, 93);
#endif
#if (XORINT_COUNT > 94)
      xorPair (result, arg1, arg2, 94);
#endif
#if (XORINT_COUNT > 95)
      xorPair (result, arg1, arg2, 95);
#endif
#if (XORINT_COUNT > 96)
      xorPair (result, arg1, arg2, 96);
#endif
#if (XORINT_COUNT > 97)
      xorPair (result, arg1, arg2, 97);
#endif
#if (XORINT_COUNT > 98)
      xorPair (result, arg1, arg2, 98);
#endif
#if (XORINT_COUNT > 99)
      xorPair (result, arg1, arg2, 99);
#endif
#if (XORINT_COUNT > 100)
#error add code!     
#endif
   return result;
   }

//----------------------------------------------------------------------------
//
// setbit set a bit in a extended integer
//
__forceinline unsigned pullbits (INTEGER *data, unsigned lsb, unsigned msb)
   {
   unsigned index, total = 0;
   for (index = lsb; index <= msb; index++)
      if (pullbit (data, index))
         total |= 1 << (index - lsb);
   return total;
   }

//----------------------------------------------------------------------------
//
// binaryAscii - return in binary ascii representation of extended integer
//               in a static buffer. Four calls can be made before overwriting.
//
char *binaryAscii (INTEGER *data, unsigned bits)
   {
   static char buffer [4] [MAXBITS + 2];
   static unsigned cycle;
   char *position = buffer [cycle];
   char *result = position;

   if (bits == 0) bits = highestSetBit (data) + 1;
   if (bits == 0) bits++;

   while (bits)
      *position++ = (char) ('0' + pullbit (data, --bits));
   
   *position++ = '\0';
   if (++cycle == 4) cycle = 0;
   return result;
   }

//----------------------------------------------------------------------------
//
// hexAscii - return in hexadecimal ascii representation of extended integer
//            in a static buffer. Four calls can be made before overwriting.
//

char *hexAscii (INTEGER *data, unsigned bits)
   {
   static char buffer [4] [MAXBITS * 4 + 2];
   static unsigned cycle;
   unsigned index;

   char *position = buffer [cycle];
   char *result = position;

   if (bits == 0) bits = highestSetBit (data) + 1;
   if (bits == 0) bits++;

   index = (bits + 7) / 8;
   while (index--)
      position += sprintf (position, "%02X", data->uint8 [index]);
   *position++ = '\0';
   if ((((bits - 1) / 4) & 1) == 0) result++;
   if (++cycle == 4) cycle = 0;
   return result;
   }

//----------------------------------------------------------------------------
//
// octalAscii - return in octal ascii representation of extended integer
//              in a static buffer. Four calls can be made before overwriting.
//

char *octalAscii (INTEGER *data, unsigned bits)
   {
   static char buffer [4] [MAXBITS / 3 + 2];
   static unsigned cycle;
   char *position = buffer [cycle];
   char *result = position;

   if (bits == 0) bits = highestSetBit (data) + 1;
   bits = (bits + 2) / 3 * 3;

   while (bits)
      {
      *position = (char) ('0' + pullbits (data, bits - 3, bits - 1));
      position++;
      bits -= 3;
      }

   *position++ = '\0';
   if (++cycle == 4) cycle = 0;
   return result;
   }

//----------------------------------------------------------------------------
//
// polynomialText - return text string containing ascii description of
//                  the polynomial represented by the extended integer 
//

char *polynomialText (INTEGER *polynomial, unsigned displayMode)
   {
   static char text [500];
   unsigned    index;
   char        *position = text;

   if (displayMode == 'b')
      return binaryAscii (polynomial, 0);
   else if (displayMode == 'o')
      return octalAscii (polynomial, 0);
   else if (displayMode == 'h')
      return hexAscii (polynomial, 0);
   else if (displayMode == 'l') {
     position += sprintf (position, "$ ");
     for (index = MAXBITS - 1; index > 1; index--)
        if (pullbit (polynomial, index))
           position += sprintf (position, "x^{%u} + ", index);
     if (pullbit (polynomial, 1))
        position += sprintf (position, "x + ");
     position += sprintf (position, "1 $");
     return text;
   }
   else {
     for (index = MAXBITS - 1; index > 1; index--)
        if (pullbit (polynomial, index))
           position += sprintf (position, "x^%u + ", index);
     if (pullbit (polynomial, 1))
        position += sprintf (position, "x + ");
     position += sprintf (position, "1");
     return text;
     }
  }

//----------------------------------------------------------------------------
//
// shiftRight - shift right extended integer
//

__forceinline void shiftRight (INTEGER *integer, unsigned shiftCount)
   {
   unsigned destIndex = 0;
   unsigned sourceIndex = shiftCount / UINTN_BITS;
   unsigned rightShift = shiftCount % UINTN_BITS;
   unsigned leftShift = UINTN_BITS - rightShift;

   #if (UINTN_COUNT > 1)
   if (!rightShift)
      while (sourceIndex < UINTN_COUNT - 1)
         {
         integer->uintn [destIndex] = integer->uintn [sourceIndex] >> rightShift;
         sourceIndex++;
         destIndex++;
         }

      while (sourceIndex < UINTN_COUNT - 1)
         {
         integer->uintn [destIndex] = (integer->uintn [sourceIndex + 0] >> rightShift) |
                                      (integer->uintn [sourceIndex + 1] << leftShift);
         sourceIndex++;
         destIndex++;
         }
   #endif

   integer->uintn [destIndex] = integer->uintn [UINTN_COUNT - 1] >> rightShift;
   #if (UINTN_COUNT > 1)
   while (destIndex < UINTN_COUNT - 1)
      integer->uintn [++destIndex] = 0;
   #endif
   }

//----------------------------------------------------------------------------
// 
// dividePolynomial - divide binary polynomial, coefficients are kept in 
//                    extended integers
//
void dividePolynomial (INTEGER *numerator, INTEGER *denominator, INTEGER *quotient)
   {
   signed numeratorPower = highestSetBit (numerator);
   signed denominatorPower = highestSetBit (denominator);
   signed denominatorShift = numeratorPower - denominatorPower;
   signed bitsRetired;

   if ((signed) denominatorShift < 0) return;

   if (denominatorPower == -1)
      logError ("polynomial division by zero\n");

   else if (denominatorPower == 0)
      {
      *quotient = *numerator;
      *numerator = IntegerZero;
      return;
      }

   shiftLeft (denominator, denominator, denominatorShift);

   for (;;)
      {
      setbit (quotient, denominatorShift);
      xorInteger (numerator, denominator, numerator);
      bitsRetired = numeratorPower - highestSetBit (numerator);
      denominatorShift -= bitsRetired;
      numeratorPower -= bitsRetired;
      if (numeratorPower < denominatorPower) break;
      shiftRight (denominator, bitsRetired);
      }
   }

//----------------------------------------------------------------------------

unsigned findSmallPolynomialFactor (INTEGER *value, IRREDUCABLEINFO *irreducableInfo, unsigned polynomialDegree)
   {
   UINT32   *irreducables;
   INTEGER  numerator, quotient, denominator;
   unsigned count, index, numeratorPower, denominatorPower;

   if (irreducableInfo->fileHandle == INVALID_HANDLE_VALUE) return 0; // irreducables.bin not available
   irreducables = irreducableInfo->list;

   // this screen gets less effective with big polynomials, so adjust accordingly
   count = min (100000 / polynomialDegree, irreducableInfo->count);

   for (index = 0; index < count; index++)
      {
      denominator = IntegerZero;
      denominator.uint32 [0] = irreducables [index];
      numerator = *value;
      quotient = IntegerZero;
      numeratorPower = highestSetBit (value);
      denominatorPower = highestSetBit (&denominator);
      if (denominatorPower * 2 > numeratorPower)
         {
         // the remaining bits must be prime because a minimum of two factors with a
         // total of at least denominatorPower bits would exceed the numerator
         return 0;
         }
      else
         dividePolynomial (&numerator, &denominator, &quotient);
      if (numerator.uint64 [0] != 0) continue;
      return 1;
      }
   return 0;
   }

//----------------------------------------------------------------------------
//
// when searching for primitive polynomials, this function returns the next candidate
//
void nextPolynomial (INTEGER *polynomial, unsigned polynomialWeight, unsigned polynomialDegree)
   {
   unsigned weight;
   for (;;)
      {
      if (polynomialWeight)
         {
         unsigned index;
         static signed walkingIndex;
         static unsigned inProgress, rowPointer [MAXBITS];
         static unsigned previousPolynomialDegree;

         // force reinitialization if called with different degree before previous completed
         if (previousPolynomialDegree != polynomialDegree)
            {
            previousPolynomialDegree = polynomialDegree;
            inProgress = 0;
            }

         if (!inProgress)
            {
            inProgress++;
            walkingIndex = firstCombination (polynomialWeight - 2, rowPointer);
            }
         else
            {
            walkingIndex = nextCombination (polynomialDegree - 1, polynomialWeight - 2, walkingIndex, rowPointer);
            if (walkingIndex < 0)
               {
               *polynomial = IntegerZero;
               inProgress = 0;
               return;
               }
            }

         *polynomial = IntegerOne;
         setbit (polynomial, polynomialDegree);
         for (index = 0; index < polynomialWeight - 2; index++)
            setbit (polynomial, rowPointer [index] + 1);
         return;
         }

      addInteger (polynomial, &IntegerTwo, polynomial);

      // from ??? we know that a binary primitive polynomial
      // has an odd number of non-zero coefficients. Skip the evens.
      weight = findHammingWeight (polynomial);
      if (~weight & 1) continue;
      break;
      }
   }

//----------------------------------------------------------------------------
// 
// modularMultiplyPolynomial - modular multiply binary polynomial, coefficients
//                           are kept in extended integers
//
__forceinline void modularMultiplyPolynomial (INTEGER *factor1, INTEGER *factor2, INTEGER *modulo, INTEGER *product, unsigned polynomialDegree)
   {
   signed index, factor1Power = highestSetBit (factor1);
   INTEGER  result, factor2copy = *factor2;
   memset(result.uintn, 0, sizeof(result));
   result.uintn[0] = 0;

   #if defined (_DEBUG)
   if (factor1Power < 0)
      logError ("unexpected multiply by zero\n");
   #endif

   for (index = 0; index <= factor1Power; index++)
      {
      if (pullbit (factor1, index))
         xorInteger (&result, &factor2copy, &result);

      shiftLeftOnce (&factor2copy, &factor2copy);
      if (pullbit (&factor2copy, polynomialDegree))
         xorInteger (&factor2copy, modulo, &factor2copy);
      }
   *product = result;
   }

//----------------------------------------------------------------------------
// 
// modularPowerPolynomial - modular exponentiation for binary polynomial,
//                          coefficients are kept in extended integers
//
void modularPowerPolynomial (INTEGER *primitiveElement, INTEGER *power, INTEGER *modulo, INTEGER *result, unsigned polynomialDegree)
   {
   signed bitno, totalBits = highestSetBit (power) - 1;

   *result = *primitiveElement;
   for(bitno = totalBits; bitno >= 0; bitno--)
      {
      modularMultiplyPolynomial (result, result, modulo, result, polynomialDegree);
      if (pullbit (power, bitno))
         {
         // a general algorithm must execute:
         //     modularMultiplyPolynomial (result, primitiveElement, modulo, result, polynomialDegree);
         // But because the first argument is the right shift of the modulo, we emulate a shift register multiplier 
         // this is not a major optimization, but it helps
         unsigned lsb = pullbit (result, 0);
         shiftRight (result, 1);
         if (lsb) xorInteger (result, primitiveElement, result);
         }
      }
   }

//----------------------------------------------------------------------------
//
// read a few small irreducable polynomials from disk
// 
void readIrreducablePolynomaials (IRREDUCABLEINFO *irreducableInfo)
   {
   unsigned bytes;

   if (irreducableInfo->initialized) return;
#if defined WIN64 || defined WIN32
   irreducableInfo->fileHandle = CreateFile ("irreducables.bin", GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, 0);
#else // LINUX
   irreducableInfo->fileHandle = fopen("irreducables.bin", "rb");
#endif
   if (irreducableInfo->fileHandle == INVALID_HANDLE_VALUE)
      {
      irreducableInfo->initialized++;
      return;
      }
   else
      {
#if defined WIN64 || defined WIN32
      bytes = GetFileSize (irreducableInfo->fileHandle, 0);
      irreducableInfo->count = bytes / sizeof (irreducableInfo->list [0]);
      irreducableInfo->objectHandle = CreateFileMapping (irreducableInfo->fileHandle, NULL, PAGE_READONLY, 0, 0, "irreducablePolynomials");
      irreducableInfo->list = MapViewOfFile (irreducableInfo->objectHandle, FILE_MAP_READ, 0, 0, 0); 
#else // LINUX GCC22
      fseek(irreducableInfo->fileHandle, 0, SEEK_END);
      bytes = ftell(irreducableInfo->fileHandle);
      fseek(irreducableInfo->fileHandle, 0, SEEK_SET);
      irreducableInfo->count = bytes / sizeof (irreducableInfo->list [0]);
      irreducableInfo->objectHandle = irreducableInfo->fileHandle;
      irreducableInfo->list = (UINT32*) malloc(irreducableInfo->count * sizeof(irreducableInfo->list[0]));
      fread(irreducableInfo->list, sizeof(irreducableInfo->list[0]), irreducableInfo->count, irreducableInfo->fileHandle);
#endif
      }

   irreducableInfo->initialized++;
   }

//----------------------------------------------------------------------------

void freeIrreducables (IRREDUCABLEINFO *irreducableInfo)
   {
   if (irreducableInfo->objectHandle)
      {
#if defined WIN64 || defined WIN32
      UnmapViewOfFile (irreducableInfo->list);
      CloseHandle (irreducableInfo->objectHandle);
      CloseHandle (irreducableInfo->fileHandle);
#else // LINUX GCC22
      fclose(irreducableInfo->fileHandle); irreducableInfo->fileHandle=NULL;
      irreducableInfo->objectHandle=NULL;
      free(irreducableInfo->list);  irreducableInfo->list=NULL;
#endif
      }

   free (irreducableInfo);
   }

//----------------------------------------------------------------------------
//
// primitivityTest - return non-zero if the polynomial is primitive
//

unsigned primitivityTest (INTEGER *polynomial, unsigned polynomialDegree, DIVISOR_LIST *divisorList, unsigned displayMode, unsigned verbose)
   {
   // "quick" test to determine if the period might be 2^polynomialDegree-1
   // only guaranteed for prime 2**polynomialDegree-1, but quickly screens many others
   INTEGER  primitiveElement, power, result;

   power = IntegerZero;
   setbit (&power, polynomialDegree);
   primitiveElement = *polynomial;
   shiftRight (&primitiveElement, 1);
   modularPowerPolynomial (&primitiveElement, &power, polynomial, &result, polynomialDegree);
   if (memcmp (&result, &primitiveElement, sizeof (INTEGER)) == 0)
      {
      if (divisorList->count == 1)
         {
         if (!verbose) printf ("\n%s", polynomialText (polynomial, displayMode));
         else printf ("is primitive");
         return 1;
         }
      else
         {
         unsigned k;
         for (k = 0; k < divisorList->count; k++) 
            {
            power = divisorList->divisor [k];
            modularPowerPolynomial (&primitiveElement, &power, polynomial, &result, polynomialDegree);
            if (memcmp (&result, &IntegerOne, sizeof (INTEGER)) == 0)
               break;
            }
         if (k == divisorList->count)
            {
            if (!verbose) printf ("\n%s", polynomialText (polynomial, displayMode));
            else printf ("is primitive");
            return 1;
            }
         else if (verbose) printf ("fails on order test %u of %u", k + 1, divisorList->count);
         }
      }
   else
      if (verbose) printf ("fails initial test");
   return 0;
   }

//----------------------------------------------------------------------------
//
// findPrimitivePolynomials - search for primitive polymonials of the specified degree
//

void findPrimitivePolynomials (INTEGER *polynomial, IRREDUCABLEINFO *irreducableInfo, unsigned polynomialDegree, unsigned targetCount, unsigned polynomialWeight, unsigned displayMode, unsigned verbose)
   {
   DIVISOR_LIST divisorList;
   unsigned     factorizationAvailable = findFactors (polynomialDegree, &divisorList, verbose);
   unsigned     count = 0;

   // if no factorization is available, nothing can be done
   if (!factorizationAvailable) return;

   if (verbose) printf ("\n%s", polynomialText (polynomial, displayMode));
   for (;;)
      {
      unsigned isPrimitive;
      signed hsb;
      INTEGER temp = *polynomial;

      if (findSmallPolynomialFactor (&temp, irreducableInfo, polynomialDegree))
         {
         signed hsb;
         if (verbose) printf (" is reducable");
         nextPolynomial (polynomial, polynomialWeight, polynomialDegree);
         hsb = highestSetBit (polynomial);
         if ((unsigned) hsb == polynomialDegree + 1) break;
         if (hsb == -1) break;
         if (verbose) printf ("\n%s", polynomialText (polynomial, displayMode));
         continue;
         }
      else if (verbose) printf (" "); // display end of irreducability check

      isPrimitive = primitivityTest (polynomial, polynomialDegree, &divisorList, displayMode, verbose);
      count += isPrimitive;
      if (count >= targetCount) break;
      nextPolynomial (polynomial, polynomialWeight, polynomialDegree);
      hsb = highestSetBit (polynomial);
      if (hsb == -1) break;
      if ((unsigned) hsb == polynomialDegree + 1) break;
      if (verbose) printf ("\n%s", polynomialText (polynomial, displayMode));
      }
   free (divisorList.divisor);
   }

//----------------------------------------------------------------------------
//
// command line help goes here
//
void helpScreen (void)
   {
   printf("use ppsearch options\n\n");
   printf ("options: bits=d         set polynomial size to d (decimal) bits\n");
   printf ("         poly=0xnnnn    set initial polynomial (hex, ms bit not required)\n");
   printf ("         poly=nnnnb     set initial polynomial (binary, ms bit not required)\n");
   printf ("         poly=nnnno     set initial polynomial (octal, ms bit not required)\n");
   printf ("         poly=\"x^d+...\" set initial polynomial by non-zero coefficients & powers\n");
   printf ("         search=d       search for next d primitive polynomials\n");
   printf ("         weight=d       search for polynomials with d non-zero coefficients only\n");
   printf ("         binary         results in binary\n");
   printf ("         octal          results in octal\n");
   printf ("         hex            results in hex\n");
   printf ("         loop           when search completes, restart with bits+1\n");
   printf ("         verbose        show more details\n");
   exit (1);
   }

//----------------------------------------------------------------------------

int main (int argc, char *argv [])
   {
   INTEGER         polynomial = IntegerZero;
   unsigned        verbose = 0, loopMode = 0, polynomialWeight = 0;
   unsigned        displayMode = 0;
   unsigned        polynomialDegree = 0, targetCount = 1;
   IRREDUCABLEINFO *irreducableInfo;

   memset(IntegerOne.uintn, 0, sizeof(IntegerOne));
   memset(IntegerTwo.uintn, 0, sizeof(IntegerTwo));
   IntegerOne.uintn[0] = 1;
   IntegerTwo.uintn[0] = 2;

   if (argc == 1) helpScreen ();

#if defined WIN32 || defined WIN64
   SetPriorityClass (GetCurrentProcess (), IDLE_PRIORITY_CLASS);
#else // LINUX GCC22
   nice(-19);
#endif

   while (--argc)
      {
      char *position = argv [argc];
      
      if (strnicmp (position, "search=", 7) == 0)
         targetCount = atoi (position + 7);
      
      else if (strnicmp (position, "poly=x", 6) == 0)
         {
         unsigned degree;
         polynomial = IntegerZero;
         position += 5;
         while (*position)
            {
            unsigned power = 0;
            if (*position == '1')
               {
               power = 0;
               position++;
               }
            else if (tolower (*position) == 'x')
               {
               if (position [1] == '^')
                  {
                  position += 2;
                  power = atoi (position);
                  position += strspn (position, "0123456789");
                  }
               else
                  {
                  power = 1;
                  position++;
                  }
               }
            else logError ("polynomial syntax error (%s)\n", position);
            if (pullbit (&polynomial, power))
               logError ("duplicate polynomial power\n");
            setbit (&polynomial, power);

            position = skipWhiteSpace (position);
            if (*position == '+')
               {
               position = skipWhiteSpace (position + 1);
               continue;
               }
            if (*position == '\0') break;            
            logError ("polynomial syntax error (%s)\n", position);
            }
         degree = highestSetBit (&polynomial);
         if (!polynomialDegree)
            polynomialDegree = degree;
         else
            if (polynomialDegree != degree)
               logError ("bits= conflicts with poly=x^...\n");
         }
      
      // this polynomial entry mode handles only polynomials
      else if (strnicmp (position, "poly=", 5) == 0)
         scanDigits (position + 5, &polynomial, 0);
      
      else if (strnicmp (position, "verbose", 7) == 0)
         verbose++;
      
      else if (strnicmp (position, "loop", 4) == 0)
         loopMode++;
      
      else if (strnicmp (position, "binary", 6) == 0)
         displayMode = 'b';
      
      else if (strnicmp (position, "octal", 5) == 0)
         displayMode = 'o';
      
      else if (strnicmp (position, "hex", 3) == 0)
         displayMode = 'h';

      else if (strnicmp (position, "latex", 3) == 0)
         displayMode = 'l';
      
      else if (strnicmp (position, "weight=", 7) == 0)
         {
         polynomialWeight = atol (position + 7);
         if (polynomialWeight < 3) logError ("minumum weight is 3");
         }
      
      else if (strnicmp (position, "bits=", 5) == 0)
         polynomialDegree = atol (position + 5);
            
      else
         logError ("ERROR: invalid command line argument %s\n", position);
      }


   if (polynomialDegree >= MAXBITS) 
      logError ("this build is limited to %u bits\n", MAXBITS - 1);
   
   if (polynomialDegree == 0)
      logError ("specify either bits= or ploy=x^...\n");

   irreducableInfo = calloc (sizeof (irreducableInfo), 32);
   readIrreducablePolynomaials (irreducableInfo);

   // if no polynomial given, or just low bits, set ms bit
   setbit (&polynomial, 0);
   setbit (&polynomial, polynomialDegree);

   for (;;)
      {
      findPrimitivePolynomials (&polynomial, irreducableInfo, polynomialDegree, targetCount, polynomialWeight, displayMode, verbose);
      if (!loopMode) break;
      polynomialDegree++;
      if (polynomialDegree == MAXBITS) break;
      nextPolynomial (&polynomial, polynomialWeight, polynomialDegree);
      }
   printf("\n");

   freeIrreducables (irreducableInfo);
   return 0;
   }

//-------------------------end of file-----------------------------