//----------------------------------------------------------------------------
//
// bc - a Beal conjecture counterexample search utility
//      copyright 2013 Scott Duplichan
//      This program is free software: you can redistribute it and/or modify
//      it under the terms of the GNU General Public License as published by
//      the Free Software Foundation, either version 3 of the License, or
//      (at your option) any later version.
//
//----------------------------------------------------------------------------
#include "project.h"
//---------------------------------------------------------------------------
// modulo arithmetic functions, supports modulo to 32-bits, operands to 64 bits
//---------------------------------------------------------------------------

// Reduce a 64-bit value modulo a 32-bit modulus.
// modulus must be nonzero (the remainder of a division by zero is undefined).
static uint32_t moduloReduce (uint32_t modulus, uint64_t value)
    {
    // the remainder is always below modulus, so it fits in 32 bits
    return (uint32_t) (value % modulus);
    }

//---------------------------------------------------------------------------

// Return (value1 * value2) mod modulus.
//
// A 64-bit product is exact only when both operands fit in 32 bits. Wider
// operands are reduced first so the product cannot wrap around 2^64. Operands
// that already fit in 32 bits take the single-reduction path and give exactly
// the same result as before.
static uint32_t modmul (uint32_t modulus, uint64_t value1, uint64_t value2)
    {
    if ((value1 | value2) >> 32)
        {
        value1 = moduloReduce (modulus, value1);
        value2 = moduloReduce (modulus, value2);
        }
    return moduloReduce (modulus, value1 * value2);
    }

//---------------------------------------------------------------------------

// Return (base ^ exponent) mod modulus.
//
// Uses binary (square-and-multiply) exponentiation, so the number of modular
// multiplications grows with the bit length of the exponent rather than with
// its value. An exponent of zero yields 1 without performing any reduction.
static uint32_t modpower (uint32_t modulus, uint32_t base, uint32_t exponent)
    {
    uint32_t result = 1;

    while (exponent)
        {
        // multiply in the current power of base when its exponent bit is set
        if (exponent & 1) result = modmul (modulus, result, base);
        exponent >>= 1;
        // square only while higher exponent bits remain to be processed
        if (exponent) base = modmul (modulus, base, base);
        }
    return result;
    }

//----------------------------------------------------------------------------

// Set one bit in a bit map that is stored as an array of 32-bit words.
// Bit number 0 is the least significant bit of word 0; bit number 32 is the
// least significant bit of word 1, and so on.
static void setBit (void *bitMap, uint64_t bitNumber)
    {
    uint32_t *words = bitMap;
    uint32_t mask = 1u << (bitNumber % 32);

    words [bitNumber / 32] |= mask;
    }

//----------------------------------------------------------------------------
//
// Thread main loop
//
// Each worker repeatedly claims the next unprocessed 'a' value from the shared
// counter threadStruct->a and runs bealSearchCore on it. Once the claimed
// value exceeds threadStruct->maxBase the worker counts itself as complete,
// and the last worker to complete signals allThreadsComplete.
//
// Returns NULL on success, or an error message if SetEvent fails.
//
static char *workerThread (THREAD_STRUCT *threadStruct)
    {
    int a, searches;
    long finished, threadCount;
    HANDLE allThreadsComplete;
    uint64_t elapsed;

    for (;;)
        {
        // Claim the next 'a' value atomically, then range-check the claimed
        // value. Checking the shared counter before claiming would let several
        // threads pass the check together and claim values beyond maxBase.
        a = _InterlockedIncrement ((volatile long *) &threadStruct->a) - 1;

        // completion check: when no more sections are available, the thread returns
        if ((uint32_t) a > threadStruct->maxBase)
            {
            // Read everything needed from threadStruct before announcing
            // completion: the dispatcher frees threadStruct once the last
            // thread has signaled, which can happen right after the increment.
            threadCount = (long) threadStruct->threadCount;
            allThreadsComplete = threadStruct->allThreadsComplete;

            // Increment global completion counter. The value returned by the
            // atomic increment identifies exactly one thread as the last to
            // complete; that thread signals the dispatcher before returning.
            finished = _InterlockedIncrement ((volatile long *) &threadStruct->completions);
            if (finished == threadCount)
                {
                if (!SetEvent (allThreadsComplete))
                    return formatMessage ("SetEvent: %s", winErrorText (0));
                }
            return NULL;
            }

        searches = bealSearchCore (a, threadStruct);
        elapsed = queryPerformanceCounter () - threadStruct->progStartTime;
        if (!threadStruct->quiet)
            {
            // the mutex keeps progress lines from different threads from interleaving
            WaitForSingleObject (threadStruct->printMutex, INFINITE);
            printf ("a=%-5u searches=%u %s\n", (unsigned) a, (unsigned) searches,
                elapsedTimeText ((double) elapsed / queryPerformanceFrequency ()));
            ReleaseMutex (threadStruct->printMutex);
            }
        }
    }

//----------------------------------------------------------------------------
//
// Thread entry point: receives the shared THREAD_STRUCT as its argument, runs
// the worker loop on it, and prints any error message the loop returns.
// The thread exit code is always 0.
//
static unsigned int __stdcall workerThreadEntry (void *arg)
    {
    char *failure = workerThread ((THREAD_STRUCT *) arg);

    if (failure != NULL)
        {
        printf ("%s\n", failure);
        }
    return 0;
    }

//----------------------------------------------------------------------------
//
// Print the command line help text to stdout.
// Always returns NULL, so a caller can return the result directly as a
// "no error" status.
//
char *helpScreen (void)
    {
    static const char *const helpLines [] =
        {
        "\nbeal counterexample search 1.0, copyright 2013 Scott Duplichan\n",
        "searches for a^x + b^y = c^z where a,b,c have no common factor\n",
        "use bc [options]\n\n",
        "options:\n",
        "   threads=    number of threads to use default=0 (one per core)\n",
        "   minbase=    starting value for a,b,c default=1\n",
        "   maxbase=    ending value for a,b,c   default=100\n",
        "   minpower=   starting value for x,y,z default=3\n",
        "   maxpower=   ending value for x,y,z   default=100\n",
        "   high        run at high priority     default=low\n",
        "   verbose     log cases that pass first screen\n",
        "   quiet       log passing cases only\n",
        };
    size_t line;

    // the text contains no conversion specifiers, so it is written verbatim
    for (line = 0; line < sizeof helpLines / sizeof helpLines [0]; line++)
        fputs (helpLines [line], stdout);
    return NULL;
    }

//----------------------------------------------------------------------------
// main entry point for search

// czTable - a C^Z lookup table for all values of C and Z to test. The lookup table
//           keeps only a modulo of C^Z.
// bitMap  - array of bits for every possible modulo value. A bit is set if the
//           corresponding modulo value is present in czTable. The same modulo
//           value may occur multiple times in czTable.
// modulus - divisor for modulo operations.
// 

char *runMain (int argc, char* argv [])
    {
    uint32_t power, base, modBits, czTableElements, czTableSize;
    uint32_t *bitMap1, *bitMap2, *bitMap3, *czTable;
    uint32_t *bitMap1c, *bitMap2c, *bitMap3c;
    int index, argCount, newPriority, threads;
    THREAD_STRUCT *threadStruct;
    DWORD result, priority;
    uint32_t minBase, maxBase, minPower, maxPower, highPriority, quiet, verbose;
    uint64_t elapsed;

    // defaults for command line options
    minBase = 1;
    maxBase = 100;
    minPower = 3;
    maxPower = 100;
    modBits = 32;
    threads = 0;
    highPriority = 0;
    quiet = 0;
    verbose = 0;

    if (argc == 1) return helpScreen ();
    argCount = argc;
    while (--argCount)
        {
        char *position = argv [argCount];
        if (memcmp (position, "threads=", 8) == 0)
            threads = strtoul (position + 8, NULL, 0);
        else if (strcmp (position, "high") == 0)
            highPriority = 1;
        else if (strcmp (position, "quiet") == 0)
            quiet = 1;
        else if (strcmp (position, "verbose") == 0)
            verbose = 1;
        else if (memcmp (position, "minbase=", 8) == 0)
            minBase = strtoul (position + 8, NULL, 0);
        else if (memcmp (position, "maxbase=", 8) == 0)
            maxBase = strtoul (position + 8, NULL, 0);
        else if (memcmp (position, "minpower=", 9) == 0)
            minPower = strtoul (position + 9, NULL, 0);
        else if (memcmp (position, "maxpower=", 9) == 0)
            maxPower = strtoul (position + 9, NULL, 0);
        }

    threadStruct = calloc (1, sizeof *threadStruct);
    if (!threadStruct) return "memory allocation failed";

    if (threads == 0) threads = atoi (getenv ("NUMBER_OF_PROCESSORS"));
    threadStruct->threadCount = threads;
    threadStruct->allThreadsComplete = CreateEvent (NULL, FALSE, FALSE, NULL);
    if (!threadStruct->allThreadsComplete) return formatMessage ("CreateEvent: %s", winErrorText (0));

    czTableElements = (maxBase + 1) * (maxPower + 1);
    czTableSize = czTableElements * sizeof czTable [0];

    czTable  = calloc (1, czTableSize);
    bitMap1  = calloc (1, 1ull << (modBits - 3));
    bitMap2  = calloc (1, 1ull << (modBits - 3));
    bitMap3  = calloc (1, 1ull << (modBits - 3));
    bitMap1c = calloc (1, 1ull << (modBits - 3 - bitMapCompress));
    bitMap2c = calloc (1, 1ull << (modBits - 3 - bitMapCompress));
    bitMap3c = calloc (1, 1ull << (modBits - 3 - bitMapCompress));
    if (!czTable || !bitMap1 || !bitMap2 || !bitMap3 || !bitMap1c || !bitMap2c || !bitMap3c) return "alloc failed";
   
    for (base = minBase; base <= maxBase; base++)
        {
        uint32_t cz1 = modpower (modulus1, base, minPower);
        uint32_t cz2 = modpower (modulus2, base, minPower);
        uint32_t cz3 = modpower (modulus3, base, minPower);
        for (power = minPower; power <= maxPower; power++)
            {
            setBit (bitMap1, cz1);
            setBit (bitMap2, cz2);
            setBit (bitMap3, cz3);
            setBit (bitMap1c, cz1 >> bitMapCompress);
            setBit (bitMap2c, cz2 >> bitMapCompress);
            setBit (bitMap3c, cz3 >> bitMapCompress);
            czTable [base * maxPower + power] = cz1;
            cz1 = modmul (modulus1, cz1, base);
            cz2 = modmul (modulus2, cz2, base);
            cz3 = modmul (modulus3, cz3, base);
            }
        }

    // lower our priority (default) or raise (command line option)
    priority = GetPriorityClass (GetCurrentProcess ());
    newPriority = BELOW_NORMAL_PRIORITY_CLASS;
    if (highPriority) newPriority = ABOVE_NORMAL_PRIORITY_CLASS;
    SetPriorityClass (GetCurrentProcess (), newPriority);

    threadStruct->progStartTime = queryPerformanceCounter ();
    threadStruct->minPower = minPower;
    threadStruct->minBase = minBase;
    threadStruct->maxBase = maxBase;
    threadStruct->maxPower = maxPower;
    threadStruct->czTable = czTable;
    threadStruct->bitMap1 = bitMap1;
    threadStruct->bitMap2 = bitMap2;
    threadStruct->bitMap3 = bitMap3;
    threadStruct->bitMap1c = bitMap1c;
    threadStruct->bitMap2c = bitMap2c;
    threadStruct->bitMap3c = bitMap3c;
    threadStruct->quiet = quiet;
    threadStruct->verbose = verbose;
    threadStruct->a = minBase;
    threadStruct->printMutex = CreateMutex (NULL, FALSE, NULL);
    if (threadStruct->printMutex == NULL) return formatMessage ("CreateMutex: %s", winErrorText (0));

    // run the search
    for (index = 0; index < threads; index++)
        {
        uintptr_t handle = _beginthreadex (NULL, 0, &workerThreadEntry, threadStruct, 0, NULL);
        if (!handle) return formatMessage ("_beginthreadex: %s", winErrorText (0));
        }

    // wait for completion
    result = WaitForSingleObject (threadStruct->allThreadsComplete, INFINITE);
    if (result == 0xFFFFFFFF) return formatMessage ("WaitForSingleObject: %s", winErrorText (0));
    if (result != WAIT_OBJECT_0) return formatMessage ("WaitForSingleObject: %Xh", result);

    // restore priority
    SetPriorityClass (GetCurrentProcess (), priority);

    elapsed = queryPerformanceCounter () - threadStruct->progStartTime;
    printf ("%s candidates checked, %s pass first screen, %s pass second screen\n",
        commanumber (threadStruct->candidates),
        commanumber (threadStruct->searches),
        commanumber (threadStruct->passes));
    printf ("elapsed time: %s\n", elapsedTimeText ((double) elapsed / queryPerformanceFrequency ()));
    free (czTable);
    free (bitMap1);
    free (bitMap2);
    free (bitMap3);
    free (bitMap1c);
    free (bitMap2c);
    free (bitMap3c);
    free (threadStruct);
    return 0;
    }

//----------------------------------------------------------------------------

// Program entry point: runs the search and prints any error message it
// returns. The exit status is 1 when an error was reported, 0 otherwise.
int main (int argc, char *argv [])
    {
    char *error = runMain (argc, argv);

    if (error == NULL) return 0;

    printf ("%s\n", error);
    return 1;
    }

//---------------------------------------------------------------------------
