/* Objective Modula-2 Compiler (objm2c)
 *
 *  objm2_reserved_words.c
 *  Minimal Perfect Hash for Objective Modula-2 reserved words
 *
 *  Author: Benjamin Kowarsch
 *
 *  Copyright (C) 2009 Sunrise Telephone Systems KK. All rights reserved.
 *
 *  License:
 *
 *  Permission is hereby granted to review and test this software for the sole
 *  purpose of supporting the effort by the licensor to define and develop the
 *  Objective Modula-2 language. It is not permissible under any circumstances
 *  to  use the software  for the purpose  of creating derivative languages or 
 *  dialects.  This permission is valid until 31 December 2009, 24:00h GMT.
 *
 *  Future licensing:
 *
 *  The licensor undertakes to eventually release this software under a proper
 *  open source license  AFTER  the Objective Modula-2 language definition has
 *  been finalised and a conforming and working reference compiler completed.
 *  
 *  Version history:
 *
 *   2.00   2009-01-31   BK   new file
 */


// ---------------------------------------------------------------------------
// ObjM2 project imports
// ---------------------------------------------------------------------------

#include "common_macros.h"
#include "objm2_hash.h"
#include "objm2_reserved_words.h"


// ---------------------------------------------------------------------------
// Base hash algorithm to use for minimal perfect hash
// ---------------------------------------------------------------------------
//
// The  minimal  perfect  hash  function has been designed to use the original
// stored hash algorithm  from  objm2_hash.c  as a base.  If the algorithm for
// stored hashes has been  replaced,  then the macros defined in  objm2_hash.h
// can  no longer be used for the calculation of MPH values.  In this case the
// MPH function will  need  to  use  both  algorithms,  the original algorithm
// to calculate MPH values for the table index,  and the replacement algorithm
// to calculate stored hashes for input string verification.
//
// Macro  OBJM2_HASH_ALGORITHM_IS_ORIGINAL  to be defined or left undefined in
// objm2_hash.c determines whether the  original algorithm  has been replaced. 
// If it is defined,  the original algorithm is in use,  otherwise not.
//
// The MPH function uses its own local macros to calculate the MPH values.  If
// macro  OBJM2_HASH_ALGORITHM_IS_ORIGINAL  is  defined,  these macros will be
// mapped to their respective counterparts defined in objm2_hash.h,  otherwise
// the  local macros  will be defined using the  original algorithm  and  both
// algorithms will then be used.

// if storage hash alogrithm is orignal, map to it
#ifdef OBJM2_HASH_ALGORITHM_IS_ORIGINAL
#define _OBJM2_MPH_INITIAL OBJM2_HASH_INITIAL
#define _OBJM2_MPH_NEXT_CHAR(_hash,_ch) OBJM2_HASH_NEXT_CHAR(_hash,_ch)
#define _OBJM2_MPH_FINAL(_hash) OBJM2_HASH_FINAL(_hash)
#endif

// if storage hash algorithm is not original, define locally
#ifndef OBJM2_HASH_ALGORITHM_IS_ORIGINAL
#define _OBJM2_MPH_INITIAL 0
#define _OBJM2_MPH_NEXT_CHAR(_hash,_ch) \
    (_ch + (_hash << 6) + (_hash << 16) - _hash)
#define _OBJM2_MPH_FINAL(_hash) (hash & 0x7FFFFFFF)
#endif


// ---------------------------------------------------------------------------
// Reserved word hash table
// ---------------------------------------------------------------------------

#define _add_reserved_word(_suffix, _hash) _hash,

static const cardinal objm2_reserved_word[] = {
0, // leading null-entry
// insert hashes from master table
#include "objm2_reserved_word_table.h"
0 // trailing null-entry
} /* end objm2_reserved_word */;

#undef _add_reserved_word


// ---------------------------------------------------------------------------
// MPH mapping table
// ---------------------------------------------------------------------------
//
// This table maps  intermediate  perfect hash codes to alphabetically ordered
// minimal perfect hash codes between 1 and OBJM2_NUMBER_OF_RESERVED_WORDS - 1,
// it further maps all invalid codes to OBJM2_NUMBER_OF_RESERVED_WORDS.

#define _N _OBJM2_NUMBER_OF_RESERVED_WORDS

static const char _objm2_mph_to_index_map[] = {
//            0    1    2    3    4    5    6    7    8    9
/*  00 */    _N,  21,   9,  10,  _N,  _N,  _N,  _N,  _N,  _N,
/*  10 */    50,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/*  20 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/*  30 */    _N,  47,  _N,  _N,  35,  _N,  _N,  _N,  _N,  54,
/*  40 */    23,  _N,  _N,   5,  _N,  _N,  _N,   7,  _N,  _N,
/*  50 */    _N,  _N,  30,  15,   8,  _N,  _N,  _N,  _N,  _N,
/*  60 */    _N,  _N,  26,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/*  70 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  13,  56,
/*  80 */    _N,  _N,  _N,  39,  _N,  19,   4,  _N,  _N,  _N,
/*  90 */    _N,  48,  _N,  _N,  _N,  _N,  44,  _N,  _N,  _N,
/* 100 */    _N,  _N,  33,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/* 110 */    34,  _N,  12,  _N,  36,  _N,  _N,  _N,  _N,  _N,
/* 120 */    57,  _N,  43,  _N,  _N,  _N,  41,  _N,  _N,  31,
/* 130 */    _N,  _N,  _N,  _N,  _N,  53,   3,  _N,  _N,  38,
/* 140 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  29,  _N,  _N,
/* 150 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/* 160 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  52,  _N,
/* 170 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/* 180 */    _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/* 190 */    37,  _N,  _N,  _N,  _N,  40,  20,  16,  32,  55,
/* 200 */    14,  _N,  _N,  _N,  _N,  27,  49,  _N,  _N,  _N,
/* 210 */    _N,  _N,  _N,  _N,   2,  18,  _N,  _N,  _N,  _N,
/* 220 */    _N,  11,  _N,  _N,  _N,  _N,  _N,  _N,  _N,  _N,
/* 230 */    51,  _N,  42,  _N,  45,  _N,  _N,  _N,  46,  _N,
/* 240 */    _N,  _N,  22,  17,  _N,  _N,  _N,   1,  _N,  28,
/* 250 */    25,   6,  _N,  _N,  24,  _N // last index is 255
}; // end _objm2_mph_to_index_map

#undef _N


// ---------------------------------------------------------------------------
// function:  objm2_hash_for_reserved_word(index)
// ---------------------------------------------------------------------------
//
// Returns the  stored hash value for the reserved word  associated with index
// <index>.  If <index> is  greater than  or equal to  the  number of reserved
// words as defined by OBJM2_NUMBER_OF_RESERVED_WORDS, then zero is returned.

cardinal objm2_hash_for_reserved_word(cardinal index) {

    if (index >= _OBJM2_NUMBER_OF_RESERVED_WORDS)
        return 0;
    else
        return objm2_reserved_word[index];

} // end objm2_hash_for_reserved_word


// ---------------------------------------------------------------------------
// function:  objm2_index_for_reserved_word(lexeme, hash)
// ---------------------------------------------------------------------------
//
// Returns the hash table index for the reserved word  represented by <lexeme>
// and <hash>.  If zero is passed for <hash>, the its value will be calculated
// from <lexeme>.  The function executes  slightly faster  if it does not need
// to calculate the storage hash value.
//
// Return values are between 1 and OBJM2_NUMBER_OF_RESERVED_WORDS - 1.  If the
// hash value passed  does not represent  a valid reserved word,  or,  if zero
// was passed for <hash>  and  the string passed in <lexeme>  is  not  a valid
// reserved word,  then  OBJM2_NUMBER_OF_RESERVED_WORDS  is returned.

cardinal objm2_index_for_reserved_word(const char *lexeme, cardinal hash) {
    int storage_hash = OBJM2_HASH_INITIAL;
    int index_hash = _OBJM2_MPH_INITIAL;
    int index = 0;

    if (lexeme == NULL)
        return _OBJM2_NUMBER_OF_RESERVED_WORDS;

    if (*lexeme == ASCII_NUL)
        return _OBJM2_NUMBER_OF_RESERVED_WORDS;
    
    // if zero was passed in for hash or algorithm has been replaced
    if ((OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0) || (hash == 0)) {
    
        // calculate the index hash from the lexeme
        while (lexeme[index] != 0) {
            if (index > OBJM2_MAX_KEYWORD_LENGTH)
                return _OBJM2_NUMBER_OF_KEYWORDS;
        
            storage_hash = OBJM2_HASH_NEXT_CHAR(storage_hash, lexeme[index]);
        
            #if (OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0)
            index_hash = _OBJM2_MPH_NEXT_CHAR(index_hash, lexeme[index]);
            #endif

            index++;
        } // end while
        
        storage_hash = OBJM2_HASH_FINAL(storage_hash);
        index_hash = _OBJM2_MPH_FINAL(index_hash);
        
        #if (OBJM2_HASH_ALGORITHM_IS_ORIGINAL == 0)
        index_hash = ((storage_hash - lexeme[0]) & 0x1FF);
        #else /* storage hash algorithm has been replaced */
        index_hash = ((index_hash - lexeme[0]) & 0x1FF);
        #endif
    }
    else /* neither zero was passed nor has the algorithm been replaced */ {
        // use the passed in hash
        storage_hash = hash;
        index_hash = ((hash - lexeme[0]) & 0x1FF);
    } // end if
    
    // finalise index hash
    index_hash = index_hash + (lexeme[0] & 1) + (lexeme[1] & 1);

    // resolve collision between AND and END
    if (index_hash == 247) {
        index_hash = index_hash - (lexeme[0] & 4);
    }
    // resolve collision between OPTIONAL and OR
    else if (index_hash == 114) {
        index_hash = index_hash - (lexeme[1]);
    } // end if

    // fold to fit all values into 255 cells
    if (index_hash > 255)
        index_hash = index_hash - 259;

    // retrieve index from mph-to-index mapping table
    index = _objm2_mph_to_index_map[index_hash];

    // compare calculated storage hash with stored hash
    if (storage_hash == objm2_keyword[index]) {
        // index is valid if they match
        return index;
    }
    else {
        // index is invalid if they don't
        return _OBJM2_NUMBER_OF_RESERVED_WORDS;
    } // end if
} // objm2_index_for_reserved_word


// END OF FILE