/*
 * TLSH is provided for use under two licenses: Apache OR BSD.
 * Users may opt to use either license depending on the license
 * restrictions of the systems with which they plan to integrate
 * the TLSH code.
 */ 

/* ==============
 * Apache License
 * ==============
 * Copyright 2013 Trend Micro Incorporated
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ===========
 * BSD License
 * ===========
 * Copyright (c) 2013, Trend Micro Incorporated
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.

 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "tlsh.h"
#include "tlsh_impl.h"
#include "tlsh_util.h"

#include <string>
#include <cassert>
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <string.h>
#include <errno.h>

#define RANGE_LVALUE 256
#define RANGE_QRATIO 16

static void find_quartile(unsigned int *q1, unsigned int *q2, unsigned int *q3, const unsigned int * a_bucket);
static unsigned int partition(unsigned int * buf, unsigned int left, unsigned int right);

////////////////////////////////////////////////////////////////////////////////////////////////

// Build an empty hasher: no bucket array and no cached hash string are
// allocated yet, the byte counter is zero, and both the sliding window and
// the binary digest are cleared.
TlshImpl::TlshImpl()
    : a_bucket(NULL), data_len(0), lsh_code(NULL), lsh_code_valid(false)
{
    // The QR and QB views of lsh_bin.Q are expected to occupy the same size.
    assert(sizeof(this->lsh_bin.Q.QR) == sizeof(this->lsh_bin.Q.QB));

    memset(&this->lsh_bin, 0, sizeof(this->lsh_bin));
    memset(this->slide_window, 0, sizeof(this->slide_window));
}

// Release the two heap buffers owned by this object; either may be NULL,
// which delete[] accepts.
TlshImpl::~TlshImpl()
{
    delete [] this->lsh_code;
    delete [] this->a_bucket;
}

// Put the object back into the state the constructor leaves it in: buffers
// freed, counters and flags cleared, window and binary digest zeroed.
void TlshImpl::reset()
{
    // Drop the owned buffers first and forget the stale pointers.
    delete [] this->lsh_code;
    delete [] this->a_bucket;
    this->lsh_code = NULL;
    this->a_bucket = NULL;

    // Then clear all scalar and fixed-size state.
    this->lsh_code_valid = false;
    this->data_len = 0;
    memset(&this->lsh_bin, 0, sizeof(this->lsh_bin));
    memset(this->slide_window, 0, sizeof(this->slide_window));
}

////////////////////////////////////////////////////////////////////////////////////////////

// Pearson's sample random table: the 256-entry byte lookup table used by
// b_mapping() and the fast_b_mapping() macro below.  Each hashing step
// replaces the running value h with v_table[h ^ input_byte].
// The numeric salts hard-coded at the fast_b_mapping() call sites are
// entries of this table (e.g. v_table[2] == 49, v_table[3] == 12), i.e. the
// first lookup of b_mapping() already applied to the salt.
static unsigned char v_table[256] = {
    1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
    14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
    110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
    25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
    97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
    174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
    132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
    119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
    138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
    170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
    125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
    118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
    27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
    233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
    140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
    51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209
};

// Pearson's algorithm over four input bytes: starting from 0, fold salt, i,
// j and k (in that order) into the running value through v_table and return
// the final byte.
unsigned char b_mapping(unsigned char salt, unsigned char i, unsigned char j, unsigned char k) {
    const unsigned char inputs[4] = { salt, i, j, k };
    unsigned char h = 0;

    for (int step = 0; step < 4; step++) {
        h = v_table[h ^ inputs[step]];
    }
    return h;
}

/*
NEVER USED - showing a step in the optimization sequence
unsigned char faster_b_mapping(unsigned char mod_salt, unsigned char i, unsigned char j, unsigned char k) {
    unsigned char h;
    
    h = v_table[mod_salt ^ i];
    h = v_table[h ^ j];
    h = v_table[h ^ k];
    return h;
}
*/

// Three-lookup form of b_mapping(): `ms` is the salt AFTER its first table
// lookup (ms == v_table[salt]), so only i, j and k remain to be folded in.
// Every argument is parenthesised so that a compound expression passed by a
// caller cannot be re-associated with the surrounding ^ operators.
// Arguments are expected to be byte values (0..255); each is evaluated once.
#define fast_b_mapping(ms,i,j,k) (v_table[ v_table[ v_table[(ms)^(i)] ^ (j)] ^ (k) ])

////////////////////////////////////////////////////////////////////////////////////////////

// SLIDING_WND_SIZE_M1 is the sliding window size minus one.  It is defined
// only for window sizes 4 through 8; for any other size it stays undefined.
#if SLIDING_WND_SIZE>=4 && SLIDING_WND_SIZE<=8
	#define SLIDING_WND_SIZE_M1	(SLIDING_WND_SIZE-1)
#endif

// Feed `len` bytes starting at `data` into the running digest state.
//
// Every byte is written into the slide_window ring buffer.  Once at least
// SLIDING_WND_SIZE_M1 bytes precede the current one, the checksum is advanced
// (unless CHECKSUM_0B is defined) and one bucket counter is incremented for
// each byte triplet formed from the current byte and two older window bytes.
// The call is refused with a message on stderr when the digest is already
// valid.  For a 5-byte window with a 1-byte checksum the work is delegated to
// fast_update().
void TlshImpl::update(const unsigned char* data, unsigned int len) 
{
    if (this->lsh_code_valid) {
        fprintf(stderr, "call to update() on a tlsh that is already valid\n");
        return;
    }

    // Ring-buffer helpers.  Being macros, they remain defined for the code
    // that follows this function in the file.
    #define RNG_SIZE    	SLIDING_WND_SIZE
    #define RNG_IDX(i)	((i+RNG_SIZE)%RNG_SIZE)

    // The bucket histogram is created, zero-filled, on first use.
    if (this->a_bucket == NULL) {
        this->a_bucket = new unsigned int [BUCKETS];
        memset(this->a_bucket, 0, sizeof(int)*BUCKETS);
    }

#if SLIDING_WND_SIZE==5
    if (TLSH_CHECKSUM_LEN == 1) {
        fast_update(data, len);
        return;
    }
#endif

    unsigned int seen = this->data_len;                 // bytes consumed before data[n]
    int pos = (int)(this->data_len % RNG_SIZE);         // ring slot for data[n]
    unsigned int n = 0;

    while (n < len) {
        this->slide_window[pos] = data[n];

        // Only hash once the window holds enough history.
        if (seen >= SLIDING_WND_SIZE_M1) {
            // c0 is the newest byte, cN the byte N positions behind it.
            const unsigned char c0 = this->slide_window[pos];
            const unsigned char c1 = this->slide_window[RNG_IDX(pos-1)];
            const unsigned char c2 = this->slide_window[RNG_IDX(pos-2)];
            const unsigned char c3 = this->slide_window[RNG_IDX(pos-3)];
#if SLIDING_WND_SIZE>=5
            const unsigned char c4 = this->slide_window[RNG_IDX(pos-4)];
#endif
#if SLIDING_WND_SIZE>=6
            const unsigned char c5 = this->slide_window[RNG_IDX(pos-5)];
#endif
#if SLIDING_WND_SIZE>=7
            const unsigned char c6 = this->slide_window[RNG_IDX(pos-6)];
#endif
#if SLIDING_WND_SIZE>=8
            const unsigned char c7 = this->slide_window[RNG_IDX(pos-7)];
#endif

#ifndef CHECKSUM_0B
            for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {
                if (k == 0) {
                    // same as b_mapping(0, ...)
                    this->lsh_bin.checksum[k] = fast_b_mapping(1, c0, c1, this->lsh_bin.checksum[k]);
                } else {
                    // later checksum bytes are salted with the byte just computed before them
                    this->lsh_bin.checksum[k] = b_mapping(this->lsh_bin.checksum[k-1], c0, c1, this->lsh_bin.checksum[k]);
                }
            }
#endif

            // Each line is b_mapping(<salt in comment>, ...) with the salt pre-hashed.
            this->a_bucket[ fast_b_mapping(49,  c0, c1, c2) ]++;   // salt 2
            this->a_bucket[ fast_b_mapping(12,  c0, c1, c3) ]++;   // salt 3
            this->a_bucket[ fast_b_mapping(178, c0, c2, c3) ]++;   // salt 5
#if SLIDING_WND_SIZE>=5
            this->a_bucket[ fast_b_mapping(166, c0, c2, c4) ]++;   // salt 7
            this->a_bucket[ fast_b_mapping(84,  c0, c1, c4) ]++;   // salt 11
            this->a_bucket[ fast_b_mapping(230, c0, c3, c4) ]++;   // salt 13
#endif
#if SLIDING_WND_SIZE>=6
            this->a_bucket[ fast_b_mapping(197, c0, c1, c5) ]++;   // salt 17
            this->a_bucket[ fast_b_mapping(181, c0, c2, c5) ]++;   // salt 19
            this->a_bucket[ fast_b_mapping(80,  c0, c3, c5) ]++;   // salt 23
            this->a_bucket[ fast_b_mapping(142, c0, c4, c5) ]++;   // salt 29
#endif
#if SLIDING_WND_SIZE>=7
            this->a_bucket[ fast_b_mapping(200, c0, c1, c6) ]++;   // salt 31
            this->a_bucket[ fast_b_mapping(253, c0, c2, c6) ]++;   // salt 37
            this->a_bucket[ fast_b_mapping(101, c0, c3, c6) ]++;   // salt 41
            this->a_bucket[ fast_b_mapping(18,  c0, c4, c6) ]++;   // salt 43
            this->a_bucket[ fast_b_mapping(222, c0, c5, c6) ]++;   // salt 47
#endif
#if SLIDING_WND_SIZE>=8
            this->a_bucket[ fast_b_mapping(237, c0, c1, c7) ]++;   // salt 53
            this->a_bucket[ fast_b_mapping(214, c0, c2, c7) ]++;   // salt 59
            this->a_bucket[ fast_b_mapping(227, c0, c3, c7) ]++;   // salt 61
            this->a_bucket[ fast_b_mapping(22,  c0, c4, c7) ]++;   // salt 67
            this->a_bucket[ fast_b_mapping(175, c0, c5, c7) ]++;   // salt 71
            this->a_bucket[ fast_b_mapping(5,   c0, c6, c7) ]++;   // salt 73
#endif
        }

        n++;
        seen++;
        pos = RNG_IDX(pos+1);
    }
    this->data_len += len;
}

/////////////////////////////////////////////////////////////////////////////
// update for the case when SLIDING_WND_SIZE==5 && (TLSH_CHECKSUM_LEN == 1)
//
// Same contract as the generic loop in update(): consume `len` bytes from
// `data`, advance the single checksum byte and increment six bucket counters
// per byte once four bytes of history exist.  Two code paths are used:
//   - an unrolled path handling 5 consecutive bytes straight from `data`
//     (taken when 4 earlier bytes and more than 5 later bytes are available
//     in this buffer); it does not touch slide_window;
//   - a single-byte path going through slide_window, used near the start and
//     end of the buffer.  Because the unrolled path requires i+5 < len, the
//     single-byte path always handles the last byte(s) of a call and thereby
//     leaves slide_window holding the most recent bytes for the next call.
// The caller (update()) has already allocated a_bucket.
/////////////////////////////////////////////////////////////////////////////

void TlshImpl::fast_update(const unsigned char* data, unsigned int len) 
{
    unsigned int fed_len = this->data_len;
    int j = (int)(this->data_len % RNG_SIZE);
    unsigned char checksum = this->lsh_bin.checksum[0];

    for( unsigned int i=0; i<len;  ) {
        if ( fed_len >= SLIDING_WND_SIZE_M1 ) {
            //only calculate when input >= 5 bytes
            if ((i >= 4) && (i+5 < len)) {
                // a4..a8 are the five bytes processed in this step,
                // a0..a3 the four bytes of history preceding a4.
                unsigned a0 = data[i-4];
                unsigned a1 = data[i-3];
                unsigned a2 = data[i-2];
                unsigned a3 = data[i-1];
                unsigned a4 = data[i];
                unsigned a5 = data[i+1];
                unsigned a6 = data[i+2];
                unsigned a7 = data[i+3];
                unsigned a8 = data[i+4];

                checksum = fast_b_mapping(1, a4, a3, checksum );
                this->a_bucket[ fast_b_mapping(49,  a4, a3, a2 ) ]++;
                this->a_bucket[ fast_b_mapping(12,  a4, a3, a1 ) ]++;
                this->a_bucket[ fast_b_mapping(178, a4, a2, a1 ) ]++;
                this->a_bucket[ fast_b_mapping(166, a4, a2, a0 ) ]++;
                this->a_bucket[ fast_b_mapping(84,  a4, a3, a0 ) ]++;
                this->a_bucket[ fast_b_mapping(230, a4, a1, a0 ) ]++;

                checksum = fast_b_mapping(1, a5, a4, checksum );
                this->a_bucket[ fast_b_mapping(49,  a5, a4, a3 ) ]++;
                this->a_bucket[ fast_b_mapping(12,  a5, a4, a2 ) ]++;
                this->a_bucket[ fast_b_mapping(178, a5, a3, a2 ) ]++;
                this->a_bucket[ fast_b_mapping(166, a5, a3, a1 ) ]++;
                this->a_bucket[ fast_b_mapping(84,  a5, a4, a1 ) ]++;
                this->a_bucket[ fast_b_mapping(230, a5, a2, a1 ) ]++;

                checksum = fast_b_mapping(1, a6, a5, checksum );
                this->a_bucket[ fast_b_mapping(49,  a6, a5, a4 ) ]++;
                this->a_bucket[ fast_b_mapping(12,  a6, a5, a3 ) ]++;
                this->a_bucket[ fast_b_mapping(178, a6, a4, a3 ) ]++;
                this->a_bucket[ fast_b_mapping(166, a6, a4, a2 ) ]++;
                this->a_bucket[ fast_b_mapping(84,  a6, a5, a2 ) ]++;
                this->a_bucket[ fast_b_mapping(230, a6, a3, a2 ) ]++;

                checksum = fast_b_mapping(1, a7, a6, checksum );
                this->a_bucket[ fast_b_mapping(49,  a7, a6, a5 ) ]++;
                this->a_bucket[ fast_b_mapping(12,  a7, a6, a4 ) ]++;
                this->a_bucket[ fast_b_mapping(178, a7, a5, a4 ) ]++;
                this->a_bucket[ fast_b_mapping(166, a7, a5, a3 ) ]++;
                this->a_bucket[ fast_b_mapping(84,  a7, a6, a3 ) ]++;
                this->a_bucket[ fast_b_mapping(230, a7, a4, a3 ) ]++;

                checksum = fast_b_mapping(1, a8, a7, checksum );
                this->a_bucket[ fast_b_mapping(49,  a8, a7, a6 ) ]++;
                this->a_bucket[ fast_b_mapping(12,  a8, a7, a5 ) ]++;
                this->a_bucket[ fast_b_mapping(178, a8, a6, a5 ) ]++;
                this->a_bucket[ fast_b_mapping(166, a8, a6, a4 ) ]++;
                this->a_bucket[ fast_b_mapping(84,  a8, a7, a4 ) ]++;
                this->a_bucket[ fast_b_mapping(230, a8, a5, a4 ) ]++;

                i=i+5;
                fed_len=fed_len+5;
                j=RNG_IDX(j+5);
            } else {
                // Single-byte path.  History that is available in this
                // buffer is (re)written into the window from `data`; older
                // history is whatever previous calls left in the window.
                this->slide_window[j] = data[i];
                int j_1 = RNG_IDX(j-1); if (i >= 1) { this->slide_window[j_1] = data[i-1]; }
                int j_2 = RNG_IDX(j-2); if (i >= 2) { this->slide_window[j_2] = data[i-2]; }
                int j_3 = RNG_IDX(j-3); if (i >= 3) { this->slide_window[j_3] = data[i-3]; }
                int j_4 = RNG_IDX(j-4); if (i >= 4) { this->slide_window[j_4] = data[i-4]; }

                checksum = fast_b_mapping(1, this->slide_window[j], this->slide_window[j_1], checksum );
                this->a_bucket[ fast_b_mapping(49,  this->slide_window[j], this->slide_window[j_1], this->slide_window[j_2] ) ]++;
                this->a_bucket[ fast_b_mapping(12,  this->slide_window[j], this->slide_window[j_1], this->slide_window[j_3] ) ]++;
                this->a_bucket[ fast_b_mapping(178, this->slide_window[j], this->slide_window[j_2], this->slide_window[j_3] ) ]++;
                this->a_bucket[ fast_b_mapping(166, this->slide_window[j], this->slide_window[j_2], this->slide_window[j_4] ) ]++;
                this->a_bucket[ fast_b_mapping(84,  this->slide_window[j], this->slide_window[j_1], this->slide_window[j_4] ) ]++;
                this->a_bucket[ fast_b_mapping(230, this->slide_window[j], this->slide_window[j_3], this->slide_window[j_4] ) ]++;
                i++;
                fed_len++;
                j=RNG_IDX(j+1);
            }
        } else {
            // Warm-up: fewer than 4 bytes seen so far, nothing to hash yet.
            // The byte must still be remembered in the window (as the generic
            // loop in update() does); otherwise, when the first bytes arrive
            // in short separate update() calls, later triplets would read
            // zeros instead of the real history.
            this->slide_window[j] = data[i];
            i++;
            fed_len++;
            j=RNG_IDX(j+1);
        }
    }
    this->lsh_bin.checksum[0] = checksum;
    this->data_len += len;
}

/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////

/*
 * Signal that there is no more data to be added and derive the digest.
 *
 * fc_cons_option selects the minimum input length: values <= 1 require at
 * least MIN_DATA_LENGTH bytes, the value 2 requires at least
 * MIN_CONSERVATIVE_DATA_LENGTH bytes, any other value applies no length
 * check.
 *
 * On success the quartile codes, Lvalue and Q ratios are stored in lsh_bin
 * and lsh_code_valid is set.  If the input is rejected (too short, too few
 * non-zero buckets, no bucket data, or a zero third quartile) the bucket
 * array is released and the digest stays invalid.  Calling final() on an
 * already valid digest only prints a message to stderr.
 */
void TlshImpl::final(int fc_cons_option) 
{
    if (this->lsh_code_valid) {
      fprintf(stderr, "call to final() on a tlsh that is already valid\n");
      return;
    }   
    // incoming data must be at least MIN_DATA_LENGTH bytes long
    if ((fc_cons_option <= 1) && (this->data_len < MIN_DATA_LENGTH)) {
      // this->lsh_code be empty
      delete [] this->a_bucket; this->a_bucket = NULL;
      return;
    }
    if ((fc_cons_option == 2) && (this->data_len < MIN_CONSERVATIVE_DATA_LENGTH)) {
      // this->lsh_code be empty
      delete [] this->a_bucket; this->a_bucket = NULL;
      return;
    }
    // The length checks above are skipped for fc_cons_option values other
    // than 0, 1 and 2, so update() may never have allocated the buckets.
    // There is nothing to summarise in that case.
    if (this->a_bucket == NULL) {
      return;
    }

    unsigned int q1, q2, q3;
    find_quartile(&q1, &q2, &q3, this->a_bucket);

    // buckets must be more than 50% non-zero
    int nonzero = 0;
    for(unsigned int i=0; i<CODE_SIZE; i++) {
      for(unsigned int j=0; j<4; j++) {
        if (this->a_bucket[4*i + j] > 0) {
          nonzero++;
        }
      }
    }
#if defined BUCKETS_48
    if (nonzero < 18) {
      // printf("nonzero=%d\n", nonzero);
      delete [] this->a_bucket; this->a_bucket = NULL;
      return;
    }
#else
    if (nonzero <= 4*CODE_SIZE/2) {
      delete [] this->a_bucket; this->a_bucket = NULL;
      return;
    }
#endif

    // q3 is the divisor of both Q ratios below.  A zero divisor would give
    // inf/NaN, and converting that to unsigned int is undefined behaviour,
    // so reject such a histogram instead of producing a digest.
    if (q3 == 0) {
      delete [] this->a_bucket; this->a_bucket = NULL;
      return;
    }
    
    // Pack four buckets per output byte, two bits each: 0 if the count is
    // <= q1, 1 if in (q1, q2], 2 if in (q2, q3], 3 if above q3.
    for(unsigned int i=0; i<CODE_SIZE; i++) {
        unsigned char h=0;
        for(unsigned int j=0; j<4; j++) {
            unsigned int k = this->a_bucket[4*i + j];
            if( q3 < k ) {
                h += 3 << (j*2);  // leave the optimization j*2 = j<<1 or j*2 = j+j for compiler
            } else if( q2 < k ) {
                h += 2 << (j*2);
            } else if( q1 < k ) {
                h += 1 << (j*2);
            }
        }
        this->lsh_bin.tmp_code[i] = h;
    }

    //Done with a_bucket so deallocate
    delete [] this->a_bucket; this->a_bucket = NULL;
    
    this->lsh_bin.Lvalue = l_capturing(this->data_len);
    // The arithmetic below is kept exactly as is: any change in rounding
    // would change the digests produced.
    this->lsh_bin.Q.QR.Q1ratio = (unsigned int) ((float)(q1*100)/(float) q3) % 16;
    this->lsh_bin.Q.QR.Q2ratio = (unsigned int) ((float)(q2*100)/(float) q3) % 16;
    this->lsh_code_valid = true;   
}

// Load the digest from its hex string form.
//
// Returns 0 on success.  Returns 1, leaving the object untouched, when str is
// NULL or when any of its first TLSH_STRING_LEN characters is not a hex digit
// (a shorter string fails at its terminating NUL, so no byte beyond it is
// read).  On success any previous state is discarded via reset() and the
// digest is marked valid.
int TlshImpl::fromTlshStr(const char* str)
{
    if (str == NULL) {
        return 1;
    }

    // Validate input string
    for( int i=0; i < TLSH_STRING_LEN; i++ ) {
        const char c = str[i];
        const bool is_hex =
            (c >= '0' && c <= '9') ||
            (c >= 'A' && c <= 'F') ||
            (c >= 'a' && c <= 'f');
        if (!is_hex) {
            return 1;
        }
    }

    this->reset();
    
    lsh_bin_struct tmp;
    from_hex( str, TLSH_STRING_LEN, (unsigned char*)&tmp );
    
    // Reconstruct checksum, Q ratios & Lvalue: the inverse of the
    // transformation applied by hash(char*, unsigned int).
    for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {    
        this->lsh_bin.checksum[k] = swap_byte(tmp.checksum[k]);
    }
    this->lsh_bin.Lvalue = swap_byte( tmp.Lvalue );
    this->lsh_bin.Q.QB = swap_byte(tmp.Q.QB);
    // The code bytes are stored in reverse order in the string form.
    for( int i=0; i < CODE_SIZE; i++ ){
        this->lsh_bin.tmp_code[i] = (tmp.tmp_code[CODE_SIZE-1-i]);
    }
    this->lsh_code_valid = true;   

    return 0;
}

const char* TlshImpl::hash(char *buffer, unsigned int bufSize) const
{
    if (bufSize < TLSH_STRING_LEN + 1) {
        strncpy(buffer, "", bufSize);
        return buffer;
    }
    if (this->lsh_code_valid == false) {
        strncpy(buffer, "", bufSize);
        return buffer;
    }

    lsh_bin_struct tmp;
    for (int k = 0; k < TLSH_CHECKSUM_LEN; k++) {    
      tmp.checksum[k] = swap_byte( this->lsh_bin.checksum[k] );
    }
    tmp.Lvalue = swap_byte( this->lsh_bin.Lvalue );
    tmp.Q.QB = swap_byte( this->lsh_bin.Q.QB );
    for( int i=0; i < CODE_SIZE; i++ ){
        tmp.tmp_code[i] = (this->lsh_bin.tmp_code[CODE_SIZE-1-i]);
    }

    to_hex( (unsigned char*)&tmp, sizeof(tmp), buffer);
    return buffer;
}

/*
 * Get the hex-encoded hash code.
 *
 * Returns an empty string while the digest is not valid.  Once it is valid,
 * the encoded string is computed on the first call, stored in lsh_code and
 * returned from there on later calls; the returned pointer is owned by this
 * object (it is freed by reset() and by the destructor).
 */
const char* TlshImpl::hash() const
{
    // Do not cache anything while the digest is invalid: an empty string
    // cached here would otherwise keep being returned after a later,
    // successful final().
    if (!this->lsh_code_valid) {
        return "";
    }

    if (this->lsh_code != NULL) {
        // lsh_code has been previously calculated, so just return it
        return this->lsh_code;
    }

    this->lsh_code = new char [TLSH_STRING_LEN+1];
    memset(this->lsh_code, 0, TLSH_STRING_LEN+1);
	
    return hash(this->lsh_code, TLSH_STRING_LEN+1);
}


// Byte-wise comparison of the two binary digests; returns the memcmp()
// result (0 when the lsh_bin contents are identical).
int TlshImpl::compare(const TlshImpl& other) const
{
    const void *mine   = &(this->lsh_bin);
    const void *theirs = &(other.lsh_bin);
    return memcmp(mine, theirs, sizeof(this->lsh_bin));
}

////////////////////////////////////////////
// Multipliers used by totalDiff() for the length and Q-ratio differences.
// Both default to 12.
////////////////////////////////////////////

static int length_mult = 12;
static int qratio_mult = 12;

#ifdef TLSH_DISTANCE_PARAMETERS

       int hist_diff1_add = 1;
       int hist_diff2_add = 2;
       int hist_diff3_add = 6;

// Override the distance tuning parameters.  Each argument equal to -1 means
// "leave the corresponding parameter unchanged".
void set_tlsh_distance_parameters(int length_mult_value, int qratio_mult_value, int hist_diff1_add_value, int hist_diff2_add_value, int hist_diff3_add_value)
{
	int * const targets[5] = {
		&length_mult, &qratio_mult, &hist_diff1_add, &hist_diff2_add, &hist_diff3_add
	};
	const int requested[5] = {
		length_mult_value, qratio_mult_value, hist_diff1_add_value, hist_diff2_add_value, hist_diff3_add_value
	};

	for (int n = 0; n < 5; n++) {
		if (requested[n] != -1) {
			*targets[n] = requested[n];
		}
	}
}
#endif

// Accessor for the Lvalue field of the binary digest.
int TlshImpl::Lvalue()
{
	const int lvalue = this->lsh_bin.Lvalue;
	return lvalue;
}
// Accessor for the Q1ratio field of the binary digest.
int TlshImpl::Q1ratio()
{
	const int ratio = this->lsh_bin.Q.QR.Q1ratio;
	return ratio;
}
// Accessor for the Q2ratio field of the binary digest.
int TlshImpl::Q2ratio()
{
	const int ratio = this->lsh_bin.Q.QR.Q2ratio;
	return ratio;
}
// Return checksum byte k, or 0 when k is outside [0, TLSH_CHECKSUM_LEN).
int TlshImpl::Checksum(int k)
{
	if ((k >= 0) && (k < TLSH_CHECKSUM_LEN)) {
		return this->lsh_bin.checksum[k];
	}
	return 0;
}
// Return the 2-bit code (0..3) stored for `bucket`.
//
// Code bytes are addressed from the end of tmp_code (bucket 0..3 live in the
// last byte), and within a byte the buckets are read from the most
// significant bit pair down to the least significant one.
// No range check is performed on `bucket`; the caller must keep
// bucket / 4 within [0, CODE_SIZE).
int TlshImpl::BucketValue(int bucket)
{
	const int byte_index = (CODE_SIZE - (bucket / 4)) - 1;
	const unsigned char packed = this->lsh_bin.tmp_code[byte_index];

	switch (bucket % 4) {
	case 0:
		return (packed >> 6);		// top bit pair
	case 1:
		return (packed >> 4) & 3;
	case 2:
		return (packed >> 2) & 3;
	default:
		return packed & 3;		// lowest bit pair
	}
}

// Distance score between this digest and `other`; larger means more
// different.  The score is the sum of:
//   - the Lvalue difference (only when len_diff is true): 0 or 1 as is,
//     otherwise multiplied by length_mult;
//   - the Q1 and Q2 ratio differences: 0 or 1 as is, otherwise
//     (difference - 1) multiplied by qratio_mult;
//   - 1 if the checksums differ in any byte;
//   - h_distance() over the two code arrays.
int TlshImpl::totalDiff(const TlshImpl& other, bool len_diff) const
{
    int score = 0;

    if (len_diff) {
        const int ldiff = mod_diff( this->lsh_bin.Lvalue, other.lsh_bin.Lvalue, RANGE_LVALUE);
        // score is still 0 at this point, so assigning equals accumulating
        score = (ldiff == 0 || ldiff == 1) ? ldiff : ldiff*length_mult;
    }

    const int q1diff = mod_diff( this->lsh_bin.Q.QR.Q1ratio, other.lsh_bin.Q.QR.Q1ratio, RANGE_QRATIO);
    score += (q1diff <= 1) ? q1diff : (q1diff-1)*qratio_mult;

    const int q2diff = mod_diff( this->lsh_bin.Q.QR.Q2ratio, other.lsh_bin.Q.QR.Q2ratio, RANGE_QRATIO);
    score += (q2diff <= 1) ? q2diff : (q2diff-1)*qratio_mult;

    // A checksum mismatch costs one point no matter how many bytes differ.
    bool checksum_differs = false;
    for (int k = 0; k < TLSH_CHECKSUM_LEN && !checksum_differs; k++) {
        checksum_differs = (this->lsh_bin.checksum[k] != other.lsh_bin.checksum[k]);
    }
    if (checksum_differs) {
        score++;
    }

    score += h_distance( CODE_SIZE, this->lsh_bin.tmp_code, other.lsh_bin.tmp_code );

    return score;
}



// Exchange two unsigned int lvalues.  Wrapped in do { } while(0) so the
// macro behaves as a single statement (safe inside an unbraced if/else).
#define SWAP_UINT(x,y) do {              \
    unsigned int swap_hold_ = (x);       \
    (x) = (y);                           \
    (y) = swap_hold_;                    \
} while(0)

// Compute the three quartile values of the first EFF_BUCKETS bucket counts.
//
// Writes to *q1, *q2 and *q3 the values that would sit at positions
// EFF_BUCKETS/4-1, EFF_BUCKETS/2-1 and EFF_BUCKETS-EFF_BUCKETS/4-1 of the
// counts in ascending order.  a_bucket itself is not modified: the work is
// done on a local copy, which is only partially ordered by repeated calls
// to partition() (a selection, not a full sort).
void find_quartile(unsigned int *q1, unsigned int *q2, unsigned int *q3, const unsigned int * a_bucket) 
{
    // short_cut_left / short_cut_right record the pivot positions found
    // below / above p2 during the first search (spl / spr count them), so
    // the searches for p1 and p3 can start from already narrowed ranges.
    unsigned int bucket_copy[EFF_BUCKETS], short_cut_left[EFF_BUCKETS], short_cut_right[EFF_BUCKETS], spl=0, spr=0;
    unsigned int p1 = EFF_BUCKETS/4-1;
    unsigned int p2 = EFF_BUCKETS/2-1;
    unsigned int p3 = EFF_BUCKETS-EFF_BUCKETS/4-1;
    unsigned int end = EFF_BUCKETS-1;

    for(unsigned int i=0; i<=end; i++) {
        bucket_copy[i] = a_bucket[i];
    }

    // Stage 1: narrow [l, r] around p2 until a pivot lands exactly on p2.
    for( unsigned int l=0, r=end; ; ) {
        unsigned int ret = partition( bucket_copy, l, r );
        if( ret > p2 ) {
            r = ret - 1;
            short_cut_right[spr] = ret;
            spr++;
        } else if( ret < p2 ){
            l = ret + 1;
            short_cut_left[spl] = ret;
            spl++;
        } else {
            *q2 = bucket_copy[p2];
            break;
        }
    }
    
    // Sentinels: the positions adjacent to p2 bound the two remaining searches.
    short_cut_left[spl] = p2-1;
    short_cut_right[spr] = p2+1;

    // Stage 2: find p1 in the lower part, walking the recorded left pivots
    // until one lies at or beyond p1.
    for( unsigned int i=0, l=0; i<=spl; i++ ) {
        unsigned int r = short_cut_left[i];
        if( r > p1 ) {
            // p1 lies within [l, r]: select inside that range
            for( ; ; ) {
                unsigned int ret = partition( bucket_copy, l, r );
                if( ret > p1 ) {
                    r = ret-1;
                } else if( ret < p1 ) {
                    l = ret+1;
                } else {
                    *q1 = bucket_copy[p1];
                    break;
                }
            }
            break;
        } else if( r < p1 ) {
            // this pivot is still below p1: use it as the new lower bound
            l = r;
        } else {
            // a recorded pivot sits exactly on p1
            *q1 = bucket_copy[p1];
            break;
        }
    }

    // Stage 3: mirror image of stage 2 for p3 in the upper part.
    for( unsigned int i=0, r=end; i<=spr; i++ ) {
        unsigned int l = short_cut_right[i];
        if( l < p3 ) {
            // p3 lies within [l, r]: select inside that range
            for( ; ; ) {
                unsigned int ret = partition( bucket_copy, l, r );
                if( ret > p3 ) {
                    r = ret-1;
                } else if( ret < p3 ) {
                    l = ret+1;
                } else {
                    *q3 = bucket_copy[p3];
                    break;
                }
            }
            break;
        } else if( l > p3 ) {
            // this pivot is still above p3: use it as the new upper bound
            r = l;
        } else {
            // a recorded pivot sits exactly on p3
            *q3 = bucket_copy[p3];
            break;
        }
    }

}

// Partition buf[left..right] (inclusive) around the value found at the
// middle index and return the position where that value ends up.  After the
// call every element before the returned position is smaller than the pivot
// value and no element after it is smaller.
// Ranges of one or two elements are handled directly and return `left`.
unsigned int partition(unsigned int * buf, unsigned int left, unsigned int right) 
{
    if (left == right) {
        return left;
    }
    if (left + 1 == right) {
        // two elements: just put them in order
        if (buf[right] < buf[left]) {
            SWAP_UINT( buf[left], buf[right] );
        }
        return left;
    }

    // Park the pivot value in the last slot while the rest is scanned.
    const unsigned int mid = (left + right) >> 1;
    const unsigned int pivot_val = buf[mid];
    buf[mid] = buf[right];
    buf[right] = pivot_val;

    // `store` is the next slot for an element smaller than the pivot.
    unsigned int store = left;
    unsigned int scan = left;
    while (scan < right) {
        if (buf[scan] < pivot_val) {
            SWAP_UINT( buf[store], buf[scan] );
            store++;
        }
        scan++;
    }

    // Move the pivot value into its final position.
    buf[right] = buf[store];
    buf[store] = pivot_val;

    return store;
}


