/*
 *  Copyright (c) 2012 Shirou Maruyama
 * 
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 * 
 *   1. Redistributions of source code must retain the above Copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above Copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the name of the authors nor the names of its contributors
 *      may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 */

#include <vector>
#include <string>
#include <iostream>
#include <algorithm>
#include <stdint.h>
#include "HufWatTree.hpp"

using namespace std;

namespace cpi00 {

  // Builds the wavelet tree over the byte sequence `str`.
  //
  // Every byte value that occurs in `str` is renumbered to a dense id
  // 0..k-1 in order of increasing frequency (ties go to the smaller byte
  // value, because min_element returns the first minimum). Mapping_ maps a
  // byte to its id (kMaxAlphabetSize marks "does not occur"), RMapping_ maps
  // an id back to the byte.
  //
  // NOTE: `str` is modified in place. On return each element holds the
  // remapped id instead of the original byte.
  void HufWatTree::Build(vector<uint8_t>& str) {
    Size_ = str.size();

    // Histogram of the byte values.
    // NOTE(review): assumes kMaxAlphabetSize covers every uint8_t value
    // (i.e. is at least 256) -- confirm against the header.
    vector<uint64_t> count(kMaxAlphabetSize, 0);
    for (vector<uint8_t>::iterator itr = str.begin();
         itr != str.end(); ++itr) {
      ++count[*itr];
    }
    // Absent symbols get the sentinel weight so the selection loop below
    // never picks them.
    for (uint64_t i = 0; i < kMaxAlphabetSize; ++i) {
      if (count[i] == 0) {
        count[i] = UINT64_MAX;
      }
    }

    vector<uint64_t>(kMaxAlphabetSize, kMaxAlphabetSize).swap(Mapping_);
    // Start from an empty reverse mapping. Without this, a second Build() on
    // the same object would append to the previous alphabet, inflating
    // NumAlpha_ and making Access() report symbols of the old text.
    vector<uint64_t>().swap(RMapping_);

    // Repeatedly extract the least frequent remaining symbol.
    vector<uint64_t> mapped_count;
    for (uint64_t i = 0; i < kMaxAlphabetSize; ++i) {
      vector<uint64_t>::iterator min_itr = 
        min_element(count.begin(), count.end());
      if (*min_itr == UINT64_MAX) break;  // only absent/used symbols remain
      uint64_t min_idx = min_itr - count.begin();
      Mapping_[min_idx] = i;
      RMapping_.push_back(min_idx);
      mapped_count.push_back(*min_itr);
      *min_itr = UINT64_MAX;              // mark as consumed
    }

    // Rewrite the text in terms of the dense ids.
    for (vector<uint8_t>::iterator itr = str.begin();
         itr != str.end(); ++itr) {
      *itr = Mapping_[*itr];
    }
    BuildWatTree(str, mapped_count);
  }

  // Convenience overload: copies the characters of `str` into a byte vector
  // (each char converted to uint8_t) and builds the tree from that copy.
  // The caller's string is left untouched.
  void HufWatTree::Build(const string& str) {
    vector<uint8_t> bytes(str.begin(), str.end());
    Build(bytes);
  }

  // Wires internal node `target` to its two children.
  //
  // Children_ stores the left child at index 2*target and the right child at
  // 2*target+1. `target` itself is marked parentless (UINT64_MAX) until a
  // later call adopts it as somebody's child.
  void HufWatTree::InitNode(const uint64_t target, 
                            const uint64_t left, const uint64_t right) {
    const uint64_t slot = 2 * target;
    Children_[slot]     = left;
    Children_[slot + 1] = right;

    Parent_[target] = UINT64_MAX;
    Parent_[left]   = target;
    Parent_[right]  = target;
  }

  // Internal nodes occupy ids [0, NumAlpha_ - 1); every id from
  // NumAlpha_ - 1 upwards is treated as a leaf.
  bool HufWatTree::IsLeaf(uint64_t node_id) const {
    return node_id >= NumAlpha_ - 1;
  }

  // Builds the Huffman tree topology (Parent_ / Children_) from the symbol
  // weights in `count`.
  //
  // Node ids: leaves are NumAlpha_-1 .. 2*NumAlpha_-2 (leaf for symbol s is
  // NumAlpha_-1+s); internal nodes are created from id NumAlpha_-2 down to 0,
  // so node 0 is the last one created. A weight of UINT64_MAX marks a node
  // that does not exist yet or has already been merged.
  void HufWatTree::BuildHufTree(const vector<uint64_t>& count) {
    const uint64_t first_leaf = NumAlpha_ - 1;
    vector<uint64_t> weight(2 * NumAlpha_ - 1, UINT64_MAX);
    copy(count.begin(), count.end(), weight.begin() + first_leaf);

    uint64_t node = NumAlpha_ - 2;
    do {
      // Take the lightest available node ...
      const uint64_t lighter =
        min_element(weight.begin(), weight.end()) - weight.begin();
      const uint64_t lighter_weight = weight[lighter];
      weight[lighter] = UINT64_MAX;

      // ... and the next lightest one.
      const uint64_t heavier =
        min_element(weight.begin(), weight.end()) - weight.begin();
      const uint64_t merged_weight = lighter_weight + weight[heavier];
      weight[heavier] = UINT64_MAX;

      // Merge both under a fresh internal node carrying the summed weight.
      InitNode(node, lighter, heavier);
      weight[node] = merged_weight;
    } while (node-- != 0);
  }

  void HufWatTree::SetCodes() {
    for (uint64_t i = NumAlpha_ - 1; i < 2 * NumAlpha_ - 1; ++i) {
      uint64_t code = 0;
      uint64_t v, u = i; 
      while ((v = Parent_[u]) != UINT64_MAX) { 
        code = code << 1;
        if (u == Children_[2 * v + 1]) {
          code |= 1;
        }
        u = v;
       }
       Codes_[i - (NumAlpha_ - 1)] = code;
    }
  }

  // Appends one (already remapped) symbol to the tree: starting at node 0,
  // pushes the next bit of the symbol's code word onto the current node's
  // bit vector and descends into the matching child until a leaf is reached.
  // Code bits are consumed from the least significant bit upwards.
  void HufWatTree::Insert(const uint64_t code) {
    const uint64_t path = Codes_[code];
    uint64_t node = 0;
    for (uint64_t depth = 0; !IsLeaf(node); ++depth) {
      uint64_t bit = (path >> depth) & 1;
      BitVectors_[node].PushBack(bit);
      node = Children_[2 * node + bit];  // bit 0 -> left, bit 1 -> right
    }
  }

  // Feeds every symbol of the remapped text, in order, into the tree.
  void HufWatTree::SetBitVectors(const vector<uint8_t>& str) {
    const vector<uint8_t>::size_type length = str.size();
    for (vector<uint8_t>::size_type k = 0; k < length; ++k) {
      Insert(str[k]);
    }
  }

  // Allocates the tree arrays for the current alphabet (RMapping_) and fills
  // them: Huffman topology, code words, then the per-node bit vectors.
  //
  //   str    text already rewritten to dense symbol ids
  //   count  occurrence count of each dense id, indexed by id
  //
  // Degenerate alphabets are handled here because the size arithmetic below
  // (2 * NumAlpha_ - 1, NumAlpha_ - 2, ...) wraps around for fewer than two
  // symbols:
  //   * 0 symbols (empty text): all structures are left empty.
  //   * 1 symbol: a phantom second symbol with weight 0 is added so that the
  //     tree has a real root with one bit vector. Its RMapping_ entry is
  //     UINT64_MAX and no text position ever routes to it. As a consequence
  //     NumAlpha_ is 2 for a one-symbol text.
  void HufWatTree::BuildWatTree(const vector<uint8_t>& str,
                                const vector<uint64_t>& count) {
    vector<uint64_t> weights(count);
    if (RMapping_.size() == 1) {
      RMapping_.push_back(UINT64_MAX);
      weights.push_back(0);
    }

    NumAlpha_ = RMapping_.size();
    if (NumAlpha_ == 0) {
      vector<uint64_t>().swap(Codes_);
      vector<uint64_t>().swap(Parent_);
      vector<uint64_t>().swap(Children_);
      vector<BitVec>().swap(BitVectors_);
      return;
    }

    // NumAlpha_ leaves + NumAlpha_ - 1 internal nodes.
    vector<uint64_t>(NumAlpha_, UINT64_MAX).swap(Codes_);
    vector<uint64_t>(2 *  NumAlpha_ - 1,  0).swap(Parent_);
    vector<uint64_t>(2 * (NumAlpha_ - 1), 0).swap(Children_);
    vector<BitVec>(NumAlpha_ - 1).swap(BitVectors_);
    BuildHufTree(weights);
    SetCodes();
    SetBitVectors(str);
  }

  // Returns the original symbol stored at position `pos`, or UINT64_MAX when
  // `pos` is out of range.
  //
  // Walks from node 0 to a leaf: at each node the bit at the current index
  // selects the child, and the rank of that bit value becomes the index
  // inside the child.
  uint64_t HufWatTree::Access(const uint64_t pos) const {
    if (pos >= Size_) {
      return UINT64_MAX;
    }
    uint64_t node = 0;
    uint64_t i    = pos;
    while (!IsLeaf(node)) {
      const uint64_t bit = BitVectors_[node].GetBit(i);
      i = (bit == 0) ? BitVectors_[node].Rank0(i)
                     : BitVectors_[node].Rank1(i);
      node = Children_[2 * node + bit];
    }
    // Leaf ids start at NumAlpha_ - 1; translate back to the original symbol.
    return RMapping_[node - (NumAlpha_ - 1)];
  }

  // Rank query for the original symbol `code` at position `pos`.
  //
  // Returns 0 when `code` is outside the alphabet or does not occur in the
  // text. Otherwise follows the symbol's code word down the tree, replacing
  // the running index by the rank of the code bit at each node.
  // NOTE(review): presumably the result is the number of occurrences of
  // `code` in positions [0, pos] -- this depends on BitVec::Rank0/Rank1
  // counting bits in [0, i); confirm against BitVec.
  // NOTE(review): `pos` is not checked against Size_ here.
  uint64_t HufWatTree::Rank(const uint64_t code, 
                            const uint64_t pos) const {
    if (code >= kMaxAlphabetSize || 
        code >= Mapping_.size()) {
      return 0;
    }
    const uint64_t mapped_code = Mapping_[code];
    if (mapped_code == kMaxAlphabetSize) {
      return 0;  // symbol never occurs
    }
    const uint64_t encode = Codes_[mapped_code];

    uint64_t node  = 0;
    uint64_t i     = pos;
    uint64_t depth = 0;
    while (true) {
      const uint64_t bit = (encode >> depth) & 1;
      i = (bit == 0) ? BitVectors_[node].Rank0(i + 1)
                     : BitVectors_[node].Rank1(i + 1);
      node = Children_[2 * node + bit];
      if (i == 0 || IsLeaf(node)) {
        return i;
      }
      --i;       // turn the count into an index inside the child
      ++depth;
    }
  }

  // Returns Size_, which Build() sets to the length of the input sequence
  // and Clear() resets to 0.
  uint64_t HufWatTree::Size() const {
    return Size_;
  }

  void HufWatTree::Clear() {
    NumAlpha_ = 0;
    Size_     = 0;
    vector<uint64_t>().swap(Parent_);
    vector<uint64_t>().swap(Children_);
    vector<uint64_t>().swap(Mapping_);
    vector<uint64_t>().swap(RMapping_);
    vector<BitVec>().swap(BitVectors_);
  }

  // Loads a tree from `is` in the layout produced by Write(): NumAlpha_,
  // Size_, then Parent_, Children_, Mapping_, RMapping_, Codes_ as raw
  // uint64_t arrays, followed by the NumAlpha_ - 1 bit vectors.
  //
  // Returns the number of payload bytes: the five arrays plus whatever each
  // BitVec::Read reports. The two header fields are not counted, and 0 is
  // returned for an empty tree (Size_ == 0).
  //
  // NOTE(review): the stream state is not checked and the header values are
  // not validated; a truncated or corrupt stream is not detected here.
  uint64_t HufWatTree::Read(istream& is) {
    is.read(reinterpret_cast<char*>(&NumAlpha_), sizeof(NumAlpha_));
    is.read(reinterpret_cast<char*>(&Size_),     sizeof(Size_));
    if (Size_ == 0) {
      // An empty tree carries no payload. Drop whatever a previous Build()
      // or Read() left behind; otherwise Rank(), which does not look at
      // Size_, would keep answering from the old tree.
      vector<uint64_t>().swap(Parent_);
      vector<uint64_t>().swap(Children_);
      vector<uint64_t>().swap(Mapping_);
      vector<uint64_t>().swap(RMapping_);
      vector<uint64_t>().swap(Codes_);
      vector<BitVec>().swap(BitVectors_);
      return 0;
    }

    vector<uint64_t>(2 * NumAlpha_ - 1).swap(Parent_);
    is.read(reinterpret_cast<char*>(&Parent_[0]),
            sizeof(Parent_[0]) * Parent_.size());
    vector<uint64_t>(2 * (NumAlpha_ - 1)).swap(Children_);
    is.read(reinterpret_cast<char*>(&Children_[0]),
            sizeof(Children_[0]) * Children_.size());
    vector<uint64_t>(kMaxAlphabetSize).swap(Mapping_);
    is.read(reinterpret_cast<char*>(&Mapping_[0]),
            sizeof(Mapping_[0]) * Mapping_.size());
    vector<uint64_t>(NumAlpha_).swap(RMapping_);
    is.read(reinterpret_cast<char*>(&RMapping_[0]),
            sizeof(RMapping_[0]) * RMapping_.size());
    vector<uint64_t>(NumAlpha_).swap(Codes_);
    is.read(reinterpret_cast<char*>(&Codes_[0]),
            sizeof(Codes_[0]) * Codes_.size());

    // One bit vector per internal node, stored in node-id order.
    vector<BitVec>(NumAlpha_ - 1).swap(BitVectors_);
    uint64_t bytes = 0;
    for (vector<BitVec>::iterator itr = BitVectors_.begin();
         itr != BitVectors_.end(); ++itr) {
      bytes += itr->Read(is);
    }
    bytes += sizeof(Parent_[0])   * Parent_.size();
    bytes += sizeof(Children_[0]) * Children_.size();
    bytes += sizeof(Mapping_[0])  * Mapping_.size();
    bytes += sizeof(RMapping_[0]) * RMapping_.size();
    bytes += sizeof(Codes_[0])    * Codes_.size();
    return bytes;
  }

  // Serializes the tree to `os`: NumAlpha_ and Size_ first, then (only when
  // Size_ != 0) Parent_, Children_, Mapping_, RMapping_, Codes_ as raw
  // uint64_t arrays, followed by every bit vector in node-id order.
  //
  // Returns the number of payload bytes: the five arrays plus whatever each
  // BitVec::Write reports. The two header fields are not counted, and 0 is
  // returned for an empty tree.
  uint64_t HufWatTree::Write(ostream& os) const {
    os.write(reinterpret_cast<const char*>(&NumAlpha_), sizeof(NumAlpha_));
    os.write(reinterpret_cast<const char*>(&Size_),     sizeof(Size_));
    if (Size_ == 0) {
      return 0;
    }

    const uint64_t parent_bytes   = sizeof(Parent_[0])   * Parent_.size();
    const uint64_t children_bytes = sizeof(Children_[0]) * Children_.size();
    const uint64_t mapping_bytes  = sizeof(Mapping_[0])  * Mapping_.size();
    const uint64_t rmapping_bytes = sizeof(RMapping_[0]) * RMapping_.size();
    const uint64_t codes_bytes    = sizeof(Codes_[0])    * Codes_.size();

    os.write(reinterpret_cast<const char*>(&Parent_[0]),   parent_bytes);
    os.write(reinterpret_cast<const char*>(&Children_[0]), children_bytes);
    os.write(reinterpret_cast<const char*>(&Mapping_[0]),  mapping_bytes);
    os.write(reinterpret_cast<const char*>(&RMapping_[0]), rmapping_bytes);
    os.write(reinterpret_cast<const char*>(&Codes_[0]),    codes_bytes);

    uint64_t total = parent_bytes + children_bytes + mapping_bytes
                   + rmapping_bytes + codes_bytes;
    for (uint64_t k = 0; k < BitVectors_.size(); ++k) {
      total += BitVectors_[k].Write(os);
    }
    return total;
  }

} // namespace cpi00
