/*
This file is part of khmer, https://github.com/dib-lab/khmer/, and is
Copyright (C) 2014-2015, Michigan State University.
Copyright (C) 2015-2016, The Regents of the University of California.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of the Michigan State University nor the names
      of its contributors may be used to endorse or promote products
      derived from this software without specific prior written
      permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LICENSE (END)

Contact: khmer-project@idyll.org
*/
#ifndef HLLCOUNTER_HH
#define HLLCOUNTER_HH

#include <string>
#include <vector>

#include "oxli.hh"
#include "read_parsers.hh"

namespace oxli
{

// Forward declarations of the read_parsers types, one per line.
// (This header also includes read_parsers.hh directly.)
namespace read_parsers
{
    class FastxReader;

    template<typename SeqIO>
    class ReadParser;
}

// HLLCounter: cardinality counter parameterised by a word size (ksize).
//
// Only the small accessors are defined inline in this header; every other
// member is implemented in a separate translation unit, so the notes on
// those declarations describe the visible interface only.
// NOTE(review): the name and the paper cited elsewhere in this header
// indicate a HyperLogLog sketch over k-mers -- confirm in the .cc file.
class HLLCounter
{
public:
    // Construct from a target error rate, or directly from the value that
    // get_p() reports; both overloads also take the word size.
    // NOTE(review): presumably the error rate is converted to a precision
    // by the implementation -- confirm.
    HLLCounter(double error_rate, WordLength ksize);
    HLLCounter(int p, WordLength ksize);

    // Feed one string into the counter.
    // NOTE(review): presumably a single k-mer -- confirm.
    void add(const std::string &);
    // Feed a sequence string into the counter.
    // NOTE(review): the return value is presumably the number of k-mers
    // consumed -- confirm.
    unsigned int consume_string(const std::string &);
    // Consume a sequence file given as a string (presumably its path).
    // The two trailing non-const references are presumably running totals
    // written by the call -- confirm their meaning in the implementation.
    template<typename SeqIO>
    void consume_seqfile(std::string const &,
                         bool,
                         unsigned int &,
                         unsigned long long &);
    // Same as above, but reading from an already constructed parser handle.
    template<typename SeqIO>
    void consume_seqfile(read_parsers::ReadParserPtr<SeqIO>&,
                         bool,
                         unsigned int &,
                         unsigned long long &);
    // Takes the read by non-const reference together with a bool flag that
    // the call may modify.
    // NOTE(review): semantics of the flag and return value not visible here.
    unsigned int check_and_process_read(std::string &,
                                        bool &);
    // Does not modify the counter (const), but may modify the passed read.
    bool check_and_normalize_read(std::string &) const;
    // Returns the cardinality estimate as a 64-bit unsigned integer.
    uint64_t estimate_cardinality();
    // Combine another counter into this one.
    // NOTE(review): presumably requires matching parameters -- confirm.
    void merge(HLLCounter &);
    virtual ~HLLCounter() {}

    // The inline accessors below only read members, so they are const and
    // can be called through a const HLLCounter reference or pointer.

    // Returns the stored alpha constant.
    double get_alpha() const
    {
        return alpha;
    }
    // Returns log2 of the number of counters (the precision p).
    int get_p() const
    {
        return ncounters_log2;
    }
    // Returns the number of counters.
    int get_ncounters() const
    {
        return ncounters;
    }
    void set_ksize(WordLength new_ksize);
    // Returns the word size, converted from WordLength to int.
    int get_ksize() const
    {
        return _ksize;
    }
    // Returns a copy of the counter array; the caller's copy is independent
    // of this object.
    std::vector<uint8_t> get_counters() const
    {
        return counters;
    }
    // Takes the replacement counter array by value.
    void set_counters(std::vector<uint8_t> new_counters);
    // Defined out of line, so its signature is left unchanged here.
    double get_erate();
    void set_erate(double new_erate);
private:
    // NOTE(review): internal helper of the estimator; its exact role is not
    // visible in this header.
    double _Ep();
    double alpha;                   // value reported by get_alpha()
    int ncounters;                  // value reported by get_ncounters()
    int ncounters_log2;             // value reported by get_p()
    WordLength _ksize;              // value reported by get_ksize()
    std::vector<uint8_t> counters;  // one byte per counter

    // Shared initialisation routine.
    // NOTE(review): presumably called by both constructors -- confirm.
    void init(int, WordLength);
};

}

#ifdef __cplusplus
extern "C" {
#endif
/*
   Threshold constants. They are described in:

     Stefan Heule, Marc Nunkesser and Alex Hall,
     "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art
      Cardinality Estimation Algorithm"
     dx.doi.org/10.1145/2452376.2452456

   NOTE(review): the 15 entries presumably correspond to consecutive
   precision values; confirm the index mapping against the code that reads
   this table.
*/
const int THRESHOLD_DATA[] = {
    10,    20,    40,     80,     220,
    400,   900,   1800,   3100,   6500,
    11500, 20000, 50000,  120000, 350000
};

#ifdef __cplusplus
}
#endif


#endif // HLLCOUNTER_HH

// vim: set sts=2 sw=2:
