/*
 * 
 *  Copyright (c) 2008, 2011, 2012 David Hernandez, Patrice Francois, Jacques Schrenzel
 * 
 *  This file is part of EDENA.
 *
 *  EDENA is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  EDENA is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with EDENA.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <algorithm>
#include <iomanip>

#include "stat.h"

// Draws a weighted text histogram of `samples` on `out`.
//
// Every sample adds weights[i] (rather than 1) to the bin it falls in. The
// number of bins follows Sturges' rule (1 + log2(n), rounded to the nearest
// integer). The binned range starts at the smallest sample and ends at a
// fixed upper bound of 2500; a sample less than one bin width above that
// bound is folded into the last bin, anything further out is ignored.
// One row is written per bin, the first labelled "min" and the last "max",
// and the heaviest bin is scaled to 35 '*' characters.
//
// Nothing is written when `samples` is empty, when the two vectors differ in
// length, or when fewer than three bins would be drawn.
//
//   samples  values to bin
//   weights  per-sample weights, same length as `samples`
//   out      stream receiving the drawing
void weightedHistogram(std::vector<int> &samples, std::vector<int> &weights, std::ostream &out)
{
    if (samples.empty() || samples.size() != weights.size())
        return;

    // NOTE(review): the upper bound is hard-coded instead of being taken from
    // the data. It is kept as-is so existing output does not change; confirm
    // whether it should follow the observed maximum.
    const int upperBound = 2500;
    const int lowerBound = *std::min_element(samples.begin(), samples.end());

    //Sturges' rule for the number of bins, rounded to the nearest integer
    const double sturges = 1 + (log((double) samples.size()) / log(2.0));
    int nBins = (int) sturges;
    if (sturges - nBins >= 0.5)
        nBins++;

    // Fewer than three bins are never drawn, so there is nothing to compute.
    if (nBins <= 2)
        return;

    std::vector<int> bar(nBins, 0);

    // Bin width, computed in double so that a very negative lower bound
    // cannot overflow the subtraction.
    const double inter = ((double) upperBound - lowerBound) / nBins;

    for (size_t i = 0; i < samples.size(); i++)
    {
        int b = 0;
        if (inter != 0)
        {
            // Range-check the position while it is still a double: the
            // conversion to int is only done once the value is known to fit,
            // and an index outside the bar array is never used.
            const double pos = ((double) samples[i] - lowerBound) / inter;
            if (pos <= -1.0 || pos >= nBins + 1.0)
                continue;

            b = (int) pos;
            if (b == nBins) // just past the upper bound: last bin
                b--;
        }
        bar[b] += weights[i];
    }

    // Heaviest bin; stays at 0 when no positive weight was binned.
    const int maxBin = std::max(0, *std::max_element(bar.begin(), bar.end()));

    // Stars per unit of weight. With nothing to scale against, draw no stars
    // instead of dividing by zero.
    const float prop = maxBin > 0 ? (float) (35.0 / maxBin) : 0.0f;

    // Exactly one row per bin, whatever order the samples came in.
    for (int i = 0; i < nBins; i++)
    {
        if (i == 0)
            out << "min ";
        else if (i == nBins - 1)
            out << "max ";
        else
            out << "    ";

        out << "   |";

        const int stars = (int) ((double) bar[i] * prop);
        for (int y = 0; y < stars; y++)
            out << "*";

        out << std::endl;
    }
}

void histogram(vector<int>::iterator start, vector<int>::iterator end, ostream &out) {

    int nBins;
    int maxBin = 0;
    int nEchant = end - start;
    if (nEchant == 0)
        return;
    int minEchant = *start, maxEchant = *start;
    double mean = 0;
    unsigned int sum = 0, N50 = 0;
    unsigned int cumul = 0;

    sort(start, end);

    for (vector<int>::iterator iIt = start; iIt != end; iIt++) {

        if (*iIt < minEchant)
            minEchant = *iIt;
        if (*iIt > maxEchant)
            maxEchant = *iIt;

        sum += *iIt;
    }

    mean = (double) sum / nEchant;
    //sd = sqrt( (1.0/(nEchant-1))*(sd-nEchant*mean*mean) );


    for (vector<int>::iterator iIt = start; iIt != end; iIt++) {
        cumul += *iIt;
        if (cumul > sum / 2) {
            N50 = *iIt;
            break;
        }
    }

    //sturges rule for the number of bins
    double val = 1 + (log((double) nEchant) / log(2.0));
    nBins = (int) val;
    if (val - (int) val >= 0.5)
        nBins++;

    int *bar = new int[nBins];
    memset(bar, 0, sizeof (int) *nBins);
    double inter = (double) (maxEchant - minEchant) / nBins;

    int b = 0;

    for (vector<int>::iterator iIt = start; iIt != end; iIt++) {
        if (inter == 0)
            b = 0;
        else
            b = (int) ((*iIt - minEchant) / inter);

        if (b == nBins)
            b--;
        bar[b]++;
    }

    int nInterTot = b > nBins ? b : nBins;

    for (int i = 0; i < nBins; i++)
        if (bar[i] > maxBin)
            maxBin = bar[i];

    float prop = 25.0 / maxBin;


    if (nInterTot > 2) {
        for (int i = 0; i < nInterTot; i++) {

            if (i == 0) // min
            {
                out << "min ";
            }
            else if (i == nBins - 1) // max
            {
                out << "max ";
            }
            else {
                out << "    ";
            }

            out << "   |";

            if (i < nBins) {
                for (int y = 0; y < (int) ((double) bar[i] * prop); y++)
                    out << "*";
            }

            out << endl;
        }
    }
    delete[] bar;
}

// Writes the length `l` to `out` followed by a unit suffix.
//
// The value is divided by 1000 for as long as it is at least 1000, moving
// from " bp " through " Kbp", " Mbp" and " Gbp" up to " Tbp" (where scaling
// stops). Lengths left in bp are printed without decimals, scaled lengths
// with two. The fixed notation and precision set here stay on the stream.
//
//   l    length to print
//   out  stream receiving the text
void printDNALength(double l, std::ostream &out)
{
    static const char *const unitNames[] = {" bp ", " Kbp", " Mbp", " Gbp", " Tbp"};
    const size_t lastUnit = 4;

    size_t unit = 0;
    for (; unit < lastUnit && l >= 1000; ++unit)
        l /= 1000.0;

    const int decimals = (unit == 0) ? 0 : 2;
    out << std::setprecision(decimals) << std::fixed << l << unitNames[unit];
}

// Writes summary statistics of the values in [start, end) to `out`:
// sum, N50, mean, max and min, one per line.
//
// The range is sorted in place (ascending) as a side effect. N50 is the
// first value, walking the sorted range upwards, at which the running total
// exceeds half of the sum (integer division); it stays 0 if that never
// happens. Nothing is written for an empty range.
//
//   start, end  range of values to summarise; reordered by this call
//   out         stream receiving the report
void stats(std::vector<int>::iterator start, std::vector<int>::iterator end, std::ostream &out)
{
    const int nEchant = end - start;
    if (nEchant == 0)
        return;

    std::sort(start, end);

    // Once sorted, the extremes sit at the two ends of the range.
    const int minEchant = *start;
    const int maxEchant = *(end - 1);

    // 64-bit accumulators: the total of many int values does not fit in
    // 32 bits, and a wrapped sum would corrupt the mean and N50 as well.
    long long sum = 0;
    for (std::vector<int>::iterator it = start; it != end; ++it)
        sum += *it;

    const double mean = (double) sum / nEchant;

    int N50 = 0;
    long long cumul = 0;
    for (std::vector<int>::iterator it = start; it != end; ++it)
    {
        cumul += *it;
        if (cumul > sum / 2)
        {
            N50 = *it;
            break;
        }
    }

    out << "   sum:  " << sum << std::endl;
    out << "   N50:  " << N50 << std::endl;
    out << "   mean: " << mean << std::endl;
    out << "   max:  " << maxEchant << std::endl;
    out << "   min:  " << minEchant << std::endl;
}

// Geometric model of overhanging sizes.
//   p  probability (= 1/E(X))
//   k  overhanging value
//   c  correction factor
// Returns 1 when k is 0, otherwise c * (1-p)^k.
double cdfOH(double p, unsigned int k, double c)
{
    return (k == 0) ? 1.0 : c * pow(1 - p, (int) k);
}

// Companion of cdfOH for a single overhanging value.
//   p  probability (= 1/E(X))
//   k  overhanging value
//   c  correction factor
// Returns p*c - c + 1 when k is 0, otherwise p * (1-p)^k * c.
double pdfOH(double p, unsigned int k, double c)
{
    if (k != 0)
        return p * pow(1 - p, (int) k) * c;

    return p * c - c + 1;
}