package be.ac.ulb.mlg.utils.discretizer;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;

import be.ac.ulb.mlg.utils.Discretizer;

/*
 * The MIT License (MIT)
 * 
 * Copyright (c) 2013 Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * Equal-frequency discretizer: replaces every value of a matrix by the
 * 1-based index of the bin it falls into.
 * <p>
 * For each scope selected by the {@link Mode} (one row, one column, or the
 * whole matrix) the distinct non-NaN values are visited in ascending order and
 * a cut point is placed each time roughly {@code count/range} values have been
 * consumed. At most {@code range-1} cut points are produced, so bin indices lie
 * in {@code [1, range]}. A value equal to a cut point belongs to the lower bin.
 * NaN cells are ignored when computing cut points and stay NaN in the output.
 *
 * @author Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * @version 1.00, 30/08/2013
 */
public class UniformFrequencyDiscretizer implements Discretizer{
	/** Sentinel column index asking {@code sortedCount} to pool every cell of the given rows. */
	private static final int ALL_COLUMN = -1;

	private final Mode mode;
	private final int range;
	private final boolean preprocess;
	/** Cut points used by the GLOBAL mode; {@code null} until {@link #preprocess(double[][])} computed them. */
	private double[] thresholds;

	/**
	 * Constructor of UniformFrequencyDiscretizer which sets up the mode and the range of discretization without preprocessing
	 * @param mode The mode of discretization column/row wise or by using the whole matrix
	 * @param range The number of bins to produce for each row/column/matrix (at least 1)
	 * @throws NullPointerException if {@code mode} is {@code null}
	 * @throws IllegalArgumentException if {@code range} is lower than 1
	 */
	public UniformFrequencyDiscretizer(Mode mode,int range){
		this(mode,range,false);
	}
	/**
	 * Constructor of UniformFrequencyDiscretizer which sets up the mode and the range of discretization
	 * @param mode The mode of discretization column/row wise or by using the whole matrix
	 * @param range The number of bins to produce for each row/column/matrix (at least 1)
	 * @param preprocess {@code true} if the caller computes the cut points beforehand through
	 *        {@link #preprocess(double[][])} (only meaningful for the GLOBAL {@link Mode});
	 *        {@code false} to let {@link #discretize(double[][])} compute them from its own argument
	 * @throws NullPointerException if {@code mode} is {@code null}
	 * @throws IllegalArgumentException if {@code range} is lower than 1
	 */
	public UniformFrequencyDiscretizer(Mode mode,int range,final boolean preprocess){
		// Fail fast: a null mode or a range below 1 could only crash later, inside discretize.
		if(mode == null){
			throw new NullPointerException("mode must not be null");
		}
		if(range < 1){
			throw new IllegalArgumentException("range must be at least 1 but was "+range);
		}
		this.mode	= mode;
		this.range	= range;
		this.preprocess = preprocess;
	}
	/**
	 * Discretizes the given matrix according to the configured mode.
	 * The input matrix is not modified.
	 * @param matrix The values to discretize (rows must not be {@code null})
	 * @return A new matrix with the same shape holding the 1-based bin index of each cell (NaN stays NaN)
	 */
	@Override
	public double[][] discretize(double[][] matrix){
		final double[][] result = new double[matrix.length][];

		for(int i=0;i<matrix.length;i++){
			result[i] = new double[matrix[i].length];
		}

		if(mode.equals(Mode.ROW_WISE)){
			rowWise(matrix,result);
		}else if(mode.equals(Mode.COLUMN_WISE)){
			columnWise(matrix,result);
		}else{
			// Whole-matrix mode: reuse the preprocessed cut points when the caller
			// provided them, otherwise (or if preprocess was never called) compute them now.
			if(!preprocess || thresholds == null){
				preprocess(matrix);
			}
			for(int i=0;i<matrix.length;i++){
				for(int j=0;j<matrix[i].length;j++){
					result[i][j] = rangeIndex(matrix[i][j],thresholds);
				}
			}
		}
		return result;
	}
	/**
	 * Discretizes each column independently. Rows shorter than the widest row
	 * simply do not contribute to (nor receive a value for) the missing columns.
	 */
	private void columnWise(double[][] matrix,double[][] result) {
		int columns = 0;
		for(double[] row:result){
			columns = Math.max(columns,row.length);
		}
		for(int column=0;column<columns;column++){
			final double[] cuts = sortedCount(matrix,column);
			for(int row=0;row<result.length;row++){
				if(column < result[row].length){
					result[row][column] = rangeIndex(matrix[row][column],cuts);
				}
			}
		}
	}

	/**
	 * Computes the ascending cut points of one column, or of every cell when
	 * {@code column} is negative ({@link #ALL_COLUMN}).
	 * @param x The rows to inspect
	 * @param column The column to inspect, or a negative value to pool all the cells
	 * @return The cut points in ascending order; its length is at most {@code range-1}
	 *         and is shorter when the data does not hold enough distinct values
	 */
	private double[] sortedCount(final double[][] x,final int column){
		if(range == 1){
			return new double[0];//a single bin needs no cut point
		}
		// Size hint only; the product is done in double to avoid int overflow.
		final int expectedDistinct = x.length == 0 ? 0 : (int)Math.sqrt((double)x.length*x[0].length);
		final Map<Double,Integer> elementCounter = new HashMap<Double,Integer>(expectedDistinct);
		long globalCounter = 0;
		if(column < 0){//ALL_COLUMN
			for(double[] sub:x){
				for(double d:sub){
					if(tally(elementCounter,d)){
						globalCounter++;
					}
				}
			}
		}else{
			for(double[] sub:x){
				if(column < sub.length && tally(elementCounter,sub[column])){
					globalCounter++;
				}
			}
		}

		final double[] threshold = new double[range-1];//bins: [-inf;first], ]first;second], ...
		final long quota = Math.round(globalCounter/((double)range));
		long need = quota;
		int pos = 0;
		for(Double d:new TreeSet<Double>(elementCounter.keySet())){
			need -= elementCounter.get(d).intValue();
			if(need <= 0){
				threshold[pos++] = d.doubleValue();
				if(pos == threshold.length){
					return threshold;
				}
				need = quota;
			}
		}
		// Fewer cut points than slots: drop the unused slots, otherwise their 0.0
		// default would break the ascending order the bin lookup relies on.
		final double[] trimmed = new double[pos];
		System.arraycopy(threshold,0,trimmed,0,pos);
		return trimmed;
	}
	/**
	 * Adds one occurrence of {@code value} to the counter unless it is NaN.
	 * @return {@code true} if the value was counted, {@code false} if it was NaN
	 */
	private static boolean tally(final Map<Double,Integer> counter,final double value){
		if(Double.isNaN(value)){
			return false;
		}
		// Double.equals tells -0.0 and 0.0 apart while the bin lookup (primitive
		// comparison) does not, so both zeros are counted under the same key.
		final Double key = Double.valueOf(value == 0.0 ? 0.0 : value);
		final Integer seen = counter.get(key);
		counter.put(key,Integer.valueOf(seen == null ? 1 : seen.intValue()+1));
		return true;
	}
	/**
	 * Discretizes each row independently.
	 */
	private void rowWise(double[][] matrix,double[][] result) {
		final double[][] single = new double[1][];//one-row view handed to sortedCount
		for(int row=0;row<result.length;row++){
			single[0] = matrix[row];
			final double[] cuts = sortedCount(single,ALL_COLUMN);
			for(int column=0;column<result[row].length;column++){
				result[row][column] = rangeIndex(matrix[row][column],cuts);
			}
		}
	}
	/**
	 * Finds the bin of a value by binary search over the ascending cut points.
	 * @param value The value to classify
	 * @param cuts The cut points in ascending order (may be empty)
	 * @return NaN for a NaN value, otherwise 1 + the number of cut points strictly lower than {@code value}
	 */
	private static double rangeIndex(final double value,final double[] cuts){
		if(Double.isNaN(value)){
			return Double.NaN;
		}
		int start	= 0;
		int end		= cuts.length;
		while(start < end){
			final int middle = (start+end)>>>1;
			if(value <= cuts[middle]){//go left, middle may still be the answer
				end = middle;
			}else{//go right, middle is strictly lower than value
				start = middle+1;
			}
		}
		return (double)(start+1);
	}
	/**
	 * @return always {@code false}: no native implementation is provided
	 */
	@Override
	public boolean hasNativeImplementation() {
		return false;//TODO implement it
	}
	/**
	 * @return the {@code preprocess} flag given at construction time
	 */
	@Override
	public boolean requirePreprocessing() {
		return preprocess;
	}
	/**
	 * Computes and stores the whole-matrix cut points. Does nothing unless the
	 * mode is GLOBAL, since row/column wise cut points are computed on the fly.
	 * @param matrix The values from which the cut points are derived
	 */
	@Override
	public void preprocess(double[][] matrix) {
		if(!mode.equals(Mode.GLOBAL)){
			return;
		}
		thresholds = sortedCount(matrix,ALL_COLUMN);
	}
}
