package be.ac.ulb.mlg.utils;

/*
 * The MIT License (MIT)
 * 
 * Copyright (c) 2013 Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * 
 * @author Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * @version 1.00, 24/06/2013
 */

import java.util.Arrays;
import static java.lang.Math.abs;
import static java.lang.Math.exp;

public final class MeasureUtils {
	/**
	 * The precision value
	 */
	public static final double EPSILON = 1E-10;
	/**
	 * Compute the sum by summing all elements of the vector in left-right manner
	 * @param vector The data vector
	 * @param computable The boolean array which specifies if indexes must be taken
	 * @return The mean of the data vector
	 */
	public static final double fastSum(final double[] vector,final boolean[] computable){//Two identical impl for the stack
		double sum = 0.0;
		if(computable != null){
			for(int i=0;i<vector.length;i++)
				if(computable[i]) sum += vector[i];
		}else{
			for(double d:vector)
				sum += d;
		}
		return sum;
	}
	/**
	 * Compute the mean by summing all elements of the vector in left-right manner
	 * @param vector The data vector
	 * @param computable The boolean array which specifies if indexes must be taken
	 * @return The mean of the data vector
	 */
	public static final double fastMean(final double[] vector,final boolean[] computable){
		double sum = 0.0;
		int count = vector.length;
		if(computable != null){
			for(int i=0;i<vector.length;i++)
				if(computable[i]) sum += vector[i];
				else count--;
		}else{
			for(double d:vector)
				sum += d;
		}
		return sum/count;
	}
	
	/**
	 * Compute the mean by summing all elements of the vector in left-right manner
	 * @param vector The data vector
	 * @param hasMissing True if missing values are contained in the vector
	 * @return The mean of the data vector
	 */
	public static final double fastMean(final double[] vector,final boolean hasMissing){
		double sum = 0.0;
		int count = vector.length;
		if(hasMissing){
			for(int i=0;i<vector.length;i++)
				if(Double.isNaN(vector[i])) count--;
				else sum += vector[i];
		}else{
			for(double d:vector)
				sum += d;
		}
		return sum/count;
	}
	/**
	 * Apply bubble sort to sort indices (toSort) array based on the values (comparable) array which is also sorted 
	 * @param comparable The value array
	 * @param toSort The indices array
	 */
	public static void bubbleSort(double[] comparable,int[] toSort){
		double tmpDouble;
		int		end		= toSort.length,
				index,
				currentEnd,
				tmpInt;
		
	    do{
	    	currentEnd = 0;
	    	for(index = 1;index<end;index++)
	    		if(comparable[index-1] > comparable[index]){
	    			//swap value and index
	    			tmpDouble			= comparable[index-1];
	    			comparable[index-1]	= comparable[index];
	    			comparable[index]	= tmpDouble;
	    			
	    			tmpInt			= toSort[index-1];
	    			toSort[index-1]	= toSort[index];
	    			toSort[index]	= tmpInt;
	    			
	    			currentEnd = index;
	    		}
	    	end = currentEnd;
	    }while(end>0);
	}
	/**
	 * Apply bubble sort to another (toSort) array based on the values (comparable) array which is also sorted 
	 * @param comparable The value array
	 * @param toSort The other array
	 */
	public static void bubbleSort(double[] comparable,double[] toSort){
		double tmpDouble;
		int		end		= toSort.length,
				index,
				currentEnd;
		
	    do{
	    	currentEnd = 0;
	    	for(index = 1;index<end;index++)
	    		if(comparable[index-1] > comparable[index]){
	    			//swap value and index
	    			tmpDouble			= comparable[index-1];
	    			comparable[index-1]	= comparable[index];
	    			comparable[index]	= tmpDouble;
	    			
	    			tmpDouble		= toSort[index-1];
	    			toSort[index-1]	= toSort[index];
	    			toSort[index]	= tmpDouble;
	    			
	    			currentEnd = index;
	    		}
	    	end = currentEnd;
	    }while(end>0);
	}
	/**
	 * TODO
	 * @param comparable
	 * @return
	 */
	public static int countSwap(double[] comparable){
		double tmpDouble;
		int		end		= comparable.length,
				index,
				currentEnd,
				swaps = 0;
		
	    do{
	    	currentEnd = 0;
	    	for(index = 1;index<end;index++)
	    		if(comparable[index-1] > comparable[index]){
	    			//swap value and index
	    			swaps++;
	    			tmpDouble			= comparable[index-1];
	    			comparable[index-1]	= comparable[index];
	    			comparable[index]	= tmpDouble;
	    			
	    			currentEnd = index;
	    		}
	    	end = currentEnd;
	    }while(end>0);
	    return swaps;
	}
	/**
	 * Computed the natural rank of values
	 * @param computable The boolean array which specifies if indexes must be taken
	 * @param vector The data vector (values) to rank
	 * @param ranks The rank destination vector (must have the same size that the vector argument)
	 * @return The number of tied values
	 */
	public static int naturalRank(final boolean[] computable,final double[] vector,final double[] ranks) {
		return naturalRank(computable,vector, ranks,null);
	}
	/**
	 * Computed the natural rank of values
	 * @param computable The boolean array which specifies if indexes must be taken
	 * @param vector The data vector (values) to rank
	 * @param ranks The rank destination vector (must have the same size that the vector argument)
	 * @param untied Copy the natural rank attribution without dealing with ties (sort index from 1 to length)
	 * @return The number of tied values
	 */
	public static int naturalRank(final boolean[] computable,final double[] vector,final double[] ranks,final int[] untied) {
		int[] position = untied != null?untied:new int[ranks.length];
		
		int index,count=0,ties=0;
		// copy values of vector into ranks array and initialize position from 0 to length-1
		if(computable != null){
			for(index = 0;index < vector.length; index++) {
				position[index]	= index;
				ranks[index]	= computable[index]?vector[index]:Double.POSITIVE_INFINITY;
			}
		}else{
			for(index = 0;index < vector.length; index++) {
				position[index]	= index;
				ranks[index]	= vector[index];
			}
		}
		
		bubbleSort(ranks,position);
			
		int pos = 1, previous = 0;
		int startTie;
		
		if(computable != null){
			while(!computable[position[previous]]) previous++;
			startTie = previous;
			
			ranks[position[previous]] = pos;
			
			for(index=previous+1; index<vector.length; index++){
				if(computable[position[index]]){
					if(vector[position[index]] > vector[position[previous]]){// different but > it's ok because it's sorted
				    	pos++;
				    	if (count > 0){
				    		pos+= count;
				    		final double average = (2.0 * ranks[position[previous]] + count) * 0.5;
				    		previous = 0;// TODO EDITED in java not in C
				    		count++;
				    		for(count=startTie;count<index ; count++){
								if(computable[position[count]]){// TODO EDITED in java not in C
					    			ranks[position[count]] = average;
					    			previous++;
								}
				    		}
				    		ties	+= (previous*(previous-1));// TODO EDITED in java not in C
				    		count	=  0;
				    	}
				    	startTie=index;
					} else {
						count++;
				    }
					ranks[position[index]]	= pos;
					previous				= index;
				}
			}
			if(count > 0){//TODO edited in java not in c   (loop added)
	    		pos+= count;
	    		final double average = (2.0 * ranks[position[previous]] + count) * 0.5;
	    		previous = 0;// TODO EDITED in java not in C
	    		count++;
	    		for(count=startTie;count<index ; count++){
	    			if(computable[position[count]]){//TODO edited in java not in c // FIXME need to put into both loops because we use computable
	    				ranks[position[count]] = average;
	    				previous++;
	    			}
	    		}
	    		ties	+= (previous*(previous-1));// TODO EDITED in java not in C
	    		count	=  0;
	    	}
		}else{
			ranks[position[0]] = pos;
			for(index=1; index<vector.length; index++) {
				if(vector[position[index]] > vector[position[index-1]]){// different but > it's ok because it's sorted
			    	pos = index + 1;
			    	if(count > 0){
			    		final double average = (2.0 * ranks[position[index-1]] + count) * 0.5;
			    		count++;
			    		ties	+= (count*(count-1));///TODO edited in java not in C
			    		for(;count > 0 ; count--){
			    			ranks[position[index-count]] = average;
			    		}
			    	}
				} else {
					count++;
			    }
				ranks[position[index]] = pos;
			}
			if (count > 0){//TODO edited in java not in C  (loop added)
	    		pos+= count;
	    		final double average = (2.0 * ranks[position[index-1]] + count) * 0.5;
	    		ties	+= (count*(count-1));
	    		count++;
	    		for(;count > 0 ; count--){
	    			ranks[position[index-count]] = average;
	    		}
	    	}
		}
		//TODO edited in java not in c  (loop removed)
		return ties>>1;
	}
	
	/**
	 * Compute the missing value array for the matrix (samples x vars)
	 * @param vars The matrix
	 * @return The missing value array: true if computable, false if missing
	 */
	public static boolean[] constructComputable(double[][] vars) {
		final boolean[] computable = new boolean[vars[0].length];
		Arrays.fill(computable,true);
		for(double[] var:vars){
			for(int i=0;i<var.length;i++)
				if(Double.isNaN(var[i]))
					computable[i] = false;
		}
		return computable;
	}
	/**
	 * Compute the number of occurrences of each distinct value with a precision of EPSILON (consider integers).
	 * @param vector The vector of values
	 * @param frequencies The occurrences matrix, 0 for NaN if hasMissing is true, 0 for redundant value
	 * (because n times a same distinct value result in a occurrence value of n)
	 * @param hasMissing if true, check for missing value in order to ignore them
	 * @return The number of distinct values for the first part of the long (<<32) and the number of values for the second part, (avoiding missing if hasMissing)
	 */
	public static final long countOccurencies(double[] vector,double[] frequencies,boolean hasMissing){
		int index,distinct = 0,size = vector.length;
		int[] position	= new int[vector.length];
		
		if(hasMissing){
			for(index = 0;index < vector.length; index++) {
				position[index]	= index;
				if(Double.isNaN(vector[index])){
					frequencies[index] = Double.POSITIVE_INFINITY;
					size--;
				}else{
					frequencies[index]	= vector[index];
				}
			}
		}else{
			for(index = 0;index < vector.length; index++) {
				position[index]		= index;
				frequencies[index]	= vector[index];
			}
		}
		
		bubbleSort(frequencies,position);
		Arrays.fill(frequencies,0.0);
		
		int count = 1;
		int sum = 0;
		for(index=1;index<size;index++){
			if(abs(vector[position[index-1]]-vector[position[index]])<EPSILON){//it is the same
				count++;
			}else{
				distinct++;
				sum += count;
				frequencies[position[index-count]] = count;
				count = 1;
			}
		}
		distinct++;
		sum += count;
		frequencies[position[index-count]] = count;
		
		return (((long)distinct) << 32) | sum;
	}
	
	/**
	 * Compute the number of occurrences of each distinct value with a precision of EPSILON (consider integers).
	 * @param vectors The matrix of values, have to be composed of two vectors of the same size
	 * @param frequencies The occurrences matrix, 0 for NaN if hasMissing is true, 0 for redundant value
	 * (because n times a same distinct value result in a occurrence value of n)
	 * @param computable The vector of missing value for the vector indexes
	 * @return The number of distinct values for the first part of the long (<<32) and the number of values for the second part, (avoiding missing if hasMissing)
	 */
	public static final long countOccurencies(double[][] vectors,double[] frequencies,boolean[] computable){
		final double[] jointValues = new double[vectors[0].length];//auto fill with 0
		{
			int j;
			if(computable != null){
				for(j=0;j<jointValues.length;j++)
					jointValues[j] = computable[j]?(((long)vectors[0][j])<<32) | ((long)vectors[1][j]):Double.NaN;//FIXME test with negative values
			}else{
				for(j=0;j<jointValues.length;j++)
					jointValues[j] = (((long)vectors[0][j])<<32) | ((long)vectors[1][j]);//FIXME test with negative values
			}
		}
		return countOccurencies(jointValues, frequencies, computable != null);
	}
	
	/**
	 * Compute the square root value by using the formula  exp(0.5*log(x)) <=> sqrt(x) in order to avoid underflow.
	 * @param value The value for which the function compute the square root
	 * @return The square root of the given value
	 */
	public static double sqrt(final double value){//underflow (Numerical Analysis course) ( exp(0.5*log(x)) <=> sqrt(x) )
		if(Double.isNaN(value)||value == 0.0) return 0.0;
		return exp(0.5*Math.log(value));
	}
	/**
	 * Compute the log, base e but return 0 if the value is less than EPSILON
	 * @param value The value for which the function compute the square root
	 * @return The square root of the given value
	 */
	public static double log(final double value){
		if(value < EPSILON) return 0.0;
		return Math.log(value);
	}
}
