package be.ac.ulb.mlg.utils.renormalizer;

/*
 * The MIT License (MIT)
 * 
 * Copyright (c) 2013 Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * 
 * @author Jean-Sebastien Lerat (Jean-Sebastien.Lerat@ulb.ac.be)
 * @version 1.00, 24/06/2013
 */

import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Pattern;

import be.ac.ulb.mlg.utils.DefaultRenormalizer;
import be.ac.ulb.mlg.utils.Measure;


// Note: if the highest level is present among the rows, it is used directly, because we assume that this row respects the normalization. If more than one exists, the first one is used.

// Taxa names are case insensitive, according to the case conversion implemented by String.

/**
 * This class represents a normalization with taxa.
 * Each main taxon must be defined by calling the <i>addTaxa()</i> method so that its taxa can be normalized and attached to their own root.
 * Consequently, if a single main root exists, it must be removed before adding the taxons; otherwise all data will be reduced to one row containing only an array of 1.0.
 * For each row of the next input matrix, the lowest taxa level must be specified with the <i>setTaxa()</i> method, after the taxons have been defined.
 * 
 * The design of this class relies on the following hypotheses:
 *  - Taxons are fully specified
 *  - Taxons do not overlap (they can be represented as a tree, from parents to children) => each taxa has only one highest level
 * 
 * Taxa/taxon names are not case sensitive (according to the {@link String} case specification).
 */
public class TaxonRenormalizer extends DefaultRenormalizer {
	/**
	 * Default Taxa separator
	 */
	public static final String DEFAULT_SEPARATOR = "--";
	/** Marker stored in {@link #taxa} for rows that are not mapped to any defined taxa. */
	private static final int IGNORE_ROW = -1;
	/** Matches every whitespace character stripped from taxa names; compiled once instead of per call. */
	private static final Pattern WHITESPACE = Pattern.compile("\\s");

	/** For each taxa identifier, the identifiers of its direct children (empty array for a lowest level). */
	private int[][] topDown;
	/** For each taxa identifier, its parent identifier at index 0 (empty array for a highest level). */
	private int[][] bottomUp;
	/** Standardized taxa name to taxa identifier (identifiers index {@link #topDown} and {@link #bottomUp}). */
	private final Map<String,Integer> str2ID;
	/** Taxa identifier of each input row, or {@link #IGNORE_ROW}; filled by {@link #setTaxa(String[])}. */
	private int[] taxa;
	/** One flag per taxa identifier, maintained by {@link #updateRenormalization(double[], int, boolean)}. */
	private boolean[] ignored;
	/** Result mappings; only ever set to null by {@link #reset()} in this class. */
	private int[] resultMapping,groupMapping;
	/** The matrix given to the last {@link #normalizeInput(double[][])} call. */
	private double[][] input;

	/**
	 * Construct a new TaxonRenormalizer without predefined values
	 * Default applied renormalization is the default renormalization of the {@link DefaultRenormalizer}
	 */
	public TaxonRenormalizer(){
		str2ID	= new HashMap<String,Integer>();
		reset();
	}
	/**
	 * Standardize a lineage written with a custom separator: the separator is rewritten to
	 * {@link #DEFAULT_SEPARATOR}, then the result is standardized like a single taxa name.
	 * @param lineage The lineage to standardize
	 * @param separator The literal separator used in the lineage
	 * @return The standardized lineage, using DEFAULT_SEPARATOR between levels
	 */
	private static String standardize(final String lineage,final String separator){
		//compare by value: a reference comparison only matches the constant itself (or an interned literal)
		if(DEFAULT_SEPARATOR.equals(separator)) return standardize(lineage);
		//the separator goes through "\n" first, so a line break already present in the lineage also becomes a separator
		return standardize(lineage.replaceAll(Pattern.quote(separator),"\n").replaceAll("\n", DEFAULT_SEPARATOR));
	}
	/**
	 * Standardize a taxa name: all whitespace is removed and the name is upper-cased.
	 * The upper-casing uses {@link Locale#ROOT} so that the keys do not depend on the default locale of the JVM.
	 * @param taxa The name to standardize
	 * @return The standardized name, as used for the keys of the name to identifier map
	 */
	private static String standardize(final String taxa){
		return WHITESPACE.matcher(taxa).replaceAll("").toUpperCase(Locale.ROOT);
	}
	/**
	 * Reset only (no call to inherited reset methods) all data relative to taxa/taxon and mappings of last results
	 */
	public void reset(){
		str2ID.clear();
		topDown			= new int[0][];
		bottomUp		= new int[0][];
		resultMapping	= groupMapping = null;
	}
	/**
	 * Get the numeric identifier of a defined taxa. The name is standardized exactly like
	 * the names given to addTaxa() (whitespace removed, case ignored) before the lookup.
	 * @param taxa The name of a defined taxa level
	 * @return The corresponding numeric taxa's identifier
	 * @throws NullPointerException if the name is null or does not match any defined taxa
	 */
	public int getTaxaIdentifier(String taxa){
		final Integer id = str2ID.get(standardize(taxa));
		if(id == null)//same exception type as the former implicit unboxing failure, but with a usable message
			throw new NullPointerException("Unknown taxa: "+taxa);
		return id.intValue();
	}
	/**
	 * 
	 * @param taxa The name of a defined taxa level
	 * @return true iff the taxa is one of the lowest levels, false otherwise
	 */
	public boolean isLowLevel(String taxa){
		return isLowLevel(getTaxaIdentifier(taxa));
	}
	/**
	 * 
	 * @param taxa The name of a defined taxa level
	 * @return true iff the taxa is one of the higher levels, false if it is a lowest level
	 */
	public boolean isHighLevel(String taxa){
		return isHighLevel(getTaxaIdentifier(taxa));
	}
	/**
	 * 
	 * @param taxa The name of a defined taxa level
	 * @return true iff the taxa is one of the highest levels, false otherwise
	 */
	public boolean isHighestLevel(String taxa){
		return isHighestLevel(getTaxaIdentifier(taxa));
	}
	/**
	 * 
	 * @param taxa The index (according to the order of definition, see getTaxaIdentifier() method) of a defined taxa level
	 * @return true iff the taxa is one of the lowest levels (it has no child), false otherwise
	 */
	public boolean isLowLevel(int taxa){
		return topDown[taxa].length<1;
	}
	/**
	 * 
	 * @param taxa The index (according to the order of definition, see getTaxaIdentifier() method) of a defined taxa level
	 * @return true iff the taxa is one of the higher levels (it has at least one child), false if it is a lowest level
	 */
	public boolean isHighLevel(int taxa){
		return topDown[taxa].length>0;
	}
	/**
	 * 
	 * @param taxa The index (according to the order of definition, see getTaxaIdentifier() method) of a defined taxa level
	 * @return true iff the taxa is one of the highest levels (it has no parent), false otherwise
	 */
	public boolean isHighestLevel(int taxa){
		return bottomUp[taxa].length<1;
	}
	/**
	 * Set the taxa for all row (null or out of taxon results in considering the row as a feature)
	 * @param taxas Taxa names of all row, one lowest level mapping for each row of the next input matrix
	 */
	public void setTaxa(String[] taxas){
		this.taxa = new int[taxas.length];
		for(int i=0;i<taxas.length;i++){
			final Integer current = taxas[i] != null ? str2ID.get(standardize(taxas[i])) : null;
			//rows without a name, or with a name that was never defined, are skipped by updateRenormalization()
			taxa[i] = current != null ? current.intValue() : IGNORE_ROW;
		}
	}
	/**
	 * Add a taxa definition of the main taxon according to the string format with the DEFAULT_SEPARATOR
	 * @param taxa The taxa definition from the highest level to the lowest level
	 */
	public void addTaxa(String taxa){
		addTaxa(taxa,DEFAULT_SEPARATOR);
	}
	/**
	 * Add a taxa definition of the main taxon according to the string format.
	 * 
	 * The lineage is read from its lowest level upwards. Every name found before the first
	 * already defined one is created (the lowest level gets the first free identifier, its
	 * parent the next one, and so on) and the new chain is attached below that already
	 * defined taxa. Everything above the first already defined name is assumed to exist and
	 * is left untouched. If no name of the lineage is defined yet, the highest level of the
	 * lineage becomes a new root. If the lowest level is already defined, nothing is added.
	 * 
	 * @param lineage The taxa definition from the highest level to the lowest level (whitespace, see \s in RegularExpression, is removed from the names)
	 * @param separator Specify the used separator (literal string)
	 * @throws IllegalArgumentException if one of the names to create appears more than once in the lineage (it would make the taxa its own ancestor)
	 */
	public void addTaxa(String lineage,final String separator){
		final String[] hierarchical = standardize(lineage,separator).split(DEFAULT_SEPARATOR);
		final int lowest = hierarchical.length-1;

		//index of the closest already defined ancestor, -1 if the whole lineage is unknown
		int known = lowest;
		while(known >= 0 && !str2ID.containsKey(hierarchical[known]))
			known--;
		final int newTaxa = lowest-known;
		if(newTaxa<1)return;// no unknown taxa...

		//validate before touching any state: a repeated name would link a taxa to itself or create a cycle
		for(int i=known+1;i<lowest;i++)
			for(int j=i+1;j<=lowest;j++)
				if(hierarchical[i].equals(hierarchical[j]))
					throw new IllegalArgumentException("Taxa \""+hierarchical[i]+"\" appears more than once in the lineage: "+lineage);

		final int first = topDown.length;//first free identifier
		grow(newTaxa);
		for(int offset=0;offset<newTaxa;offset++){
			final int id = first+offset;
			str2ID.put(hierarchical[lowest-offset],Integer.valueOf(id));
			//the lowest level has no child, every other new level contains the level created just before it
			topDown[id]		= offset == 0 ? new int[0] : new int[]{id-1};
			//the top of the new chain has no parent yet, every other new level belongs to the level created just after it
			bottomUp[id]	= offset == newTaxa-1 ? new int[0] : new int[]{id+1};
		}
		if(known >= 0){
			//attach the top of the new chain below the already defined ancestor
			final int top		= first+newTaxa-1;
			final int parent	= str2ID.get(hierarchical[known]).intValue();
			topDown[parent]	= Arrays.copyOf(topDown[parent],topDown[parent].length+1);
			topDown[parent][topDown[parent].length-1] = top;
			bottomUp[top]	= new int[]{parent};
		}
	}
	/**
	 * Append <i>size</i> empty slots to both hierarchy tables; the caller must fill every new slot.
	 * @param size The number of taxa about to be defined
	 */
	private void grow(final int size){
		topDown		= Arrays.copyOf(topDown,topDown.length+size);
		bottomUp	= Arrays.copyOf(bottomUp,bottomUp.length+size);
	}

	/**
	 * Keep a reference to the input, reset the per-taxa flags and delegate the normalization
	 * to the inherited implementation (taxa not represented by inputs are not processed).
	 * 
	 * NOTE: an earlier in-class aggregation of the rows per highest taxa was disabled and has been
	 * removed from this file; as a consequence the result mappings are never filled by this class.
	 * 
	 * @param input The matrix to normalize, one row per entry given to setTaxa()
	 * @return The result of the inherited normalizeInput()
	 */
	@Override
	public double[][] normalizeInput(final double[][] input){
		this.input	= input;
		//one flag per taxa identifier; identifiers index the hierarchy tables, hence their length
		ignored		= new boolean[topDown.length];//Auto-filled with false (Java)
		return super.normalizeInput(input);
	}
	/**
	 * Call the inherited prepareRenormalization() on every input row mapped to the given taxa.
	 * @param taxa The taxa identifier to look for
	 * @param ignore Forwarded as is to prepareRenormalization()
	 */
	private void applyOnAllRows(final int taxa,final boolean ignore){
		for(int i=0;i<input.length;i++)
			if(this.taxa[i] == taxa)
				prepareRenormalization(input[i],i,ignore);
	}
	/**
	 * Forward the request to all rows of one taxa when it differs from the stored flag, then update the flag.
	 * @param taxa The taxa identifier to update
	 * @param ignore The requested state
	 */
	private void propagate(final int taxa,final boolean ignore){
		if(ignore != ignored[taxa])
			applyOnAllRows(taxa,ignore);
		//NOTE(review): the flag is stored negated although it is compared un-negated above,
		//so two identical consecutive requests are both forwarded - confirm that this is intended
		ignored[taxa] = !ignore;
	}
	/**
	 * Propagate the request made for one row to every row mapped to an ancestor of its taxa,
	 * to its own taxa or to one of its descendants, then call the inherited mergeRenormilizer().
	 * Rows that are not mapped to a taxa are left untouched.
	 * 
	 * @param vect Not used by this implementation
	 * @param row The index of the row, as given to setTaxa()
	 * @param ignore The requested state, forwarded to prepareRenormalization()
	 */
	@Override
	public void updateRenormalization(double[] vect,final int row,boolean ignore){
		//FIXME handle pairshuffle for taxa
		final int origin = this.taxa[row];
		if(origin < 0) return;//feature (TODO It is a simple fix, need to wait the reply of Karoline)

		//walk up: all parents
		int[] next = bottomUp[origin];
		while(next.length > 0){
			final int parent = next[0];
			propagate(parent,ignore);
			next = bottomUp[parent];
		}

		//walk down: the taxa of the row itself, then all its children (depth first)
		final Stack<Integer> children = new Stack<Integer>();
		children.push(Integer.valueOf(origin));
		while(!children.isEmpty()){
			final int current = children.pop().intValue();
			for(int child:topDown[current])
				children.push(Integer.valueOf(child));
			propagate(current,ignore);
		}
		mergeRenormilizer();
	}
	/**
	 * Get the mapping from (index)  to (groups index identifier) for the new input matrix
	 * @return A copy of the mapping row index to the group's index of the resulting new input matrix, null if no mapping is available
	 */
	public int[] getGroupResult(){
		//test the array that is actually copied
		if(groupMapping == null) return null;
		return Arrays.copyOf(groupMapping, groupMapping.length);
	}
	/**
	 * The output is not post-processed by this renormalizer.
	 * @return The given output, untouched
	 */
	@Override
	public double[][] normalizeOutput(double[][] input, double[][] output,Measure measure) {
		// nothing to do
		return output;
	}

	@Override
	public boolean hasNativeImplementation(){
		return false;
	}

	@Override
	public boolean processInput() {
		return true;
	}

	@Override
	public boolean processOutput() {
		return false;
	}
}
