#!/bin/bash

####### CoNet DEMO bash script ##########
#
# Needs bash shell to run.
# Needs to be placed in a folder that contains two
# folders named "Input" and "Output", where "Input"
# contains the input matrix and metadata from the third 
# CoNet tutorial (see CoNet webpage).
#
# RUN
# chmod 755 cooc.sh
# ./cooc.sh > cooc.log &
#
#
# CLUSTER
# Set the CLUSTER variable below to "-b" to send jobs to an SGE cluster.
# If jobs are sent to the cluster, create a temporary directory for permutation jobs and a second temporary directory for bootstrap jobs.
# Point to the permutation temp directory in the CoNetConfig.txt file. 
# Point to the bootstrap temp directory in the CoNetConfigBoot.txt file.
# Check CoNet command line documentation for additional cluster settings needed in both configuration files.
# Note that you can restore random scores from the temporary random score files by adding option --restorefromscorefolder.
# Please keep lineage_separator=- in both configuration files.
#
# Author: Karoline Faust
#
##########################################

# point to your java installation
JAVA=java
# allocate java runtime memory in MB
MEM=6000
# location of jar file, needs to point to CoNet.jar
LIB=../lib/CoNet.jar
INPUTFOLDER=$PWD/Input
OUTPUTFOLDER=$PWD/Output
# example input from third CoNet tutorial
MATRIX=HMP_16SV35_phylotypes_vaginalSites_StLouis.txt
METADATA=HMP_16SV35_phylotypes_vaginalSites_metadata.txt
METADATA_ITEMS=lineage/taxon/bodysite
RESULT_NAME=16S_V35_phylotypes_vaginalSites
NANTREATMENT=pairwise_omit
NANTREATMENTPARAM=50
FILTER_STRATEGIES=noinclusivetaxalinks
METHODS=correl_spearman/correl_pearson/dist_kullbackleibler/dist_bray
# prefilter: only keep given number of top and bottom edges
EDGE_NUMBER=50
ITERATIONS=100
NETWORK_MERGE=union
RENORM="--renorm"
RANDROUTINE=edgeScores
MULTITESTCORR=benjaminihochberg
RESAMPLING=shuffle_rows
PVAL_MERGE=simes
PVAL_THRESHOLD=0.05
NULLDISTRIBS=$RESULT_NAME"_permutations.txt"
# gml/dot
FORMAT=gdl
CONFIG=CoNetConfig.txt
CONFIGBOOT=CoNetConfigBoot.txt
CLUSTER=""
COMPUTE_THRESHOLDS=true
TEST=false
PERMUT=true
BOOT=true
RESTORE=true

# if not yet there, create the config files
if [ ! -f $INPUTFOLDER/$CONFIG ]; then
    touch $INPUTFOLDER/$CONFIG
    echo "lineage_separator=-" > $INPUTFOLDER/$CONFIG
fi

if [ ! -f $INPUTFOLDER/$CONFIGBOOT ]; then
    touch $INPUTFOLDER/$CONFIGBOOT
    echo "lineage_separator=-" > $INPUTFOLDER/$CONFIGBOOT
fi

if [ $COMPUTE_THRESHOLDS == true ] ; then
    # compute initial thresholds such that resulting network has the requested number of intersection edges
    # this step creates a file with the initial thresholds
    echo "Computing thresholds"
    $JAVA -Xmx"$MEM"m -cp "$LIB" be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser -i $INPUTFOLDER/$MATRIX -E $METHODS --thresholdguessing edgeNumber --guessingparam $EDGE_NUMBER -F $FILTER_STRATEGIES --metadataattribs $METADATA_ITEMS -D $INPUTFOLDER/$METADATA --method ensemble -o $OUTPUTFOLDER/"$RESULT_NAME"_thresholds.txt --topbottom --nantreatment $NANTREATMENT --nantreatmentparam $NANTREATMENTPARAM -Z $INPUTFOLDER/$CONFIG > $OUTPUTFOLDER/"$RESULT_NAME".log
fi

# run cooccurrence analysis 
if [ $TEST == true ] ; then
    echo "Doing cooccurrence analysis"
    $JAVA -Xmx"$MEM"m -cp "$LIB" be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser -i $INPUTFOLDER/$MATRIX -E $METHODS -F $FILTER_STRATEGIES --metadataattribs $METADATA_ITEMS -D $INPUTFOLDER/$METADATA --method ensemble -f $FORMAT -o $OUTPUTFOLDER/"$RESULT_NAME"_ensemble.gdl --method ensemble --ensembleparamfile $OUTPUTFOLDER/"$RESULT_NAME"_thresholds.txt --multigraph --networkmergestrategy $NETWORK_MERGE --nantreatment $NANTREATMENT --nantreatmentparam $NANTREATMENTPARAM -Z $INPUTFOLDER/$CONFIG  >> $OUTPUTFOLDER/"$RESULT_NAME".log
else
    if [ $PERMUT == true ]
    then
        # compute permutation distribution
        # this step creates a file with permutation rand scores
        echo "Computing permutations"
        $JAVA -Xmx"$MEM"m -cp "$LIB" be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser -i $INPUTFOLDER/$MATRIX -E $METHODS --method ensemble -f $FORMAT -o $OUTPUTFOLDER/"$RESULT_NAME"_ensemble.gdl --metadataattribs $METADATA_ITEMS -D $INPUTFOLDER/$METADATA --ensembleparamfile $OUTPUTFOLDER/"$RESULT_NAME"_thresholds.txt --networkmergestrategy $NETWORK_MERGE --multigraph --pvaluemerge $PVAL_MERGE -F $FILTER_STRATEGIES/rand $RENORM --iterations $ITERATIONS -g $PVAL_THRESHOLD --resamplemethod $RESAMPLING -I $OUTPUTFOLDER/$NULLDISTRIBS -K $RANDROUTINE --scoreexport --nantreatment $NANTREATMENT --nantreatmentparam $NANTREATMENTPARAM $CLUSTER -Z $INPUTFOLDER/$CONFIG >> $OUTPUTFOLDER/"$RESULT_NAME".log
    fi
    if [ $BOOT == true ]
    then
        # compute bootstrap distribution
        # this step creates a file with bootstrap rand scores
        echo "Computing bootstraps"
        RESAMPLING=bootstrap
        $JAVA -Xmx"$MEM"m -cp "$LIB" be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser -i $INPUTFOLDER/$MATRIX -E $METHODS --method ensemble -f $FORMAT -o $OUTPUTFOLDER/"$RESULT_NAME"_ensemble.gdl --ensembleparamfile $OUTPUTFOLDER/"$RESULT_NAME"_thresholds.txt --metadataattribs $METADATA_ITEMS -D $INPUTFOLDER/$METADATA --networkmergestrategy $NETWORK_MERGE --multigraph --pvaluemerge $PVAL_MERGE -F $FILTER_STRATEGIES/rand --iterations $ITERATIONS -g $PVAL_THRESHOLD --resamplemethod $RESAMPLING -I $OUTPUTFOLDER/"$RESULT_NAME"_bootstraps.txt -K $RANDROUTINE --scoreexport --nantreatment $NANTREATMENT --nantreatmentparam $NANTREATMENTPARAM $CLUSTER -Z $INPUTFOLDER/$CONFIGBOOT >> $OUTPUTFOLDER/"$RESULT_NAME".log            
    fi
    if [ $RESTORE == true ]
    then
        # compute final p-values
        # this step creates the final network file
        echo "Restoring from random score files"
        RESAMPLING=bootstrap
        $JAVA -Xmx"$MEM"m -cp "$LIB" be.ac.vub.bsb.cooccurrence.cmd.CooccurrenceAnalyser -i $INPUTFOLDER/$MATRIX -E $METHODS --method ensemble -f $FORMAT -o $OUTPUTFOLDER/"$RESULT_NAME"_ensemble.gdl --ensembleparamfile $OUTPUTFOLDER/"$RESULT_NAME"_thresholds.txt --networkmergestrategy $NETWORK_MERGE --metadataattribs $METADATA_ITEMS -D $INPUTFOLDER/$METADATA --multigraph --pvaluemerge $PVAL_MERGE -F $FILTER_STRATEGIES/rand/confidence_boot --iterations $ITERATIONS -g $PVAL_THRESHOLD --resamplemethod $RESAMPLING -I $OUTPUTFOLDER/"$RESULT_NAME"_bootstraps.txt -K $RANDROUTINE --nantreatment $NANTREATMENT --nantreatmentparam $NANTREATMENTPARAM --multicorr $MULTITESTCORR --nulldistribfile $OUTPUTFOLDER/$NULLDISTRIBS -Z $INPUTFOLDER/$CONFIG > $OUTPUTFOLDER/"$RESULT_NAME"_restore.log
    fi
fi

