############################################################
##########################  Australia Case Study: preprocessing for SamBada
############################################################
### Load required packages

library(raster)

### Load required inputs

load("StSn_fSNPS.robj") # presumably defines `fSNPS`, which is used further down
env <- shapefile("StSn_env.shp")
load("StSn_PCA.robj") # presumably defines `PCA`, which is used further down

## As in the Morocco example, the env shapefile object is converted to a data
## frame whose rows are indexed by the names of the samples (the ID field).

env.df <- as.data.frame(env)
row.names(env.df) <- env$ID




### Create the Genetic Input for SamBada

# This is exactly the same procedure as in the Morocco example. 
# One indicator block per genotype value (0, 1, 2), bound side by side.
# Adding 0 turns the TRUE/FALSE indicators into 1/0.
sGT <- do.call(cbind, lapply(0:2, function(genotype) fSNPS == genotype)) + 0

# Give every column a unique identifier: <SNP name>_<genotype>.
colnames(sGT) <- paste0(
  rep(colnames(fSNPS), times = 3),
  rep(c("_0", "_1", "_2"), each = ncol(fSNPS))
)

# Prepend the sample names as the "Name" column and write the table to disk.
sambada_mol <- cbind("Name" = rownames(sGT), sGT)
write.table(
  sambada_mol,
  file = "mol-data-StSn.txt",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)


### Create the Environmental Input for SamBada

# Reorder the environmental table so that its rows follow the sample order of
# the genotype matrix. Row names are looked up with match(), which is exact:
# character row indexing of a data frame partially matches row names and
# silently returns all-NA rows for samples it cannot find.
sample_rows <- match(rownames(fSNPS), rownames(env.df))
if (anyNA(sample_rows)) {
  stop(
    "Samples missing from the environmental table: ",
    paste(rownames(fSNPS)[is.na(sample_rows)], collapse = ", "),
    call. = FALSE
  )
}
env.df <- env.df[sample_rows, ] # env.df is now correctly sorted

# Check name of columns
colnames(env.df)

# In this case study, we will need to incorporate two co-variables related to population structure.
# For this reason, we will run 3-variate models (2 variables of population structure + 1 environmental). 
# This kind of run can become quite slow, since SamBada currently builds all the possible tri-variate models (even those with three environmental variables).
# For the purposes of this exercise we will use only 3 environmental variables. 

# Environmental variables kept for this run:
#   Tm006: the average sea surface temperature in June.
#   Ts001: the standard deviation of sea surface temperature in January.
#   Cm013: the average chlorophyll concentration.
sENV <- env.df[c("Tm006", "Ts001", "Cm013")]

## Population structure variables: the first two Principal Components
## calculated during the previous exercise, put in the same sample order as
## the environmental table.
sPS <- cbind("PS1" = PCA$x[, 1], "PS2" = PCA$x[, 2])[rownames(sENV), ]

## Finally, everything is written to a text file: sample names first, then the
## environmental variables, then the population structure variables.
sambada_env <- cbind("Name" = rownames(sENV), sENV, sPS)
write.table(
  sambada_env,
  file = "env-data-StSn.txt",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE
)






### Create the Parameter Files for SamBada

# We can create the parameter file by running the custom function.
 
# Restore the saved objects from createParam.Rfun (presumably the createParam
# function called below), then write the parameter file for this run.
load("createParam.Rfun")
createParam(
  mol = sambada_mol,
  env = sambada_env,
  idcol = "Name",
  ps = 2, # set ps=2!
  filename = "param-StSn.txt"
)

# If you open the param-StSn.txt file you will see some differences in comparison to the previous example.
# The number of dimensions is now set to three.
# The last line of the param file ('POPULATIONVAR LAST') indicates that the population structure columns appear in the table after those containing the environmental information. 


### Run SamBada from R

## Make sure that the Sambada application or executable is in the working directory.  

# You can run Sambada from R using the following command. Note that under some RStudio instances this does not print any output until the end. 

# The system function invokes the command as in the terminal and returns the
# command's exit status. The status is kept and checked so that a failed
# SamBada run does not go unnoticed.
sambada_status <- system("./sambada param-StSn.txt env-data-StSn.txt mol-data-StSn.txt")
if (sambada_status != 0) {
  warning(
    "SamBada exited with non-zero status ", sambada_status,
    call. = FALSE
  )
}


