#!/bin/bash
############
############
### Script to run jobs on a system with
### CentOS 7.3, BeeGFS 6.19, Torque 6.0.4
### and a conda environment (named "rlenv")
### with Python 3.6.7 and the packages
### NumPy 1.17.3, QInfer 1.0a1, PyTorch 1.3.1, Gym 0.14.0,
### Stable Baselines 2.9.0, TensorFlow 1.14.0, mpi4py 3.0.2.

# Runs all calculations by submitting jobs via qsub in parallel,
# making use of the Torque job queue system.
# Jobs that require the results of other jobs are started only once
# those jobs are done, by using the
# #PBS -W depend=afterok:"$jtwf"
# option of qsub, where $jtwf is a colon-separated string of the
# job IDs of the jobs to wait for (jtwf). A commented-out minimal
# sketch of this pattern follows the parameter section below.

# Every job first copies the NeuralNetHeuristics folder to the working
# directory ${work_dir} (defined below). Afterwards, results are copied
# to the directories training_results/, pretraining_results/, and
# sampling_results/, which must already exist.

# You can choose some parameters right below. For instance, choose
# env_ids="SPM-time-v2" to run jobs only for this environment
# (which is a relatively simple and fast environment).

###
### choose parameters here:
###

### choose environments by writing their IDs to the following string
### possible are: SPM-mea-v2 SPM-time-v2 SPM-long-mea-v2
### SPM-long-time-v2 SPM-long-damped-mea-v0 SPM-long-strong-damped-mea-v0
### SPM-long-damped-time-v0 SPM-long-strong-damped-time-v0 SPM-T2-time-v0
### SPM-T2-mea-v0 SPM-multi-mea-v0 SPM-multi-time-v0
env_ids="SPM-mea-v2 SPM-time-v2 SPM-long-damped-mea-v0 SPM-long-strong-damped-mea-v0 SPM-long-damped-time-v0 SPM-long-strong-damped-time-v0 SPM-multi-mea-v0 SPM-multi-time-v0"
# env_ids="SPM-time-v2" ### <--- use this for testing
# env_ids="SPM-mea-v2 SPM-time-v2"

### choose heuristics by writing their IDs to the following string
### possible are: exp-sparse inv-sqrt-of-cov-norm inv-sqrt-of-cov-trace PGH CEM
### must include CEM if CEM will be used as pre_id (see below)
heu_ids="exp-sparse inv-sqrt-of-cov-trace PGH CEM"

### choose heuristics or CEM which are then used for pretraining TRPO
### possible are: exp-sparse inv-sqrt-of-cov-norm inv-sqrt-of-cov-trace PGH CEM
pre_ids="inv-sqrt-of-cov-trace PGH CEM"

### choose random seeds for different trainings of CEM and TRPO
### use integers between 1 and 200
### (n_seeds_cem=3 means CEM is trained 3 times for each environment)
n_seeds_cem=5
n_seeds_trpo=5

### number of iterations for CEM/TRPO training
n_iter_cem=1000
n_iter_trpo=500
batch_size_trpo=1000

### pretraining parameters
n_traj=10000
n_epoch=10000
n_batch=100

### number of trajectories (runs) when sampling from a heuristic/agent
n_sample=10000

### number of cores used per job
n_cores=20

### wall time for jobs is set directly in each job header below

### work directory
work_dir="/beegfs/work/"

### queue names
queue_name="qoptics"
queue_name_2="qoptics"

# CEM and TRPO training is done even if no heu_ids are given

############
############

# write the environment variable $HOME as a literal string to pass it
# to the jobs (it is expanded inside each job, not at submission time)
home='$'HOME
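### For reference, a minimal sketch (kept commented out) of the job
### dependency pattern used throughout this script: collect the job IDs
### printed by qsub, join them with colons, and pass the result to
### -W depend=afterok. Here, job_a.sh and job_b.sh are hypothetical
### placeholder scripts.
# jobs_to_wait_for=()
# for i in 1 2 3
# do
#     job=`qsub job_a.sh`            ## qsub prints the new job's ID
#     jobs_to_wait_for+=("$job")
# done
# ## join the array into a colon-separated string, e.g. "123:124:125"
# jtwf=$(IFS=:; echo "${jobs_to_wait_for[*]}")
# ## job_b.sh starts only once all jobs in $jtwf finished successfully
# qsub -W depend=afterok:"$jtwf" job_b.sh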
# sample from heuristics
for env_id in $env_ids
do
    for heu_id in $heu_ids
    do
        if [ "${heu_id}" == "CEM" ]
        then
            # CEM training
            jobs_to_wait_for=()
            for seed in $(seq 1 $n_seeds_cem)
            do
                rand_name=$(mktemp --dry-run run-trainingCEM-${env_id}-${heu_id}-${seed}-XXXXXXXX)
                mkdir ${work_dir}${rand_name}
############ begin job
job=`qsub - << EOJ
#!/bin/bash
#PBS -q ${queue_name}
#PBS -lnodes=1:ppn=${n_cores}:${queue_name}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_trainingCEM
cp -R NeuralNetHeuristics/* ${work_dir}${rand_name}
echo ${rand_name}
echo "finished copying"
cd ${work_dir}${rand_name}
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
python trainingCEM.py --env ${env_id} --niter ${n_iter_cem} --seed ${seed} --cores ${n_cores}
python sampling.py --env ${env_id} --heu CEM --seed ${seed} --reps ${n_sample} --path results/ --cores ${n_cores}
echo "finished calculation"
### Copying results back to result directories
cp agents/* ${home}/training_results
cp results/* ${home}/sampling_results
cd ${home}
### Cleaning up to be on the safe side
# remove_name="${work_dir}${rand_name}"
# rm -r "${remove_name}"
EOJ
`
############ end job
                jobs_to_wait_for+=("$job")
                echo "JobID = ${job} for CEM training with seed ${seed} with env ${env_id} submitted on `date`"
            done # seeds cem loop

            # write the content of the array (the job IDs) to a colon-separated string
            jtwf=$(IFS=:; echo "${jobs_to_wait_for[*]}")

            # find the best CEM agent (with respect to the used seeds), which will be
            # used for pretraining TRPO (if pre_ids contains CEM)
############ begin job
find_best_cem_job=`qsub - << EOJ
#!/bin/bash
#PBS -W depend=afterok:"$jtwf"
#PBS -q ${queue_name_2}
#PBS -lnodes=1:ppn=1:${queue_name_2}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_find_best_agentCEM
cp sampling_results/${env_id}*CEM*.pkl NeuralNetHeuristics/results/
cd NeuralNetHeuristics/
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
python find_best_agent.py --env ${env_id} --heu CEM --path results/
echo "finished calculation"
# remove_name="${home}/NeuralNetHeuristics/results/*"
# rm -r "${remove_name}"
cd ${home}
EOJ
`
############ end job
            echo "JobID = ${find_best_cem_job} to find best CEM agent for env ${env_id} submitted on `date`"
        else
            rand_name=$(mktemp --dry-run run-sampling-${env_id}-${heu_id}-XXXXXXXX)
            mkdir ${work_dir}${rand_name}
############ begin job
job=`qsub - << EOJ
#!/bin/bash
#PBS -q ${queue_name_2}
#PBS -lnodes=1:ppn=${n_cores}:${queue_name_2}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_sampling
cp -R NeuralNetHeuristics/* ${work_dir}${rand_name}
echo ${rand_name}
echo "finished copying"
cd ${work_dir}${rand_name}
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
python sampling.py --env ${env_id} --heu ${heu_id} --reps ${n_sample} --path results/ --cores ${n_cores}
echo "finished calculation"
### Copying results back to result directories
cp results/* ${home}/sampling_results
cd ${home}
### Cleaning up to be on the safe side
# remove_name="${work_dir}${rand_name}"
# rm -r "${remove_name}"
EOJ
`
############ end job
            echo "JobID = ${job} for sampling with env ${env_id} submitted on `date`"
        fi
    done # heu loop
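    ### Optional sanity check (kept commented out, a debugging aid only):
    ### "qstat -f <jobID>" prints the full Torque job record, which should
    ### list the afterok dependency while the job is still held. This
    ### assumes heu_ids contains CEM, so that ${find_best_cem_job} is set
    ### at this point; the exact output format may vary between versions.
    # qstat -f "${find_best_cem_job}" | grep -i depend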
    # TRPO pretraining
    for pre_id in ${pre_ids}
    do
        # pretraining
        rand_name=$(mktemp --dry-run run-pretrainingTRPO-${env_id}-${pre_id}-XXXXXXXX)
        mkdir ${work_dir}${rand_name}
        if [ "${pre_id}" == "CEM" ]
        then
############ begin job
pretrain_job=`qsub - << EOJ
#!/bin/bash
#PBS -W depend=afterok:"${find_best_cem_job}"
#PBS -q ${queue_name}
#PBS -lnodes=1:ppn=${n_cores}:${queue_name}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_pre_TRPO_CEM
cp -R NeuralNetHeuristics/* ${work_dir}${rand_name}
cp training_results/${env_id}* ${work_dir}${rand_name}/agents/
echo ${rand_name}
echo "pre ID:"
echo ${pre_id}
echo "finished copying"
cd ${work_dir}${rand_name}
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
python pretraining.py --env ${env_id} --heu ${pre_id} --nbatch ${n_batch} --ntraj ${n_traj} --nepoch ${n_epoch} --cores ${n_cores}
python sampling.py --env ${env_id} --heu TRPO --pre ${pre_id} --preonly True --reps ${n_sample} --path results/ --cores ${n_cores}
echo "finished calculation"
### Copying results back to result directories
cp agents/* ${home}/pretraining_results
cp results/* ${home}/sampling_results
cd ${home}
### Cleaning up to be on the safe side
# remove_name="${work_dir}${rand_name}"
# rm -r "${remove_name}"
EOJ
`
############ end job
        else
############ begin job
pretrain_job=`qsub - << EOJ
#!/bin/bash
#PBS -W depend=afterok:"${find_best_cem_job}"
#PBS -q ${queue_name}
#PBS -lnodes=1:ppn=${n_cores}:${queue_name}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_pre_TRPO
cp -R NeuralNetHeuristics/* ${work_dir}${rand_name}
echo ${rand_name}
echo "pre ID:"
echo ${pre_id}
echo "finished copying"
cd ${work_dir}${rand_name}
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
python pretraining.py --env ${env_id} --heu ${pre_id} --nbatch ${n_batch} --ntraj ${n_traj} --nepoch ${n_epoch} --cores ${n_cores}
python sampling.py --env ${env_id} --heu TRPO --pre ${pre_id} --preonly True --reps ${n_sample} --path results/ --cores ${n_cores}
echo "finished calculation"
### Copying results back to result directories
cp agents/* ${home}/pretraining_results
cp results/* ${home}/sampling_results
cd ${home}
### Cleaning up to be on the safe side
# remove_name="${work_dir}${rand_name}"
# rm -r "${remove_name}"
EOJ
`
############ end job
        fi
        echo "JobID = ${pretrain_job} for pretraining with env ${env_id} submitted on `date`"

        # training TRPO
        for seed in $(seq 1 $n_seeds_trpo)
        do
            rand_name=$(mktemp --dry-run run-trainingTRPO-${env_id}-${pre_id}-${seed}-XXXXXXXX)
            mkdir ${work_dir}${rand_name}
############ begin job
job=`qsub - << EOJ
#!/bin/bash
#PBS -W depend=afterok:"${pretrain_job}"
#PBS -q ${queue_name}
#PBS -lnodes=1:ppn=${n_cores}:${queue_name}
#PBS -lwalltime=20:00:00:00
#PBS -lmem=4gb
#PBS -N run_trainingTRPO
cp -R NeuralNetHeuristics/* ${work_dir}${rand_name}
cp -R pretraining_results/${env_id}*${pre_id}*pretrain_only* ${work_dir}${rand_name}/agents/
echo ${rand_name}
echo "finished copying"
cd ${work_dir}${rand_name}
### load modules
module load chains/GNU-4.9
module load mpi/openmpi/2.1-gnu-4.9
### activate conda environment
source activate rlenv
echo "after activation:"
conda info --envs
mpirun -np ${n_cores} python trainingTRPO.py --env ${env_id} --pre ${pre_id} --batchsize ${batch_size_trpo} --niter ${n_iter_trpo} --seed ${seed}
python sampling.py --env ${env_id} --heu TRPO --pre ${pre_id} --seed ${seed} --reps ${n_sample} --path results/ --cores ${n_cores}
echo "finished calculation"
### Copying results back to result directories
cp agents/* ${home}/training_results/
cp results/* ${home}/sampling_results/
cd ${home}
### Cleaning up to be on the safe side
# remove_name="${work_dir}${rand_name}"
# rm -r "${remove_name}"
EOJ
`
############ end job
            echo "JobID = ${job} for TRPO training with seed ${seed} with env ${env_id} submitted on `date`"
        done # seeds trpo loop
    done # pre loop
done # env loop

echo "all jobs submitted"
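### Reminder (see the header): the result directories are not created by
### this script; they must exist in $HOME before the first run. A minimal
### one-time setup, kept commented out here:
# mkdir -p training_results pretraining_results sampling_results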