Training AUGUSTUS with BUSCO

First attempt at training AUGUSTUS with BUSCO


Config parameters

# This is the BUSCOv5 configuration file template.
# It is not necessary to use this, as BUSCO will use the dependencies available on your PATH by default.
# The busco run parameters can all be set on the command line. See the help prompt (busco -h) for details.
# To use this file for an alternative configuration, or to specify particular versions of dependencies:
# 1) edit the path and command values to match your desired dependency versions.
#    WARNING: passing a parameter through the command line overrides the value specified in this file.
# 2) Enable a parameter by removing ";"
# 3) Make this config file available to BUSCO either by setting an environment variable
#                   export BUSCO_CONFIG_FILE="/path/to/myconfig.ini"
#    or by passing it as a command line argument
#                   busco <args> --config /path/to/config.ini

# Run-level parameters. Every entry below is still disabled (leading ";"),
# so the values actually used come from the busco command line.
[busco_run]
# Input file
;in = /path/to/input_file.fna
# Run name, used in output files and folder
;out = BUSCO_run
# Where to store the output directory
;out_path = /path/to/output_folder
# Path to the BUSCO dataset
;lineage_dataset = /data/putnamlab/shared/busco/downloads
# Which mode to run (genome / proteins / transcriptome)
;mode = genome
# Run lineage auto selector
;auto-lineage = True
# Run auto selector only for non-eukaryote datasets
;auto-lineage-prok = True
# Run auto selector only for eukaryote datasets
;auto-lineage-euk = True
# How many threads to use for multithreaded steps
;cpu = 16
# Force rewrite if files already exist (True/False)
;force = False
# Restart a previous BUSCO run (True/False)
;restart = False
# Blast e-value
;evalue = 1e-3
# How many candidate regions (contigs, scaffolds) to consider for each BUSCO
;limit = 3
# Metaeuk parameters for initial run
# Metaeuk parameters for rerun
# Augustus parameters
# Quiet mode (True/False)
;quiet = False
# Local destination path for downloaded lineage datasets
;download_path = /data/putnamlab/shared/busco/downloads/
# Run offline
# Ortho DB Datasets version
;datasets_version = odb10
# URL to BUSCO datasets
;download_base_url =
# Download most recent BUSCO data and files
;update-data = True
# Use Augustus gene predictor instead of metaeuk
;use_augustus = True

# Dependency locations: one section per external tool, each giving the
# directory ("path") and the executable name ("command").
# NOTE(review): section names and the command values for the AUGUSTUS
# scripts, hmmsearch and SEPP were restored from the stock BUSCO v5
# template - confirm against the config.ini on the cluster.
[tblastn]
path = /opt/software/BLAST+/2.11.0-gompi-2020b/bin/
command = tblastn

[makeblastdb]
path = /opt/software/BLAST+/2.11.0-gompi-2020b/bin/
command = makeblastdb

[metaeuk]
path = /opt/software/MetaEuk/4-GCC-10.2.0/bin/
command = metaeuk

[augustus]
path = /opt/software/AUGUSTUS/3.4.0-foss-2020b/bin/
command = augustus

[etraining]
path = /opt/software/AUGUSTUS/3.4.0-foss-2020b/bin/
command = etraining

[gff2gbSmallDNA.pl]
path = /opt/software/AUGUSTUS/3.4.0-foss-2020b/scripts/
command = gff2gbSmallDNA.pl

[new_species.pl]
path = /opt/software/AUGUSTUS/3.4.0-foss-2020b/scripts/
command = new_species.pl

[optimize_augustus.pl]
path = /opt/software/AUGUSTUS/3.4.0-foss-2020b/scripts/
command = optimize_augustus.pl

[hmmsearch]
path = /opt/software/HMMER/3.3.2-gompi-2020b/bin/
command = hmmsearch

[sepp]
path = /opt/software/SEPP/4.4.0-foss-2020b/bin
command = run_sepp.py

[prodigal]
path = /opt/software/prodigal/2.6.3-GCCcore-10.2.0/bin
command = prodigal



#!/bin/bash
# SLURM batch job: run BUSCO (genome mode, metazoa_odb10 lineage) on the
# filtered assembly, reading dependency locations from config.ini in the
# job's working directory (set with -D below).
#SBATCH --job-name="BUSCO"
#SBATCH -t 100:00:00
#SBATCH --export=NONE
#SBATCH --nodes=1 --ntasks-per-node=20
#SBATCH -D /data/putnamlab/kevin_wong1/Past_Genome/AUGUSTUS_BUSCO
#SBATCH --mem=500GB

echo "Starting BUSCO" "$(date)"

# Load modules
module load BUSCO/5.2.2-foss-2020b

# Run BUSCO.
# NOTE(review): --long and --augustus_parameters look like Augustus-only
# options, but --augustus is not passed here - confirm whether it should
# be added so that Augustus (rather than MetaEuk) is actually run.
# NOTE(review): 20 tasks per node are requested but no --cpu value is
# passed to busco - confirm the intended thread count.
# The option list ends without a trailing backslash so the next command can
# never be absorbed into the busco argument list.
if busco \
  --config config.ini \
  --in ../past_filtered_assembly.fasta \
  --out past_geome_AUG_BUSCO \
  -l busco_downloads/metazoa_odb10 \
  -m genome \
  -f \
  --long \
  --augustus_parameters='--progress=true'; then
  echo "BUSCO Mission complete!" "$(date)"
else
  # Report the failure and propagate BUSCO's exit status so SLURM marks
  # the job as failed instead of printing a misleading success message.
  busco_status=$?
  echo "BUSCO failed with exit status ${busco_status}" "$(date)" >&2
  exit "${busco_status}"
fi

sbatch /data/putnamlab/kevin_wong1/Past_Genome/

ERROR MESSAGE: Metaeuk did not recognize any genes matching the dataset metazoa_odb10 in the input file. If this is unexpected, check your input file and your installation of Metaeuk

Trying this script again, but this time defining the MetaEuk disk-space parameters.


#!/bin/bash
# SLURM batch job: second BUSCO attempt (genome mode, metazoa_odb10 lineage)
# on the filtered assembly. Identical to the first attempt except that
# disk-space limits and temp-file removal are passed to MetaEuk for both
# its initial run and its rerun.
#SBATCH --job-name="BUSCO"
#SBATCH -t 100:00:00
#SBATCH --export=NONE
#SBATCH --nodes=1 --ntasks-per-node=20
#SBATCH -D /data/putnamlab/kevin_wong1/Past_Genome/AUGUSTUS_BUSCO
#SBATCH --mem=500GB

echo "Starting BUSCO" "$(date)"

# Load modules
module load BUSCO/5.2.2-foss-2020b

# Run BUSCO.
# NOTE(review): --long and --augustus_parameters look like Augustus-only
# options, but --augustus is not passed here - confirm whether it should
# be added so that Augustus (rather than MetaEuk) is actually run.
# NOTE(review): 20 tasks per node are requested but no --cpu value is
# passed to busco - confirm the intended thread count.
# The option list ends without a trailing backslash so the next command can
# never be absorbed into the busco argument list.
if busco \
  --config config.ini \
  --in ../past_filtered_assembly.fasta \
  --out past_geome_AUG_BUSCO \
  -l busco_downloads/metazoa_odb10 \
  -m genome \
  -f \
  --long \
  --augustus_parameters='--progress=true' \
  --metaeuk_parameters="--disk-space-limit=500M,--remove-tmp-files=1" \
  --metaeuk_rerun_parameters="--disk-space-limit=500M,--remove-tmp-files=1"; then
  echo "BUSCO Mission complete!" "$(date)"
else
  # Report the failure and propagate BUSCO's exit status so SLURM marks
  # the job as failed instead of printing a misleading success message.
  busco_status=$?
  echo "BUSCO failed with exit status ${busco_status}" "$(date)" >&2
  exit "${busco_status}"
fi

sbatch /data/putnamlab/kevin_wong1/Past_Genome/

Job ID: 94134

Started: Oct 25, 11:15

Written on October 21, 2021