BUSCO on Porites astreoides reference transcriptomes

Creating new ref folders

# The directory name must match the case used by the BUSCO query path
# (.../REFS/Past_Mansour/p_ast2016.fasta); Linux paths are case-sensitive.
# -p keeps the step re-runnable; cd only happens if the directory exists.
mkdir -p Past_Mansour &&
cd Past_Mansour

Downloading the reference transcriptome and annotations from Mansour (GigaDB dataset 100207)

# Fetch the assembly and its annotated transcripts into the current directory
# with a single wget invocation (URLs are processed in the order given).
wget \
  https://ftp.cngb.org/pub/gigadb/pub/10.5524/100001_101000/100207/p_ast2016.fasta \
  https://ftp.cngb.org/pub/gigadb/pub/10.5524/100001_101000/100207/Por_ast_ann_allTrans.fasta

BUSCO shell script for Mansour

#!/bin/bash

#SBATCH --job-name="busco"
#SBATCH --time="100:00:00"
#SBATCH --nodes 1 --ntasks-per-node=20
#SBATCH --mem=250G
##SBATCH --output="busco-%u-%x-%j"
##SBATCH --account=putnamlab
##SBATCH --export=NONE

# Runs BUSCO in transcriptome mode against a lineage dataset.
#
# Optional environment overrides (e.g. via `sbatch --export`):
#   query          FASTA file to assess
#                  (default: the Mansour p_ast2016.fasta assembly)
#   db_to_compare  BUSCO lineage dataset
#                  (default: the shared metazoa_odb10 directory)
#
# Output is written to ${labbase}/<user>/busco_output_Past_Mansour.
# The script exits with BUSCO's exit status so a failed run is reported
# as failed by the scheduler.

# Print an error to stderr and abort the job.
die() {
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

echo "START" "$(date)"

labbase=/data/putnamlab
busco_shared="${labbase}/shared/busco"
query="${query:-/data/putnamlab/kevin_wong1/REFS/Past_Mansour/p_ast2016.fasta}" # set this to the query (genome/transcriptome) you are running
db_to_compare="${db_to_compare:-${busco_shared}/downloads/lineages/metazoa_odb10}"

# USER may be missing from a restricted job environment; an empty value would
# make every path below point at the lab root, so fall back to id(1).
run_user="${USER:-$(id -un)}"
[ -n "${run_user}" ] || die "cannot determine the user name"
work_dir="${labbase}/${run_user}"

# Follow the Slurm allocation when it is exported; otherwise keep the
# original value of 20 threads.
threads="${SLURM_NTASKS_PER_NODE:-20}"

[ -e "${query}" ] || die "query not found: ${query}"

busco_init="${busco_shared}/scripts/busco_init.sh"
[ -r "${busco_init}" ] || die "cannot read ${busco_init}"
source "${busco_init}"  # sets up the modules required for this in the right order

# BUSCO needs the Augustus config directory in a writable location, with
# AUGUSTUS_CONFIG_PATH pointing at it. The "agustus" spelling matches the
# shared tarball name; presumably the archive unpacks to the same directory
# name, so it must not be "corrected" here.
if [ ! -d "${work_dir}/agustus_config" ] ; then
    echo "Copying agustus_config/ to ${work_dir} .. "
    tar -C "${work_dir}" -xzf "${busco_shared}/agustus_config.tgz" \
        || die "failed to unpack ${busco_shared}/agustus_config.tgz"
    echo done
fi

export AUGUSTUS_CONFIG_PATH="${work_dir}/agustus_config"

# BUSCO writes its results relative to the current directory, so the run
# must not continue from somewhere else if the cd fails.
cd "${work_dir}" || die "cannot cd to ${work_dir}"
busco --config "${busco_shared}/scripts/busco-config.ini" -f -c "${threads}" --long -i "${query}" -l "${db_to_compare}" -o busco_output_Past_Mansour -m transcriptome
busco_status=$?

echo "STOP" "$(date)"
exit "${busco_status}"

Running BUSCO for Mansour Transcriptome

# Submit the Mansour BUSCO job: stdout/stderr go to the home directory,
# mail is sent on begin/end/failure, and the query FASTA is passed through
# the job environment.
sbatch \
  --mail-user=kevin_wong1@uri.edu \
  --mail-type=BEGIN,END,FAIL \
  --output ~/%u-%x.%j.out \
  --error ~/%u-%x.%j.err \
  --export query=/data/putnamlab/kevin_wong1/REFS/Past_Mansour/p_ast2016.fasta \
  /data/putnamlab/kevin_wong1/scripts/run-busco-transcriptome_Past_mansour.sh

Mansour Output

# BUSCO version is: 4.0.6
# The lineage dataset is: metazoa_odb10 (Creation date: 2019-11-20, number of species: 65, number of BUSCOs: 954)
# Summarized benchmarking in BUSCO notation for file /data/putnamlab/kevin_wong1/REFS/Past_Mansour/p_ast2016.fasta
# BUSCO was run in mode: transcriptome

        ***** Results: *****

        C:30.5%[S:19.7%,D:10.8%],F:5.1%,M:64.4%,n:954
        291     Complete BUSCOs (C)
        188     Complete and single-copy BUSCOs (S)
        103     Complete and duplicated BUSCOs (D)
        49	Fragmented BUSCOs (F)
        614     Missing BUSCOs (M)
        954     Total BUSCO groups searched

BUSCO shell script for Kenkel

#!/bin/bash

#SBATCH --job-name="busco"
#SBATCH --time="100:00:00"
#SBATCH --nodes 1 --ntasks-per-node=20
#SBATCH --mem=250G
##SBATCH --output="busco-%u-%x-%j"
##SBATCH --account=putnamlab
##SBATCH --export=NONE

# Runs BUSCO in transcriptome mode against a lineage dataset.
#
# Optional environment overrides (e.g. via `sbatch --export`):
#   query          FASTA file to assess
#                  (default: Kenkel2013_past_transcriptome.fasta)
#   db_to_compare  BUSCO lineage dataset
#                  (default: the shared metazoa_odb10 directory)
#
# Output is written to ${labbase}/<user>/busco_output_Past_kenkel.
# The script exits with BUSCO's exit status so a failed run is reported
# as failed by the scheduler.

# Print an error to stderr and abort the job.
die() {
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

echo "START" "$(date)"

labbase=/data/putnamlab
busco_shared="${labbase}/shared/busco"
query="${query:-/data/putnamlab/kevin_wong1/REFS/Past/Kenkel2013_past_transcriptome.fasta}" # set this to the query (genome/transcriptome) you are running
db_to_compare="${db_to_compare:-${busco_shared}/downloads/lineages/metazoa_odb10}"

# USER may be missing from a restricted job environment; an empty value would
# make every path below point at the lab root, so fall back to id(1).
run_user="${USER:-$(id -un)}"
[ -n "${run_user}" ] || die "cannot determine the user name"
work_dir="${labbase}/${run_user}"

# Follow the Slurm allocation when it is exported; otherwise keep the
# original value of 20 threads.
threads="${SLURM_NTASKS_PER_NODE:-20}"

[ -e "${query}" ] || die "query not found: ${query}"

busco_init="${busco_shared}/scripts/busco_init.sh"
[ -r "${busco_init}" ] || die "cannot read ${busco_init}"
source "${busco_init}"  # sets up the modules required for this in the right order

# BUSCO needs the Augustus config directory in a writable location, with
# AUGUSTUS_CONFIG_PATH pointing at it. The "agustus" spelling matches the
# shared tarball name; presumably the archive unpacks to the same directory
# name, so it must not be "corrected" here.
if [ ! -d "${work_dir}/agustus_config" ] ; then
    echo "Copying agustus_config/ to ${work_dir} .. "
    tar -C "${work_dir}" -xzf "${busco_shared}/agustus_config.tgz" \
        || die "failed to unpack ${busco_shared}/agustus_config.tgz"
    echo done
fi

export AUGUSTUS_CONFIG_PATH="${work_dir}/agustus_config"

# BUSCO writes its results relative to the current directory, so the run
# must not continue from somewhere else if the cd fails.
cd "${work_dir}" || die "cannot cd to ${work_dir}"
busco --config "${busco_shared}/scripts/busco-config.ini" -f -c "${threads}" --long -i "${query}" -l "${db_to_compare}" -o busco_output_Past_kenkel -m transcriptome
busco_status=$?

echo "STOP" "$(date)"
exit "${busco_status}"

Running BUSCO for Kenkel Transcriptome

# Submit the Kenkel BUSCO job: stdout/stderr go to the home directory,
# mail is sent on begin/end/failure, and the query FASTA is passed through
# the job environment.
sbatch \
  --mail-user=kevin_wong1@uri.edu \
  --mail-type=BEGIN,END,FAIL \
  --output ~/%u-%x.%j.out \
  --error ~/%u-%x.%j.err \
  --export query=/data/putnamlab/kevin_wong1/REFS/Past/Kenkel2013_past_transcriptome.fasta \
  /data/putnamlab/kevin_wong1/scripts/run-busco-transcriptome_Past_Kenkel.sh

Kenkel output

# BUSCO version is: 4.0.6
# The lineage dataset is: metazoa_odb10 (Creation date: 2019-11-20, number of species: 65, number of BUSCOs: 954)
# Summarized benchmarking in BUSCO notation for file /data/putnamlab/kevin_wong1/REFS/Past/Kenkel2013_past_transcriptome.fasta
# BUSCO was run in mode: transcriptome

        ***** Results: *****

        C:23.6%[S:22.7%,D:0.9%],F:30.9%,M:45.5%,n:954
        226     Complete BUSCOs (C)
        217     Complete and single-copy BUSCOs (S)
        9	Complete and duplicated BUSCOs (D)
        295     Fragmented BUSCOs (F)
        433     Missing BUSCOs (M)
        954     Total BUSCO groups searched
Written on September 30, 2021