Orthofinder between coral species for cell type annotation

Orthofinder

Comp_Name Comp_Number Species_1 Species_2 Species_3 Species_4 Species_5 Species_6 Species_7
Gfas_Spis 1 spis gfas          
Pdam_Spis 2 spis pdam          
Acer_Spis 3 spis acer          
Coral_ortho 4 spis gfas pdam acer oarb    
Cnid_ortho 5 spis gfas pdam acer oarb nvec xenia
Stemcell 6 nvec pdam acer        
# Activate the base conda installation.
# NOTE(review): the relative path assumes the current directory contains
# anaconda3/ (presumably $HOME) — confirm.
source anaconda3/bin/activate

# Build a dedicated environment for OrthoFinder and its helper tools.
# conda-forge is listed first because bioconda packages depend on it.
conda create -n orthofinder_env -y -c conda-forge -c bioconda orthofinder diamond fasttree
conda activate orthofinder_env

# Create the working folder (idempotently) and enter it, so the per-comparison
# folders made below end up under orthofinder_comps/ as the job scripts expect.
cd /scratch/projects/dark_genes
mkdir -p orthofinder_comps
cd orthofinder_comps

Paths:

Spis: /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa
Gfas: /nethome/kxw755/genomes/GCA_948470475.1_Gfas/data/GCA_948470475.1/gfas_1.0.proteins.fasta
Acer: /nethome/kxw755/genomes/GCA_032359415.1_Acer/acer_protein.faa
Pdam: /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta
Apoc: /nethome/kxw755/genomes/Apoc_14110456/apoculata_proteins.fasta.gz
Xenia: /nethome/kxw755/genomes/GCF_021976095.1_Xenia/xenia_protein.faa
Nvec: /nethome/kxw755/genomes/GCF_932526225.1_Nvec/nvec_protein.faa

Gfas_markers

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR, so this step works from any current directory.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Gfas_Spis
cd /scratch/projects/dark_genes/orthofinder_comps/Gfas_Spis

# Link both proteomes into the folder (the links keep the target file names).
ln -s /nethome/kxw755/genomes/GCA_948470475.1_Gfas/data/GCA_948470475.1/gfas_1.0.proteins.fasta
ln -s /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa

# No "mkdir results" here: OrthoFinder creates the -o folder itself and stops
# with an error if that folder already exists.
# Open the editor and paste in the job script that follows.
nano of_gfas_spis.job
#!/bin/bash

#BSUB -J OF_Gfas_Spis
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_Gfas_Spis_%J.out
#BSUB -e OF_Gfas_Spis_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Gfas_Spis comparison folder. Submit with: bsub < of_gfas_spis.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Gfas_Spis"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Gfas_Spis/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

bsub < of_gfas_spis.job

Pdam_markers

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Pdam_Spis
cd /scratch/projects/dark_genes/orthofinder_comps/Pdam_Spis

# Link both proteomes into the folder (the links keep the target file names).
ln -s /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta
ln -s /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa

# No "mkdir results" here: OrthoFinder creates the -o folder itself and stops
# with an error if that folder already exists.
# Open the editor and paste in the job script that follows.
nano of_pdam_spis.job
#!/bin/bash

#BSUB -J OF_Pdam_Spis
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_Pdam_Spis_%J.out
#BSUB -e OF_Pdam_Spis_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Pdam_Spis comparison folder. Submit with: bsub < of_pdam_spis.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Pdam_Spis"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Pdam_Spis/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_pdam_spis.job

Acer_markers

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Acer_Spis
cd /scratch/projects/dark_genes/orthofinder_comps/Acer_Spis

# Link both proteomes into the folder (the links keep the target file names).
ln -s /nethome/kxw755/genomes/GCA_032359415.1_Acer/acer_protein.faa
ln -s /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa

# Open the editor and paste in the job script that follows.
nano of_acer_spis.job
#!/bin/bash

#BSUB -J OF_Acer_Spis
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_Acer_Spis_%J.out
#BSUB -e OF_Acer_Spis_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Acer_Spis comparison folder. Submit with: bsub < of_acer_spis.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Acer_Spis"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Acer_Spis/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_acer_spis.job

Coral_ortho

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Coral_ortho
cd /scratch/projects/dark_genes/orthofinder_comps/Coral_ortho

# Link the five coral proteomes into the folder (links keep the target names).
ln -s /nethome/kxw755/genomes/GCA_032359415.1_Acer/acer_protein.faa
ln -s /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa
ln -s /nethome/kxw755/genomes/GCA_948470475.1_Gfas/data/GCA_948470475.1/gfas_1.0.proteins.fasta
ln -s /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta
# NOTE(review): the Paths list in these notes records this file as
# apoculata_proteins.fasta.gz; OrthoFinder needs uncompressed FASTA, so confirm
# the unzipped copy exists at this path before submitting.
ln -s /nethome/kxw755/genomes/Apoc_14110456/apoculata_proteins.fasta

# Open the editor and paste in the job script that follows.
nano of_coral_ortho.job
#!/bin/bash

#BSUB -J OF_Coral_ortho
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_Coral_ortho_%J.out
#BSUB -e OF_Coral_ortho_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Coral_ortho comparison folder. Submit with: bsub < of_coral_ortho.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Coral_ortho"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Coral_ortho/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_coral_ortho.job

Cnid_ortho

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Cnid_ortho
cd /scratch/projects/dark_genes/orthofinder_comps/Cnid_ortho

# Link the seven cnidarian proteomes into the folder (links keep the target names).
ln -s /nethome/kxw755/genomes/GCA_032359415.1_Acer/acer_protein.faa
ln -s /nethome/kxw755/genomes/GCA_002571385.2_Spis/spis_protein.faa
ln -s /nethome/kxw755/genomes/GCA_948470475.1_Gfas/data/GCA_948470475.1/gfas_1.0.proteins.fasta
ln -s /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta
# NOTE(review): the Paths list in these notes records this file as
# apoculata_proteins.fasta.gz; OrthoFinder needs uncompressed FASTA, so confirm
# the unzipped copy exists at this path before submitting.
ln -s /nethome/kxw755/genomes/Apoc_14110456/apoculata_proteins.fasta
ln -s /nethome/kxw755/genomes/GCF_021976095.1_Xenia/xenia_protein.faa
ln -s /nethome/kxw755/genomes/GCF_932526225.1_Nvec/nvec_protein.faa

# Open the editor and paste in the job script that follows.
nano of_cnid_ortho.job
#!/bin/bash

#BSUB -J OF_cnid_ortho
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_cnid_ortho_%J.out
#BSUB -e OF_cnid_ortho_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Cnid_ortho comparison folder. Submit with: bsub < of_cnid_ortho.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Cnid_ortho"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Cnid_ortho/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_cnid_ortho.job

Stem_ortho

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Stem_ortho
cd /scratch/projects/dark_genes/orthofinder_comps/Stem_ortho

# Link the three proteomes into the folder (the links keep the target file names).
ln -s /nethome/kxw755/genomes/GCA_032359415.1_Acer/acer_protein.faa
ln -s /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta
ln -s /nethome/kxw755/genomes/GCF_932526225.1_Nvec/nvec_protein.faa

# Open the editor and paste in the job script that follows.
nano of_stem_ortho.job
#!/bin/bash

#BSUB -J OF_stem_ortho
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_stem_ortho_%J.out
#BSUB -e OF_stem_ortho_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Stem_ortho comparison folder. Submit with: bsub < of_stem_ortho.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Stem_ortho"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Stem_ortho/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_stem_ortho.job

# Copy the Pdam_Spis orthogroup table from the cluster into the current
# directory under a comparison-specific file name.
# NOTE(review): presumably run from a local machine, not on the cluster, and
# Results_Dec24 looks like OrthoFinder's date-stamped run folder — confirm.
scp -r kxw755@pegasus.ccs.miami.edu:/scratch/projects/dark_genes/orthofinder_comps/Pdam_Spis/results/Results_Dec24/Orthogroups/Orthogroups.tsv ./Pdam_Spis_Orthogroups.tsv

Gfas_Pdam

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Gfas_Pdam
cd /scratch/projects/dark_genes/orthofinder_comps/Gfas_Pdam

# Link both proteomes into the folder (the links keep the target file names).
ln -s /nethome/kxw755/genomes/GCA_948470475.1_Gfas/data/GCA_948470475.1/gfas_1.0.proteins.fasta
ln -s /nethome/kxw755/genomes/pdam_genome/pdam_proteins.fasta

# Open the editor and paste in the job script that follows.
nano of_Gfas_Pdam.job
#!/bin/bash

#BSUB -J OF_Gfas_Pdam
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_Gfas_Pdam_%J.out
#BSUB -e OF_Gfas_Pdam_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Gfas_Pdam comparison folder. Submit with: bsub < of_Gfas_Pdam.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Gfas_Pdam"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Gfas_Pdam/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_Gfas_Pdam.job

# Copy the Gfas_Pdam orthogroup table from the cluster into the current
# directory under a comparison-specific file name.
# NOTE(review): presumably run from a local machine, not on the cluster, and
# Results_Dec27 looks like OrthoFinder's date-stamped run folder — confirm.
scp -r kxw755@pegasus.ccs.miami.edu:/scratch/projects/dark_genes/orthofinder_comps/Gfas_Pdam/results/Results_Dec27/Orthogroups/Orthogroups.tsv ./Gfas_Pdam_Orthogroups.tsv

Nvec_Nvec

# Create and enter the comparison folder using the absolute path that the job
# script uses as GENOME_DIR; a relative mkdir here would nest the folder inside
# whichever comparison folder the shell is still sitting in.
mkdir -p /scratch/projects/dark_genes/orthofinder_comps/Nvec_Nvec2
cd /scratch/projects/dark_genes/orthofinder_comps/Nvec_Nvec2

# Link the two Nvec protein sets into the folder (links keep the target names).
ln -s /nethome/kxw755/genomes/GCF_932526225.1_Nvec/nvec_protein.faa
ln -s /nethome/kxw755/genomes/Nvec_200/NV2g.20240221.protein.fa

# Open the editor and paste in the job script that follows.
nano of_nvec_nvec.job
#!/bin/bash

#BSUB -J OF_nvec_nvec
#BSUB -q bigmem
#BSUB -P dark_genes
#BSUB -n 16
#BSUB -W 120:00
#BSUB -R "rusage[mem=15000]"
#BSUB -u kxw755@earth.miami.edu
#BSUB -o OF_nvec_nvec_%J.out
#BSUB -e OF_nvec_nvec_%J.err
#BSUB -B
#BSUB -N
###################################################################
# LSF job: run OrthoFinder on the proteome FASTA files linked into the
# Nvec_Nvec2 comparison folder. Submit with: bsub < of_nvec_nvec.job

# Abort on the first failing command or unset variable.
set -euo pipefail

# Parameters
GENOME_DIR="/scratch/projects/dark_genes/orthofinder_comps/Nvec_Nvec2"  # Folder holding the input proteome files
OUTPUT_DIR="/scratch/projects/dark_genes/orthofinder_comps/Nvec_Nvec2/results"  # OrthoFinder writes its Results_<date>/ folder under here
# Use every slot LSF allocated for "#BSUB -n" rather than a hard-coded count;
# fall back to the requested 16 if LSB_DJOB_NUMPROC is not set.
THREADS="${LSB_DJOB_NUMPROC:-16}"

# Run OrthoFinder
#   -f : folder containing the input protein FASTA files
#   -o : non-default results folder
#   -t : number of parallel sequence-search (DIAMOND) threads
#   -a : number of parallel OrthoFinder analysis threads
echo "Running OrthoFinder..."
if ! orthofinder -f "$GENOME_DIR" -o "$OUTPUT_DIR" -t "$THREADS" -a "$THREADS"; then
  # Exit non-zero so LSF reports the job as failed instead of done.
  echo "OrthoFinder failed; see the messages above." >&2
  exit 1
fi

echo "OrthoFinder analysis complete. Results saved to $OUTPUT_DIR."

# Submit the job to LSF; feeding the script on stdin lets bsub read its #BSUB lines.
bsub < of_nvec_nvec.job

# Copy the Nvec_Nvec2 orthogroup table from the cluster into the current
# directory under a comparison-specific file name.
# NOTE(review): presumably run from a local machine, not on the cluster, and
# Results_Dec27 looks like OrthoFinder's date-stamped run folder — confirm.
scp -r kxw755@pegasus.ccs.miami.edu:/scratch/projects/dark_genes/orthofinder_comps/Nvec_Nvec2/results/Results_Dec27/Orthogroups/Orthogroups.tsv ./Nvec_Nvec2_Orthogroups.tsv

Written on November 27, 2024