src/hg/makeDb/doc/susScr2.txt 1.1
1.1 2010/03/25 20:08:11 hiram
initial browser up and running
Index: src/hg/makeDb/doc/susScr2.txt
===================================================================
RCS file: src/hg/makeDb/doc/susScr2.txt
diff -N src/hg/makeDb/doc/susScr2.txt
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/makeDb/doc/susScr2.txt 25 Mar 2010 20:08:11 -0000 1.1
@@ -0,0 +1,81 @@
+# for emacs: -*- mode: sh; -*-
+
+# $Id$
+
+# Sus scrofa - SGSC Sscrofa9.2 NCBI project 10718, CM000812
+# ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Sus_scrofa/Sscrofa9.2/
+
+##########################################################################
+# Download sequence (DONE - 2010-03-03 Chin)
+ mkdir /hive/data/genomes/susScr2
+ cd /hive/data/genomes/susScr2
+ mkdir genbank
+ cd genbank
+ mkdir Sscrofa9.2
+ cd Sscrofa9.2    # the recursive wget below is run from inside genbank/Sscrofa9.2
+wget --timestamping -r --cut-dirs=6 --level=0 -nH -x --no-remove-listing -np \
+"ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Sus_scrofa/Sscrofa9.2/*"
+ cd ..            # back in genbank/, where the following steps are run
+
+ mkdir ucscChr
+ # stay at genbank directory
+ # fixup the accession names to become UCSC chrom names
+
+export S=Sscrofa9.2/Primary_Assembly/assembled_chromosomes
+while read -r C ACC _ || [ -n "${C}" ]  # chr2acc: column 1 = chromosome, column 2 = accession
+do
+ # one pass over chr2acc supplies both names; no per-line grep that could match more than one row
+ echo "${ACC} -> chr${C}"
+ zcat "${S}/AGP/chr${C}.comp.agp.gz" \
+ | sed -e "s/^${ACC}/chr${C}/" | gzip > "ucscChr/chr${C}.agp.gz"
+done < "${S}/chr2acc"
+
+export S=Sscrofa9.2/Primary_Assembly/assembled_chromosomes  # rewrite each chromosome FASTA under a ">chrN" header
+while read -r C ACC _ || [ -n "${C}" ]  # chr2acc: column 1 = chromosome, column 2 = accession
+do
+ echo "${ACC} -> chr${C}"
+ echo ">chr${C}" > "ucscChr/chr${C}.fa"
+ zcat "${S}/FASTA/chr${C}.fa.gz" | grep -v "^>" >> "ucscChr/chr${C}.fa"
+ gzip "ucscChr/chr${C}.fa" &  # compress in the background; loop runs in this shell so wait can reap it
+done < "${S}/chr2acc"
+wait  # every background gzip must finish before faSize reads ucscChr/chr*.fa.gz
+ # Check them with faSize
+ faSize Sscrofa9.2/Primary_Assembly/assembled_chromosomes/FASTA/chr*.fa.gz
+ # 2262484801 bases (31203023 N's 2231281778 real 2231281778 upper
+ # 0 lower) in 19 sequences in 19 files
+ faSize ucscChr/chr*.fa.gz
+ # 2262484801 bases (31203023 N's 2231281778 real 2231281778 upper
+ # 0 lower) in 19 sequences in 19 files
+
+#########################################################################
+# Initial makeGenomeDb.pl (DONE - 2009-11-06 - Hiram)  NOTE(review): date predates the 2010-03-03 download step - confirm
+ cd /hive/data/genomes/susScr2
+ cat << '_EOF_' > susScr2.config.ra
+# Config parameters for makeGenomeDb.pl:
+db susScr2
+clade mammal
+genomeCladePriority 35
+scientificName Sus scrofa
+commonName Pig
+assemblyDate Nov. 2009
+assemblyLabel SGSC Sscrofa9.2 (NCBI project 10718, GCA_000003025.2)
+assemblyShortLabel SGSC Sscrofa9.2
+orderKey 234
+mitoAcc NC_012095
+fastaFiles /hive/data/genomes/susScr2/genbank/ucscChr/chr*.fa.gz
+agpFiles /hive/data/genomes/susScr2/genbank/ucscChr/chr*.agp.gz
+# qualFiles none
+dbDbSpeciesDir pig
+taxId 9823
+'_EOF_'
+ # << happy emacs  NOTE(review): the quoted '_EOF_' terminator presumably targets csh/tcsh; bash/sh would need a bare _EOF_ - confirm the shell
+
+ time makeGenomeDb.pl -noGoldGapSplit -workhorse=hgwdev susScr2.config.ra \
+ > makeGenomeDb.log 2>&1  # stdout and stderr are both captured in makeGenomeDb.log
+ # real 9m0.673s
+
+ # add the trackDb entries to the source tree, and the 2bit link:
+ ln -s `pwd`/susScr2.unmasked.2bit /gbdb/susScr2/susScr2.2bit  # link target is the absolute path of the current directory's unmasked 2bit
+ # browser should function now
+
+#########################################################################