src/hg/makeDb/doc/sacCer2.txt 1.17

1.17 2010/02/05 06:54:06 kuhn
grammar
Index: src/hg/makeDb/doc/sacCer2.txt
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/makeDb/doc/sacCer2.txt,v
retrieving revision 1.16
retrieving revision 1.17
diff -b -B -U 4 -r1.16 -r1.17
--- src/hg/makeDb/doc/sacCer2.txt	2 Feb 2010 18:56:32 -0000	1.16
+++ src/hg/makeDb/doc/sacCer2.txt	5 Feb 2010 06:54:06 -0000	1.17
@@ -189,9 +189,9 @@
 
     ldHgGene -gtf sacCer2 sgdGene sacCer2.sgdGene.gtf
     hgLoadBed sacCer2 sgdOther otherFeatures.bed \
         -tab -sqlTable=$HOME/kent/src/hg/lib/sgdOther.sql
-    #	this perl script will fixup the fasta header lines for
+    #	this perl script will fix up the fasta header lines for
     #	the chr*.peptides.fsa files so they can be fed into hgSgdPep
     cat << '_EOF_' > filter.pl
 #!/usr/bin/env perl
 
@@ -205,9 +205,9 @@
 while (my $line=<>) {
     if ($line =~ m/^>Annotated\|/) {
         $inAnnotation = 1;
         my (@words) = split('\s+', $line);
-        die "can not find four fields in\n'$line'" if (scalar(@words) < 4);
+        die "cannot find four fields in\n'$line'" if (scalar(@words) < 4);
         my $name = $words[3];
         $name =~ s/;.*//;
         $name =~ s#/.*##;
         printf ">ORFP:%s\n", $name;
@@ -302,8 +302,11 @@
     hgsql sacCer2 -e 'load data local infile "sgdToSwissProt.txt" \
           into table sgdToSwissProt;'
     hgProtIdToGenePred sacCer2 sgdGene sgdToSwissProt name value
 
+# It is interesting to note that sgdToSwissProt has one accession that is
+# not in sgdGene.name:  KHS1  (BK 2009-07-14)
+
 ############################################################################
 # CREATE SGD-BASED CLONE TRACK (DONE - 2009-02-10 - Hiram)
     mkdir /hive/data/genomes/sacCer2/bed/sgdClone
     cd /hive/data/genomes/sacCer2/bed/sgdClone