97199fbf1ff56c9ee01956deb736b97244ea3ac6
hiram
  Sun Mar 1 21:52:28 2020 -0800
calculate a featureBits-like measurement for gene tracks, remove duplicates for ncbiRefSeq, remove blanks from gene names for ncbiRefSeq, and fix a fundamental bug in the reference to geneToId in ncbiRefSeqOtherAttrs.pl refs #23891

diff --git src/hg/utils/automation/doXenoRefGene.pl src/hg/utils/automation/doXenoRefGene.pl
index a522341..45f384d 100755
--- src/hg/utils/automation/doXenoRefGene.pl
+++ src/hg/utils/automation/doXenoRefGene.pl
@@ -284,30 +284,37 @@
   }
 
   my $whatItDoes = "Makes bigGenePred.bb file from filterPsl output.";
   my $bossScript = newBash HgRemoteScript("$runDir/makeGp.bash", $workhorse,
 				      $runDir, $whatItDoes);
 
   $bossScript->add(<<_EOF_
 export db=$db
 if [ -s "\$db.xenoRefGene.psl" ]; then
   grep NR_ \$db.xenoRefGene.psl > NR.psl
   grep NM_ \$db.xenoRefGene.psl > NM.psl
   mrnaToGene -cdsDb=hgFixed NM.psl NM.gp
   mrnaToGene -noCds NR.psl NR.gp
   cat NM.gp NR.gp | genePredSingleCover stdin \$db.xenoRefGene.gp
   genePredCheck -db=\$db -chromSizes=\$db.chrom.sizes \$db.xenoRefGene.gp
+  genePredToBed \$db.xenoRefGene.gp stdout \\
+    | bedToExons stdin stdout | bedSingleCover.pl stdin > \$db.exons.bed
+  export baseCount=`awk '{sum+=\$3-\$2}END{printf "%d", sum}' \$db.exons.bed`
+  export asmSizeNoGaps=`grep sequences ../../\$db.faSize.txt | awk '{print \$5}'`
+  export perCent=`echo \$baseCount \$asmSizeNoGaps | awk '{printf "%.3f", 100.0*\$1/\$2}'`
+  printf "%d bases of %d (%s%%) in intersection\\n" "\$baseCount" "\$asmSizeNoGaps" "\$perCent" > fb.\$db.xenoRefGene.txt
+  rm -f \$db.exons.bed
   genePredToBigGenePred -geneNames=$mrnas/geneOrgXref.txt \$db.xenoRefGene.gp \\
      stdout | sort -k1,1 -k2,2n > \$db.bgpInput
   sed -e 's#Alternative/human readable gene name#species of origin of the mRNA#; s#Name or ID of item, ideally both human readable and unique#RefSeq accession id#; s#Primary identifier for gene#gene name#;' \\
     \$HOME/kent/src/hg/lib/bigGenePred.as > xenoRefGene.as
   bedToBigBed -extraIndex=name,geneName -type=bed12+8 -tab -as=xenoRefGene.as \\
      \$db.bgpInput \$db.chrom.sizes \$db.xenoRefGene.bb
   \$HOME/kent/src/hg/utils/automation/xenoRefGeneIx.pl \$db.bgpInput | sort -u > \$db.ix.txt
   ixIxx \$db.ix.txt \$db.xenoRefGene.ix \$db.xenoRefGene.ixx
 fi
 _EOF_
   );
   $bossScript->execute();
 } # doMakeGp
 
 #########################################################################