851385903f257839da60df1ba89e525360742b4a
braney
  Wed Dec 8 14:23:24 2021 -0800
add JASPAR 2022 core non-redundant PWMs to hgFixed

diff --git src/hg/makeDb/doc/hgFixed.txt src/hg/makeDb/doc/hgFixed.txt
index 99ca174..54518c8 100644
--- src/hg/makeDb/doc/hgFixed.txt
+++ src/hg/makeDb/doc/hgFixed.txt
@@ -1069,15 +1069,85 @@
 
     hgsql hg18 -e 'create database ctdBraney'
     hgsql ctdBraney < ~/kent/src/hg/lib/chem_gene_ixns.sql
 
     hgsql ctdBraney -e 'load data local infile "CTD_chem_gene_ixns.tsv" into table chem_gene_ixns'
 
 # create sorted data
 
     hgsql hg19 -N -e \
     'select x.geneSymbol, ChemicalId, count(distinct Interaction), ChemicalName from kgXref x, ctdBraney.chem_gene_ixns c where x.geneSymbol=c.GeneSymbol group by x.geneSymbol, ChemicalId'|\
     sort -k 1,1 -k 3,3nr -k 4,4 >ctdSorted.tab
 
     hgsql hgFixed < ~/kent/src/hg/lib/ctdSorted.sql
     hgsql hgFixed -e 'load data local infile "ctdSorted.tab" into table ctdSorted'
 
+#####
+# Jaspar 2022 PFM  (DONE 2021/12/08 braney)
+#####
+# fetch and unpack the JASPAR 2022 CORE non-redundant count matrices
+mkdir -p /hive/data/outside/jaspar/2022/all
+cd  /hive/data/outside/jaspar/2022/all
+wget "https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_non-redundant_pfms_jaspar.zip"
+unzip JASPAR2022_CORE_non-redundant_pfms_jaspar.zip
+
+# Convert every *.jaspar count matrix into one row of jasparMotif.tab:
+#   name, number of columns, then four comma-terminated lists holding the
+#   per-column frequency (count / column total) of each count row.
+# The name is the first word of the first line of a file with ">" removed.
+# On later lines fields 3..NF-1 are the counts; the first two fields and the
+# last one are skipped (presumably the base letter and the brackets).
+# One awk run handles all files: no scratch file in /tmp and no gawk-only
+# arrays of arrays.  A matrix without exactly four count rows, or with a
+# column whose counts sum to zero, stops the run with a message on stderr.
+awk '
+    # Report a malformed matrix on stderr and stop with a non-zero status.
+    function fail(msg) {
+        print src ": " msg > "/dev/stderr"
+        failed = 1
+        exit 1
+    }
+
+    # Print the matrix collected so far as one output row.
+    function emit(   row, col, line) {
+        if (src == "") return                 # before the first file: nothing collected
+        if (nRows != 4) fail("expected 4 count rows, found " nRows)
+        line = name "\t" width "\t"
+        for (row = 1; row <= nRows; row++) {
+            for (col = 1; col <= width; col++) {
+                if (int(total[col]) == 0) fail("counts in column " col " sum to zero")
+                # round to six decimals with %f first, then print the shortest %g form
+                line = line sprintf("%g,", sprintf("%f", cnt[row, col] / int(total[col])))
+            }
+            line = line "\t"                  # every list, including the last, ends with a tab
+        }
+        print line
+    }
+
+    FNR == 1 {                                # first line of a file: flush the previous matrix
+        emit()
+        src = FILENAME
+        name = $1
+        gsub(/>/, "", name)
+        nRows = 0
+        width = 0
+        split("", cnt)                        # portable way to empty an array
+        split("", total)
+        next
+    }
+
+    NF >= 4 {                                 # count row; shorter lines (e.g. blank) are ignored
+        nRows++
+        width = NF - 3
+        for (col = 1; col <= width; col++) {
+            cnt[nRows, col] = int($(col + 2)) # counts are truncated to integers
+            total[col] += $(col + 2)
+        }
+    }
+
+    END {
+        if (failed) exit 1                    # do not emit a half-read matrix after an error
+        emit()
+    }
+' *.jaspar > jasparMotif.tab
+
+hgLoadSqlTab hgFixed jasparCore2022 ~/kent/src/hg/lib/dnaMotif.sql jasparMotif.tab