52e47105152b7e5dd2621b9e8439446e0d77336f
braney
  Mon Oct 23 12:01:11 2023 -0700
build Jaspar 2024 motif table

diff --git src/hg/makeDb/doc/hgFixed.txt src/hg/makeDb/doc/hgFixed.txt
index 54518c8..b226141 100644
--- src/hg/makeDb/doc/hgFixed.txt
+++ src/hg/makeDb/doc/hgFixed.txt
@@ -1079,15 +1079,26 @@
     sort -k 1,1 -k 3,3nr -k 4,4 >ctdSorted.tab
 
     hgsql hgFixed < ~/kent/src/hg/lib/ctdSorted.sql
     hgsql hgFixed -e 'load data local infile "ctdSorted.tab" into table ctdSorted'
 
 #####
 # Jaspar 2022 PFM  (DONE 2021/12/08 braney)
 #####
 mkdir -p /hive/data/outside/jaspar/2022/all
 cd  /hive/data/outside/jaspar/2022/all
 wget "https://jaspar.genereg.net/download/data/2022/CORE/JASPAR2022_CORE_non-redundant_pfms_jaspar.zip"
 unzip JASPAR2022_CORE_non-redundant_pfms_jaspar.zip
 for i in *.jaspar; do f=`awk '{print $1; exit}' $i | tr -d '>'`;  tail -n +1  $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) counts[ii - 3] += $ii;} END {for(ii=0; ii < count; ii++) printf "%d\t", counts[ii]; printf "\n"}'  > /tmp/1;  tail -n +2   $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) printf "%d\t", $ii; printf "\n"}' >> /tmp/1; tail -n +1 $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) counts[ii - 3] += $ii;} END {for(ii=0; ii < count; ii++) printf "%d\t", counts[ii]; printf "\n"}' | awk '{numCols=NF; for(ii=1;ii <= numCols; ii++) mat[NR-1][ii] = $ii;} END {for(jj=1; jj <= numCols; jj++) {for(ii=0; ii < NR; ii++) printf "%d\t",mat[ii][jj]; printf "\n";}}'  /tmp/1 | awk '{for (ii=2; ii <= NF; ii++) printf "%f\t",$ii/$1; printf "\n";}' | awk -v name=$f '{numCols=NF; for(ii=1;ii <= numCols; ii++) mat[NR-1][ii] = $ii;} END {printf "%s\t%d\t",name,NR;for(jj=1; jj <= numCols; jj++) {for(ii=0; ii < NR; ii++) printf "%g,",mat[ii][jj]; printf "\t";}} END {printf "\n"}'  ; done > jasparMotif.tab
 
 hgLoadSqlTab hgFixed jasparCore2022 ~/kent/src/hg/lib/dnaMotif.sql jasparMotif.tab
+
+#####
+# Jaspar 2024 PFM  (DONE 2023/10/23 braney)
+#####
+mkdir -p /hive/data/outside/jaspar/2024/all
+cd  /hive/data/outside/jaspar/2024/all
+wget "https://testjaspar.uio.no/download/data/2024/CORE/JASPAR2024_CORE_non-redundant_pfms_jaspar.zip"
+unzip JASPAR2024_CORE_non-redundant_pfms_jaspar.zip
+for i in *.jaspar; do f=`awk '{print $1; exit}' $i | tr -d '>'`;  tail -n +1  $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) counts[ii - 3] += $ii;} END {for(ii=0; ii < count; ii++) printf "%d\t", counts[ii]; printf "\n"}'  > /tmp/1;  tail -n +2   $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) printf "%d\t", $ii; printf "\n"}' >> /tmp/1; tail -n +1 $i | awk '{count=NF - 3;  for (ii=3; ii <= NF - 1; ii++) counts[ii - 3] += $ii;} END {for(ii=0; ii < count; ii++) printf "%d\t", counts[ii]; printf "\n"}' | awk '{numCols=NF; for(ii=1;ii <= numCols; ii++) mat[NR-1][ii] = $ii;} END {for(jj=1; jj <= numCols; jj++) {for(ii=0; ii < NR; ii++) printf "%d\t",mat[ii][jj]; printf "\n";}}'  /tmp/1 | awk '{for (ii=2; ii <= NF; ii++) printf "%f\t",$ii/$1; printf "\n";}' | awk -v name=$f '{numCols=NF; for(ii=1;ii <= numCols; ii++) mat[NR-1][ii] = $ii;} END {printf "%s\t%d\t",name,NR;for(jj=1; jj <= numCols; jj++) {for(ii=0; ii < NR; ii++) printf "%g,",mat[ii][jj]; printf "\t";}} END {printf "\n"}'  ; done > jasparMotif.tab
+
+hgLoadSqlTab hgFixed jasparCore2024 ~/kent/src/hg/lib/dnaMotif.sql jasparMotif.tab