62b1428915cc2c8b9acbf46a86c1248833257efe
mspeir
  Mon Nov 17 18:50:32 2025 -0800
changing rest of genome-source references to github, refs #34485

diff --git src/hg/utils/automation/aws/hubStart.txt src/hg/utils/automation/aws/hubStart.txt
index 913030d2687..d2c8e785ad1 100644
--- src/hg/utils/automation/aws/hubStart.txt
+++ src/hg/utils/automation/aws/hubStart.txt
@@ -1,175 +1,175 @@
 #!/bin/bash
 
 # on AWS and OpenStack, This script ends up in:
 #       /var/lib/cloud/instance/scripts/part-001
 # running as the 'root' user and is run during first start up of the machine
 # This script attempts to keep its own log file.  If anything leaks out from
 #   here, it will end up in:
 #    /var/log/cloud-init-output.log
 
 # start a log file to record information from the operation of this script:
 export logFile="/tmp/startUpScript.log.$$"
 
 # record environment to find out the operating conditions of this script
 
 printf "#### begin start up script log %s\n" "`date '+%s %F %T'`" > "${logFile}"
 printf "#### uname -a\n" >> "${logFile}"
 uname -a >> "${logFile}" 2>&1
 printf "#### df -h\n" >> "${logFile}"
 df -h >> "${logFile}" 2>&1
 printf "#### env\n" >> "${logFile}"
 env >> "${logFile}" 2>&1
 printf "#### set\n" >> "${logFile}"
 set >> "${logFile}" 2>&1
 printf "#### arp -a\n" >> "${logFile}"
 arp -a >> "${logFile}" 2>&1
 printf "#### ifconfig -a\n" >> "${logFile}"
 ifconfig -a >> "${logFile}" 2>&1
 printf "#### PATH\n" >> "${logFile}"
 printf "${PATH}\n" >> "${logFile}" 2>&1
 
 export homeDir="notFound"
 export nativeUser="notFound"
 
 # determine OpenStack or AWS environment
 if [ -d "/home/centos" ]; then
   nativeUser="centos"
 elif [ -d "/home/ec2-user" ]; then
   nativeUser="ec2-user"
 fi
 homeDir="/home/$nativeUser"
 
 
 if [ -s "${homeDir}/.bashrc" ]; then
   export startTime=`date "+%s"`
   printf "## parasol hub machine started install %s\n" "`date '+%s %F %T'`" >> /etc/motd
   export instanceId=`curl http://169.254.169.254/latest/meta-data/instance-id`
   # directories and symlinks to make this machine appear very much as it
   # would in the U.C. Santa Cruz Genome browser development environment
   mkdir -p /data/bin /data/scripts /data/genomes /data/bedtools /data/parasol/nodeInfo ${homeDir}/bin /hive /cluster/bin /scratch /genomes
   chmod 777 /data /scratch /hive /cluster /cluster/bin /genomes
   chmod 755 /data/bin /data/scripts /data/genomes /data/parasol ${homeDir}/bin
   chmod 755 ${homeDir}
   ln -s /data/bedtools /cluster/bin/bedtools
   ln -s /data /hive/data
   ln -s /dev/shm /scratch/tmp
 
   # these additions to .bashrc should protect themselves from repeating
   printf "export PATH=/data/bin:/data/scripts:${homeDir}/bin:\$PATH\n" >> "${homeDir}/.bashrc"
   printf "export LANG=C\n" >> "${homeDir}/.bashrc"
   printf "alias og='ls -ogrt'\n" >> "${homeDir}/.bashrc"
   printf "alias plb='parasol list batches'\n" >> "${homeDir}/.bashrc"
   printf "alias vi='vim'\n" >> "${homeDir}/.bashrc"
   printf "set -o vi\n" >> "${homeDir}/.bashrc"
   printf "export AWS_DEFAULT_OUTPUT=text\n" >> "${homeDir}/.bashrc"
   availZone=`ec2-metadata --availability-zone | sed -e 's/placement: *//;'`
   region=`echo ${availZone:0:${#availZone} - 1}`
   printf "export AWS_DEFAULT_REGION=%s\n" "${region}" >> "${homeDir}/.bashrc"
   printf "export AWS_ACCESS_KEY_ID=AWS_AccessKey\n" >> "${homeDir}/.bashrc"
   printf "export AWS_SECRET_ACCESS_KEY=AWS_SecretKey\n" >> "${homeDir}/.bashrc"
   printf "instId: %s, availZone: %s, region: %s\n" "${instanceId}" "${availZone}" "${region}" >> "${logFile}"
   printf "set background=dark\n" >> "${homeDir}/.vimrc"
 
   # install the wget command right away so the wgets can get done
   yum -y install wget >> "${logFile}" 2>&1
   # and the kent command line utilities into /data/bin/
   rsync -a --stats rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/ /data/bin/ >> "${logFile}" 2>&1
   # initParasol script to manage start/stop of parasol system
   wget -qO /data/parasol/initParasol 'http://genomewiki.ucsc.edu/images/4/4f/InitParasol.sh.txt' >> "${logFile}" 2>&1
   chmod 755 /data/parasol/initParasol
   # script in nodeInfo to be used by the nodes to report themselves
   wget -qO /data/parasol/nodeInfo/nodeReport.sh 'http://genomewiki.ucsc.edu/images/e/e3/NodeReport.sh.txt' >> "${logFile}" 2>&1
   chmod 755 /data/parasol/nodeInfo/nodeReport.sh
   # bedSingleCover.pl for use in running featureBits like measurements
-  wget -P /data/scripts 'https://genome-source.gi.ucsc.edu/gitlist/kent.git/raw/master/src/utils/bedSingleCover.pl' >> "${logFile}" 2>&1
+  wget -P /data/scripts 'https://github.com/ucscGenomeBrowser/kent/raw/master/src/utils/bedSingleCover.pl' >> "${logFile}" 2>&1
   chmod +x /data/scripts/bedSingleCover.pl
   chown -R ${nativeUser}:${nativeUser} /data "${homeDir}/bin" "${homeDir}/.vimrc"
 
   # ec2-user HOME/bin/attachVolume.sh used to attach extra data volume
   wget -qO "${homeDir}/bin/attachVolume.sh"  'http://genomewiki.ucsc.edu/images/2/2c/AwsAttachVolume.sh.txt' >> "${logFile}" 2>&1
   chmod 755 "${homeDir}/bin/attachVolume.sh"
   chown ${nativeUser}:${nativeUser} "${homeDir}/bin/attachVolume.sh"
   chown ${nativeUser}:${nativeUser} "/genomes"
 
   # and now can start the rest of yum installs, these take a while
   # useful to have the 'host' command, 'traceroute' and nmap ('nc')
   # to investigate the network, wget for transfers, and git for kent source tree
   # the git-all installs 87 packages, including perl, tcsh for the csh shell,
   # screen for terminal management, vim for editing convenience, bc for math
   yum -y update >> "${logFile}" 2>&1
   yum repolist >> "${logFile}" 2>&1
   yum -y install bind-utils traceroute nmap-ncat tcsh screen vim-X11 vim-common vim-enhanced vim-minimal git-all bc >> "${logFile}" 2>&1
   yum -y install epel-release >> "${logFile}" 2>&1
   # this python business allows the crispr calculation pipeline to function
   yum -y install python-pip gcc gcc-c++ zlib-devel python-devel tkinter libpng12 strace >> "${logFile}" 2>&1
   pip install --ignore-installed pytabix pandas twobitreader scipy matplotlib numpy >> "${logFile}" 2>&1
   pip install scikit-learn==0.16.1 Biopython xlwt >> "${logFile}" 2>&1
 
   # these systemctl commands may not be necessary
-  # the package installs may have already performed these initalizations
+  # the package installs may have already performed these initializations
   systemctl enable rpcbind >> "${logFile}" 2>&1
   systemctl enable nfs-server >> "${logFile}" 2>&1
   systemctl start rpcbind >> "${logFile}" 2>&1
   systemctl start nfs-server >> "${logFile}" 2>&1
   systemctl start nfs-lock >> "${logFile}" 2>&1
   systemctl start nfs-idmap >> "${logFile}" 2>&1
 
   # this business needs to be improved to be a single rsync from
   # a download directory.
-  git archive --format=tar --remote=git://genome-source.soe.ucsc.edu/kent.git \
-      --prefix=kent/ HEAD src/hg/utils/automation \
-        | tar vxf - -C /data/scripts --strip-components=5 \
-      --exclude='kent/src/hg/utils/automation/incidentDb' \
-      --exclude='kent/src/hg/utils/automation/configFiles' \
-      --exclude='kent/src/hg/utils/automation/ensGene' \
-      --exclude='kent/src/hg/utils/automation/genbank' \
-      --exclude='kent/src/hg/utils/automation/lastz_D' \
-      --exclude='kent/src/hg/utils/automation/openStack' >> "${logFile}" 2>&1
+  # github.com does not serve 'git archive --remote' requests, so instead
+  # stream the master branch tarball from github and let tar extract only the
+  # src/hg/utils/automation tree (curl -f turns an HTTP error into a failure)
+  curl -fsSL 'https://github.com/ucscGenomeBrowser/kent/archive/refs/heads/master.tar.gz' \
+        | tar vxzf - -C /data/scripts --strip-components=5 --wildcards \
+      --exclude='*/src/hg/utils/automation/incidentDb' --exclude='*/src/hg/utils/automation/configFiles' \
+      --exclude='*/src/hg/utils/automation/ensGene' --exclude='*/src/hg/utils/automation/genbank' \
+      --exclude='*/src/hg/utils/automation/lastz_D' --exclude='*/src/hg/utils/automation/openStack' \
+      '*/src/hg/utils/automation' >> "${logFile}" 2>&1
 
   chown -R ${nativeUser}:${nativeUser} /data/scripts >> "${logFile}" 2>&1
 
   # setup NFS exports
   export localIp=`curl http://169.254.169.254/latest/meta-data/local-ipv4`
   export privateIp=`ifconfig -a | grep broadcast | head -1 | awk '{print $2}'`
   export subNet=`ifconfig -a | grep broadcast | head -1 | awk '{print $2}' | awk -F'.' '{printf "%s.%s.0.0", $1,$2}'`
 
   printf "# localIp: '%s', instanceId: '%s'\n" "${localIp}" "${instanceId}" >> "${logFile}" 2>&1
   printf "# lsblk:\n" >> "${logFile}" 2>&1
   lsblk >> "${logFile}" 2>&1
   printf "# fdisk -l:\n" >> "${logFile}" 2>&1
   fdisk -l >> "${logFile}" 2>&1
 
   # special case OpenStack /mnt setups (temporary work around for difficulties)
   if [ "${nativeUser}" = "centos" ]; then
     umount /mnt >> "${logFile}" 2>&1
     parted -s /dev/vdb mklabel gpt >> "${logFile}" 2>&1
     parted -s /dev/vdb mkpart primary 2048s 100% >> "${logFile}" 2>&1
     mkfs -t xfs /dev/vdb1 >> "${logFile}" 2>&1
     sed --in-place=.bak -e 's#/dev/vdb#/dev/vdb1#; s/auto/xfs/;' /etc/fstab >> "${logFile}" 2>&1
     mount /mnt
     printf "/mnt    ${subNet}/16(rw)\n" >> /etc/exports
   else
     printf "/data    ${subNet}/16(rw)\n" >> /etc/exports
     printf "/genomes    ${subNet}/16(rw)\n" >> /etc/exports
   fi
 
   exportfs -a >> "${logFile}" 2>&1
   printf "### meta data from curl 169.254.169.254\n" >> "${logFile}" 2>&1
 
   curl http://169.254.169.254/latest/meta-data/ 2> /dev/null | while read N
 do
   V=`curl http://169.254.169.254/latest/meta-data/${N} 2> /dev/null`
   printf "%s\t%s\n" "${N}" "${V}"
 done >> "${logFile}" 2>&1
 
   export endTime=`date "+%s"`
   export et=`echo $endTime $startTime | awk '{printf "%d", $1-$2}'`
   printf "## parasol hub machine finished install %s\n" "`date '+%s %F %T'`" >> /etc/motd
   printf "## elapsed time: %d seconds\n" "${et}" >> /etc/motd
   printf "## elapsed time: %d seconds\n## %s\n" "${et}" "`date '+%s %F %T'`" >> /tmp/hub.machine.ready.signal
   printf "## this parasol hub privateIp: %s\n" "${privateIp}" >> /tmp/hub.machine.ready.signal
   printf "## this parasol hub subnet: %s\n" "${subNet}" >> /tmp/hub.machine.ready.signal
   printf "## external IP address: %s from curl icanhazip.com\n" "`curl icanhazip.com`" >> /tmp/hub.machine.ready.signal
   printf "## external IP address: %s from curl wgetip.com\n" "`curl wgetip.com`" >> /tmp/hub.machine.ready.signal
 fi
 printf "#### end start up script log %s\n" "`date '+%s %F %T'`" >> "${logFile}"