a3f24a9e80f42401d17724d6b88e6a9df1c24f24 max Tue Feb 2 06:39:57 2021 -0800 changes after code review, refs #26903 diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc index 6e17808..4c4bb8d 100755 --- src/utils/chromToUcsc/chromToUcsc +++ src/utils/chromToUcsc/chromToUcsc @@ -15,30 +15,33 @@ # ==== functions ===== def parseArgs(): " setup logging, parse command line arguments and options. -h shows auto-generated help page " parser = optparse.OptionParser("""usage: %prog [options] filename - change NCBI or Ensembl chromosome names to UCSC names in tabular or wiggle files, using a chromAlias table. Supports these UCSC file formats: BED, genePred, PSL, wiggle (all formats), bedGraph, VCF, SAM ... or any other csv or tsv format where the sequence (chromosome) name is a separate field. Requires a <genome>.chromAlias.tsv file which can be downloaded like this: %prog --get hg19 # download the file hg19.chromAlias.tsv into current directory If you do not want to use the --get option to retrieve the mapping tables, you can also download the alias mapping files yourself, e.g. for mm10 with 'wget https://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromAlias.txt.gz' + For NCBI RefSeq assemblies that are available as UCSC track hubs, the chromAlias files can be obtained like this: + 'wget https://hgdownload.soe.ucsc.edu/hubs/GCF/000/001/405/GCF_000001405.39/GCF_000001405.39.chromAlias.txt' + Then the script can be run like this: %prog -i in.bed -o out.bed -a hg19.chromAlias.tsv %prog -i in.bed -o out.bed -a https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromAlias.txt.gz Or in pipes, like this: cat test.bed | %prog -a mm10.chromAlias.tsv > test.ucsc.bed For BAM files use in a pipe with samtools: samtools view -h in.bam | ./chromToUcsc -a mm10.chromAlias.tsv | samtools -bS > out.bam """) parser.add_option("", "--get", dest="downloadDb", action="store", help="download a chrom alias table from UCSC for the genomeDb into the current directory and exit") parser.add_option("-a", "--chromAlias", dest="aliasFname", action="store", help="a UCSC chromAlias file in tab-sep format or the http/https URL to one") parser.add_option("-i", "--in", dest="inFname", action="store", help="input filename, default: /dev/stdin") parser.add_option("-o", "--out", dest="outFname", action="store", help="output filename, default: /dev/stdout") parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")