be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/utils/chromToUcsc/chromToUcsc src/utils/chromToUcsc/chromToUcsc index 3760c0f..683b3f0 100755 --- src/utils/chromToUcsc/chromToUcsc +++ src/utils/chromToUcsc/chromToUcsc @@ -5,30 +5,33 @@ try: from urllib.request import urlopen # py3 except ImportError: from urllib2 import urlopen # py2 try: from cStringIO import StringIO # py2 except ImportError: from io import BytesIO # py3 # ==== functions ===== def parseArgs(): " setup logging, parse command line arguments and options. -h shows auto-generated help page " parser = optparse.OptionParser("""usage: %prog [options] filename - change NCBI or Ensembl chromosome names to UCSC names in tabular or wiggle files, using a chromAlias table. + Supports at least these file formats: BED, PSL, genePred, VCF, wiggle (bedgraph, span and fixed and mixes thereof), SAM, chain, interact, + pgSNP, GTF/GFF. + Requires a <genome>.chromAlias.tsv file which can be downloaded like this: %prog --get hg19 # download the file hg19.chromAlias.tsv into current directory If you do not want to use the --get option to retrieve the mapping tables, you can also download the alias mapping files yourself, e.g. 
for mm10 with 'wget https://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromAlias.txt.gz' Then the script can be run like this: %prog -i in.bed -o out.bed -a hg19.chromAlias.tsv %prog -i in.bed -o out.bed -a https://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromAlias.txt.gz Or in pipes, like this: cat test.bed | %prog -a mm10.chromAlias.tsv > test.ucsc.bed """) parser.add_option("", "--get", dest="downloadDb", action="store", help="download a chrom alias table from UCSC for the genomeDb into the current directory and exit") @@ -152,27 +155,29 @@ args, options = parseArgs() aliasFname = options.aliasFname inFname = options.inFname outFname = options.outFname if options.downloadDb: download(options.downloadDb) if aliasFname is None: logging.error("You need to provide an alias table with the -a option or use --get to download one.") exit(1) if inFname is None: ifh = stdin + elif inFname.endswith(".gz"): + ifh = gzip.open(inFname) else: ifh = open(inFname) if outFname is None: ofh = stdout else: ofh = open(outFname, "w") fieldIdx = options.fieldNo-1 chromToUcsc(aliasFname, fieldIdx, ifh, ofh) main()