The following 121 words could not be found in the dictionary of 615 words (including 615 LocalSpellingWords) and are highlighted below:

and   append   argv   base   bases   by   Chr   chr   chrlist   chrnames   chrom   chromosome   Chromosomes   Chrs   chrsize   close   comment   continue   converts   count   creating   ctotal   declare   dict   dictionary   didn   Eland   elandfilename   else   empty   endswith   exit   export   Export   fa   fbegin   feedback   fhout   file   File   filter   filtercode   filtering   fname   Fnames   for   format   Fps   from   give   if   import   in   inputfile   int   intuple   keys   L1   len   line   lines   many   mm   more   mybase   mycount   myset   Name   new   newline   next   nlines   open   os   out   outfile   output   pass   program   python   randint   random   range   reads   remove   results   rpartition   rstrip   script   search   seek   sequence   size   skip   Solexa   some   sort   sorted   spit   split   start   startswith   stderr   Step   str   strand   sys   tab   tell   than   that   the   to   too   usage   variable   which   while   wig   with   write  

Clear message

Solexa Eland Export format to WIG

Here is a python script which converts from Eland export output to a wig file:

#!/n/site/inst/Linux-i686/sys/bin/python
# 
#
import os, sys, random

# if no args print usage message and quit
if len(sys.argv) < 3:
    print 'usage: program chrlist elandfilename'
    sys.exit(0)


# create output file name
inputfile = sys.argv[2].rpartition('/')[2]

if inputfile.endswith('.txt'):
    intuple = inputfile.rpartition('.')
    outfile = intuple[0] + ".wig"
else:
    outfile = inputfile + ".wig"

# get chromosome ids, UCSC names, sizes
f=open(sys.argv[1], 'r')

Chrs = []
chrnames = {}
chrsize = {}
for line in f:
    # skip comment lines
    if(line.startswith("#")):
        continue
    # remove newline, split by tab
    (chr,size,fa) = line.rstrip('\n').split('\t')
    Chrs.append(fa)
    chrnames[fa] = chr
    chrsize[fa] = size
f.close()

print "Chromosomes to search for:"
for c in Chrs[:]:
    print c + " "
    print chrnames[c]

# print len(Chrs)

# create filenames
# and open a bunch of files
ChrFnames = []
Fps = {}
i = 0
while i < len(Chrs):
    newFileName = Chrs[i] + "." + str(random.randint(1,1000)) + ".txt"
    Fps[Chrs[i]] = open(newFileName,'w')
    ChrFnames.append(newFileName)
    i = i + 1

# open input eland file
f=open(sys.argv[2], 'r')

# remember the begining position of file
fbegin = f.tell()

for c in Chrs[:]:
    f.seek(fbegin)
    # declare empty dictionary
    bases = {}
    print >>sys.stderr, "chromosome: " + c
    nlines = 0
    for line in f:
        nlines += 1
        L1 = line.split('\t')
        filtercode = L1[21].rstrip('\n')
        # skip reads that didn't pass filtering
        if filtercode == "N":
            continue
        # 10 is chr, 12 is base, 13 is strand
        if(L1[10] == c):
            # filter out U reads with more than 2 mm
            myset = ["A", "C", "G", "T", "N"]
            ctotal = 0
            for s in myset:
                mycount = L1[14].count(s)
                ctotal += mycount
            if(ctotal > 2):
                # too many mm, skip to next sequence
                continue
            start = int(L1[12])
            end = start + 36
            if(end > int(chrsize[c])):
                end = int(chrsize[c])
            for i in range(start, end):
                if(i in bases):
                    bases[i] += 1
                else:
                    bases[i] = 1
        # give some feedback
        if(not nlines % 10000):
            print >>sys.stderr, nlines

    # spit out results
    print >>Fps[c], "variableStep chrom=" + chrnames[c]

    # sort the dict and print it to file
    for mybase in sorted(bases.keys()):
        print >>Fps[c], "%s\t%d" % (mybase, bases[mybase])
    # close file
    Fps[c].close()
f.close()

# create an output file to concatenate all the results
# might want to test for presence of outfile, and if exists modify with random number
# newFileName = "output" + str(random.randint(1,1000)) + ".wig"

print >>sys.stderr, "creating output file: " + outfile

fhout = open(outfile, 'w')

# open each temp chr file
# write results to output file
# remove temp file
for fname in ChrFnames[:]:
    f = open(fname,'r')
    for line in f:
        fhout.write(line)
    f.close()
    os.remove(fname)

fhout.close()

Solexa/ElandExport2WIG (last edited 2009-07-31 22:35:49 by ChrisSeidel)