Source code for schicluster.dev.generate_matrix

import numpy as np
from collections import Counter


def generate_matrix(infile, res, genome, dist, outdir, cell):
    """Bin intra-chromosomal contacts of one cell into per-chromosome files.

    Each line of ``infile`` is split on tabs.  Fields 1 and 5 (0-based) are
    read as chromosome names whose first three characters are dropped
    (presumably a ``chr`` prefix -- confirm against the producer of the
    file); fields 2 and 6 are read as integer positions.  A contact is kept
    when both ends lie on the same autosome, the positions are at least
    ``dist`` apart, and the two positions fall into different bins of width
    ``res``.  Kept contacts are counted per ``(chromosome, bin1, bin2)``
    with ``bin1 < bin2``.

    For every autosome ``c`` the file
    ``outdir + 'chr' + c + '/' + cell + '_chr' + c + '.txt'`` is written
    (it is created even when it stays empty) with one line
    ``bin1<TAB>bin2<TAB>count.0`` per counted bin pair.

    Parameters
    ----------
    infile : str
        Path of the tab-separated contact file.
    res : int
        Bin width; positions are mapped to bins by floor division.
    genome : str
        Genome name.  Only its first two characters are inspected:
        ``'hg'`` selects chromosomes 1-22, ``'mm'`` selects 1-19.
    dist : int
        Minimum absolute distance between the two positions.
    outdir : str
        Output prefix.  It is concatenated as-is, so it must end with a
        path separator, and the ``chr<N>`` sub-directories must exist.
    cell : str
        Cell identifier used in the output file names.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``genome`` starts with neither ``'hg'`` nor ``'mm'``, or if a
        position field is not an integer.
    IndexError
        If a non-blank line has fewer than seven tab-separated fields.
    OSError
        If the input or any output file cannot be opened.
    """
    # Local import keeps this block self-contained; ExitStack guarantees that
    # every output handle is closed even if opening a later one fails.
    from contextlib import ExitStack

    prefix = genome[:2]
    if prefix == 'hg':
        n_autosomes = 22
    elif prefix == 'mm':
        n_autosomes = 19
    else:
        # Previously this fell through and failed later with an unbound
        # ``chrom`` variable; fail early with an explicit message instead.
        raise ValueError(
            "unsupported genome {0!r}: expected a name starting with "
            "'hg' or 'mm'".format(genome)
        )
    chrom = [str(i + 1) for i in range(n_autosomes)]
    autosomes = set(chrom)  # O(1) membership test inside the read loop

    count = Counter()
    with open(infile, 'r') as fin:
        for line in fin:
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            chrom1, chrom2 = fields[1][3:], fields[5][3:]
            pos1, pos2 = int(fields[2]), int(fields[6])
            if abs(pos2 - pos1) < dist:
                continue
            if chrom1 != chrom2 or chrom1 not in autosomes:
                continue
            bin1, bin2 = pos1 // res, pos2 // res
            if bin1 > bin2:
                bin1, bin2 = bin2, bin1
            if bin1 != bin2:
                # Tuple keys avoid the join/split round trip through a
                # '-'-delimited string.
                count[(chrom1, bin1, bin2)] += 1

    with ExitStack() as stack:
        fout = {
            c: stack.enter_context(
                open(outdir + 'chr' + c + '/' + cell + '_chr' + c + '.txt', 'w')
            )
            for c in chrom
        }
        # Counter preserves insertion order, so rows are written in the order
        # in which each bin pair was first seen in the input.
        for (c, bin1, bin2), n in count.items():
            fout[c].write('{0}\t{1}\t{2}.0\n'.format(bin1, bin2, n))
    return