Correlation between loop and other modalities#
import cooler
import numpy as np
import pandas as pd
from scipy.sparse import triu
from scipy.stats import pearsonr, zscore, norm
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor, as_completed
from ALLCools.mcds import MCDS
from ALLCools.mcds.utilities import calculate_posterior_mc_frac
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib import cm as cm
import seaborn as sns
mpl.style.use('default')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica'
leg = ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'L5_ET', 'Amy',
'Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7',
'MSN_D1', 'MSN_D2', 'Foxp2', 'SubCtx',
'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
]
legname = ['L2/3-IT', 'L4-IT', 'L5-IT', 'L6-IT', 'L6-IT-Car3', 'L5/6-NP', 'L6-CT', 'L6b', 'L5-ET', 'Amy-Exc',
'Lamp5', 'Lamp5-Lhx6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb-ChC', 'Sst', 'Chd7',
'MSN-D1', 'MSN-D2', 'Foxp2', 'SubCtx-Cplx',
'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
]
leg2name = {xx:yy for xx,yy in zip(leg, legname)}
leg = {'exc': ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'Amy'],
'inh': ['Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7'],
'msn': ['MSN_D1', 'MSN_D2', 'Foxp2'],
'sub': ['SubCtx'],
'glia': ['ASC', 'ODC', 'OPC'],
'mgc': ['MGC'],
'smc': ['PC'],
'endo': ['EC'],
'fibro': ['VLMC'],
}
leg['neu'] = leg['exc'] + leg['inh'] + leg['msn'] + leg['sub']
leg['all'] = leg['neu'] + leg['glia'] + leg['mgc'] + leg['smc'] + leg['endo'] + leg['fibro']
group_name = 'neu'
leg = pd.Index(leg[group_name])
legname = leg.map(leg2name)
res = 10000
indir = '/home/jzhou_salk_edu/sky_workdir/hba/loop_majortype/'
outdir = f'/home/jzhou_salk_edu/sky_workdir/hba/loop_majortype/diff/{group_name}/'
chrom_size_path = f'{indir}hg38_with_chrl.main.chrom.sizes'
chrom_sizes = cooler.read_chromsizes(chrom_size_path, all_names=True)
loopall = pd.read_hdf(f'{outdir}merged_loop.hdf', key='data')
loopall
0 | 1 | 2 | 3 | 4 | 5 | Qanova | Eanova | Tanova | mCG_corr | mCH_corr | ATAC_corr | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | chr1 | 900000 | 910000 | chr1 | 960000 | 970000 | 3.750881 | 6.097476 | 2.068213 | 0.031954 | 0.094070 | 0.253426 |
1 | chr1 | 900000 | 910000 | chr1 | 970000 | 980000 | 3.322128 | 6.001146 | 2.007495 | -0.090257 | 0.070470 | 0.265731 |
2 | chr1 | 910000 | 920000 | chr1 | 970000 | 980000 | 3.293559 | 5.439024 | 2.229271 | -0.018746 | -0.044665 | 0.376666 |
3 | chr1 | 910000 | 920000 | chr1 | 980000 | 990000 | 2.704021 | 5.648575 | 2.289167 | -0.105304 | -0.060823 | 0.306052 |
4 | chr1 | 910000 | 920000 | chr1 | 990000 | 1000000 | 2.819877 | 5.675182 | 1.669268 | -0.354732 | -0.320624 | 0.051738 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2873610 | chr22 | 50570000 | 50580000 | chr22 | 50670000 | 50680000 | 1.646674 | 11.822375 | 1.625390 | 0.009941 | -0.277535 | 0.078601 |
2873611 | chr22 | 50580000 | 50590000 | chr22 | 50670000 | 50680000 | 2.256175 | 11.555016 | 1.335000 | -0.049832 | -0.312835 | 0.082439 |
2873612 | chr22 | 50590000 | 50600000 | chr22 | 50670000 | 50680000 | 3.531459 | 11.165133 | 1.195543 | 0.075519 | -0.135215 | 0.085896 |
2873613 | chr22 | 50600000 | 50610000 | chr22 | 50670000 | 50680000 | 4.896728 | 11.926161 | 2.028210 | 0.152859 | -0.214300 | 0.119297 |
2873614 | chr22 | 50610000 | 50620000 | chr22 | 50670000 | 50680000 | 5.475712 | 13.031472 | 3.153314 | -0.041111 | -0.198155 | 0.250070 |
2873615 rows × 12 columns
loopq = pd.read_hdf(f'{outdir}loop_Q.hdf', key='data')
loopt = pd.read_hdf(f'{outdir}loop_T.hdf', key='data')
Load Loop mC#
idx1 = loopall[0] + '_' + (loopall[1] // res).astype(str)
idx2 = loopall[3] + '_' + (loopall[4] // res).astype(str)
mcds = MCDS.open('/data/hba/mc_majortype/MajorType.mcds', var_dim='chrom5k')
mcds['chrom10k'] = mcds['chrom5k_chrom'].to_pandas().astype(str) + '_' + (mcds['chrom5k_start'] // 10000).to_pandas().astype(str)
mcds
<xarray.MCDS> Dimensions: (cell: 40, chrom5k: 617669, count_type: 2, mc_type: 2) Coordinates: * cell (cell) <U10 'Amy' 'ASC' 'CA1' 'CA2' ... 'THM_MB' 'Vip' 'VLMC' * chrom5k (chrom5k) object 'chr1_0' 'chr1_1' ... 'chrY_11445' chrom5k_chrom (chrom5k) <U5 'chr1' 'chr1' 'chr1' ... 'chrY' 'chrY' 'chrY' chrom5k_end (chrom5k) int64 5000 10000 15000 ... 57225000 57227415 chrom5k_start (chrom5k) int64 0 5000 10000 ... 57215000 57220000 57225000 * count_type (count_type) <U3 'mc' 'cov' * mc_type (mc_type) <U3 'CGN' 'CHN' Data variables: chrom5k_da (cell, chrom5k, mc_type, count_type) uint32 dask.array<chunksize=(5, 77209, 1, 1), meta=np.ndarray> chrom10k (chrom5k) object 'chr1_0' 'chr1_0' ... 'chrY_5722' Attributes: obs_dim: cell var_dim: chrom5k
mCG#
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CGN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CGN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287509
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_0 | chr10_1 | chr10_10 | chr10_100 | chr10_1000 | chr10_10000 | chr10_10001 | chr10_10002 | chr10_10003 | chr10_10004 | ... | chr9_9990 | chr9_9991 | chr9_9992 | chr9_9993 | chr9_9994 | chr9_9995 | chr9_9996 | chr9_9997 | chr9_9998 | chr9_9999 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 0.865750 | 1.098113 | 1.104957 | 0.970369 | 0.546823 | 0.765560 | 1.030258 | 1.122590 | 1.044473 | ... | 0.444581 | 1.120092 | 1.100522 | 1.093261 | 1.098141 | 1.069337 | 1.101062 | 1.070954 | 1.024206 | 0.910354 |
L4_IT | 1.0 | 0.829846 | 1.079987 | 1.100184 | 1.031948 | 0.544228 | 0.771079 | 1.003114 | 1.103790 | 1.051402 | ... | 0.420759 | 1.109077 | 1.098732 | 1.061285 | 1.072169 | 1.095959 | 1.089031 | 1.092414 | 1.054224 | 0.718477 |
L5_IT | 1.0 | 0.871156 | 1.086426 | 1.090221 | 0.994796 | 0.572899 | 0.733758 | 0.978924 | 1.100353 | 1.021597 | ... | 0.448407 | 1.106363 | 1.069185 | 1.078171 | 1.086915 | 1.044753 | 1.098709 | 1.076597 | 1.038894 | 1.003211 |
L6_IT | 1.0 | 0.841410 | 1.096542 | 1.104306 | 1.026670 | 0.584123 | 0.744700 | 1.045663 | 1.108378 | 1.030944 | ... | 0.431908 | 1.115438 | 1.104424 | 1.086339 | 1.107761 | 1.018649 | 1.108945 | 1.097433 | 1.064725 | 0.993141 |
L6_IT_Car3 | 1.0 | 0.873949 | 1.090812 | 1.096528 | 1.091477 | 0.534639 | 0.701619 | 1.057742 | 1.096929 | 1.004430 | ... | 0.426434 | 1.112644 | 1.107946 | 1.083099 | 1.102255 | 1.067188 | 1.098333 | 1.078087 | 1.071479 | 0.834742 |
L56_NP | 1.0 | 0.797610 | 1.057855 | 1.061449 | 1.023070 | 0.559595 | 0.698354 | 0.937548 | 1.072523 | 1.008295 | ... | 0.423151 | 1.075689 | 1.046400 | 1.038178 | 1.058969 | 1.048859 | 1.047916 | 1.030193 | 1.039942 | 1.069534 |
L6_CT | 1.0 | 0.852024 | 1.091768 | 1.099437 | 1.044037 | 0.528753 | 0.785238 | 0.993313 | 1.105819 | 1.042183 | ... | 0.424429 | 1.108913 | 1.100481 | 1.087009 | 1.105778 | 0.998863 | 1.079483 | 1.073137 | 1.042358 | 1.088393 |
L6b | 1.0 | 0.878122 | 1.102235 | 1.096597 | 0.910365 | 0.614620 | 0.712343 | 1.069397 | 1.109824 | 1.016281 | ... | 0.457184 | 1.109582 | 1.114130 | 1.063698 | 1.099670 | 0.983586 | 1.092634 | 1.062037 | 1.012311 | 0.950932 |
Amy | 1.0 | 0.851257 | 1.097555 | 1.107771 | 0.990718 | 0.491762 | 0.724681 | 1.016296 | 1.116206 | 1.050156 | ... | 0.442131 | 1.127280 | 1.097345 | 1.108722 | 1.102675 | 1.047166 | 1.113539 | 1.068260 | 1.089860 | 1.074183 |
Lamp5 | 1.0 | 0.805150 | 1.033116 | 1.088847 | 1.096238 | 0.512818 | 0.724184 | 1.062630 | 1.091241 | 1.021778 | ... | 0.434840 | 1.096492 | 1.069307 | 1.090170 | 1.084815 | 1.073408 | 1.077023 | 1.072035 | 1.080043 | 1.079055 |
Lamp5_LHX6 | 1.0 | 0.792747 | 1.035162 | 1.076712 | 1.092387 | 0.434159 | 0.709179 | 1.022559 | 1.079085 | 1.031669 | ... | 0.452515 | 1.097232 | 1.090241 | 1.092998 | 1.078567 | 1.052278 | 1.078464 | 1.077278 | 1.076348 | 1.073098 |
Sncg | 1.0 | 0.792520 | 1.025522 | 1.074973 | 1.079286 | 0.558658 | 0.731335 | 1.056605 | 1.080625 | 1.030201 | ... | 0.490980 | 1.092729 | 1.081799 | 1.092366 | 1.078335 | 1.064443 | 1.070893 | 1.057093 | 1.061662 | 1.079037 |
Vip | 1.0 | 0.769253 | 1.024638 | 1.081912 | 0.938605 | 0.542403 | 0.752858 | 1.012633 | 1.081400 | 1.020987 | ... | 0.470599 | 1.084906 | 1.084526 | 1.089036 | 1.077619 | 1.087517 | 1.008403 | 1.040687 | 1.080248 | 1.071452 |
Pvalb | 1.0 | 0.795581 | 1.034650 | 1.068094 | 0.949514 | 0.477211 | 0.736213 | 0.961686 | 1.072778 | 1.008399 | ... | 0.416954 | 1.078209 | 1.062200 | 1.082316 | 1.070744 | 1.064724 | 1.073725 | 1.063671 | 1.069274 | 1.056688 |
Pvalb_ChC | 1.0 | 0.804534 | 1.056973 | 1.067313 | 1.018693 | 0.504919 | 0.760289 | 0.844227 | 1.076801 | 0.974374 | ... | 0.411450 | 1.080136 | 1.082128 | 1.081539 | 1.077761 | 1.038962 | 1.069850 | 1.075585 | 1.059912 | 1.074994 |
Sst | 1.0 | 0.794229 | 1.039973 | 1.052752 | 0.998262 | 0.474805 | 0.739483 | 1.029493 | 1.077364 | 1.004946 | ... | 0.425891 | 1.075167 | 1.052733 | 1.077323 | 1.075246 | 1.072911 | 1.072097 | 1.072932 | 1.059704 | 1.069832 |
CHD7 | 1.0 | 0.787437 | 1.037584 | 1.076422 | 0.952265 | 0.501408 | 0.713221 | 0.939310 | 1.084314 | 0.997800 | ... | 0.444873 | 1.093239 | 1.084173 | 1.093810 | 1.079436 | 1.086668 | 1.047163 | 1.077643 | 1.076318 | 1.076364 |
MSN_D1 | 1.0 | 0.808276 | 1.077796 | 1.078513 | 1.074961 | 0.493063 | 0.726965 | 1.061647 | 1.096675 | 0.997394 | ... | 0.424403 | 1.089656 | 1.098023 | 1.091237 | 1.077489 | 1.073559 | 1.086470 | 1.064815 | 1.075967 | 1.080365 |
MSN_D2 | 1.0 | 0.810564 | 1.083761 | 1.083081 | 1.079663 | 0.530576 | 0.782879 | 1.067903 | 1.090645 | 1.019102 | ... | 0.414780 | 1.099974 | 1.086474 | 1.100330 | 1.081124 | 1.062736 | 1.075620 | 1.079292 | 1.086640 | 1.086207 |
Foxp2 | 1.0 | 0.782796 | 1.075557 | 1.091417 | 1.070874 | 0.521455 | 0.734907 | 1.058666 | 1.099930 | 1.028032 | ... | 0.413546 | 1.106870 | 1.105477 | 1.097594 | 1.088425 | 1.075589 | 1.045977 | 1.039735 | 1.081181 | 1.087157 |
SubCtx | 1.0 | 0.783712 | 1.050213 | 1.070455 | 1.045838 | 0.544439 | 0.765156 | 1.025895 | 1.071964 | 1.012209 | ... | 0.420346 | 1.082754 | 1.083586 | 1.088390 | 1.073200 | 1.054467 | 1.025523 | 1.034854 | 1.050823 | 1.065440 |
21 rows × 287509 columns
loopcg = (ratio[idx1].values + ratio[idx2].values).T / 2
loopcg = pd.DataFrame(loopcg, index=loopq.index, columns=leg)
loopall['mCG_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopcg.values, loopq.values)]
mCH#
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CHN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CHN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_0 | chr10_1 | chr10_10 | chr10_100 | chr10_1000 | chr10_10000 | chr10_10001 | chr10_10002 | chr10_10003 | chr10_10004 | ... | chr9_9990 | chr9_9991 | chr9_9992 | chr9_9993 | chr9_9994 | chr9_9995 | chr9_9996 | chr9_9997 | chr9_9998 | chr9_9999 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 0.581408 | 1.048868 | 0.242306 | 0.609380 | 0.868303 | 1.127298 | 1.081475 | 1.683425 | 1.620382 | ... | 0.759118 | 0.580684 | 0.614478 | 0.674427 | 0.580364 | 0.553518 | 0.636553 | 0.463157 | 0.426187 | 0.392909 |
L4_IT | 1.0 | 0.495013 | 0.795514 | 0.245440 | 0.963888 | 1.094922 | 1.168507 | 1.088782 | 1.546328 | 1.765103 | ... | 0.846806 | 0.786174 | 0.756176 | 0.789065 | 0.802313 | 0.947783 | 0.883354 | 0.705005 | 0.579375 | 0.484728 |
L5_IT | 1.0 | 0.455561 | 1.015350 | 0.228945 | 0.840501 | 1.270735 | 1.163866 | 1.093905 | 1.705293 | 1.773222 | ... | 0.864993 | 0.658063 | 0.572586 | 0.707412 | 0.744451 | 0.576037 | 0.699209 | 0.611104 | 0.495135 | 0.455101 |
L6_IT | 1.0 | 0.650953 | 1.057721 | 0.240116 | 0.898931 | 1.362157 | 1.445102 | 1.237765 | 1.786124 | 1.641940 | ... | 0.741295 | 0.624843 | 0.609334 | 0.696892 | 0.714934 | 0.526063 | 0.674852 | 0.523097 | 0.412630 | 0.391960 |
L6_IT_Car3 | 1.0 | 0.575051 | 1.077855 | 0.224761 | 1.325802 | 0.861500 | 1.221765 | 1.217034 | 1.706987 | 1.594311 | ... | 0.756624 | 0.621904 | 0.665758 | 0.670322 | 0.707093 | 0.612832 | 0.649896 | 0.507689 | 0.448870 | 0.378274 |
L56_NP | 1.0 | 0.402756 | 0.881177 | 0.187907 | 0.880132 | 1.413384 | 1.596061 | 1.131252 | 2.004234 | 2.065186 | ... | 0.636043 | 0.541412 | 0.470409 | 0.574378 | 0.622158 | 0.716971 | 0.701500 | 0.593755 | 0.432142 | 0.351776 |
L6_CT | 1.0 | 0.444559 | 0.978495 | 0.277735 | 1.018575 | 1.440508 | 1.389699 | 1.128223 | 1.888419 | 1.825062 | ... | 0.643229 | 0.474623 | 0.578430 | 0.663219 | 0.629395 | 0.454445 | 0.595501 | 0.479590 | 0.393849 | 0.387964 |
L6b | 1.0 | 0.445212 | 1.214124 | 0.392053 | 0.693349 | 1.660831 | 1.387080 | 1.277147 | 1.901074 | 1.448106 | ... | 0.657112 | 0.503805 | 0.520558 | 0.580309 | 0.642889 | 0.478172 | 0.602957 | 0.457603 | 0.336922 | 0.341128 |
Amy | 1.0 | 0.516281 | 1.099986 | 0.236326 | 0.984687 | 0.700906 | 1.150781 | 0.997126 | 1.717197 | 1.658379 | ... | 0.732941 | 0.549465 | 0.526880 | 0.701293 | 0.601888 | 0.488402 | 0.583821 | 0.471135 | 0.456757 | 0.394323 |
Lamp5 | 1.0 | 0.396334 | 0.570967 | 0.270190 | 1.144927 | 0.668095 | 1.072014 | 1.080740 | 1.521001 | 1.666255 | ... | 0.675580 | 0.596514 | 0.467924 | 0.610111 | 0.579131 | 0.587864 | 0.589750 | 0.568379 | 0.550557 | 0.388556 |
Lamp5_LHX6 | 1.0 | 0.404816 | 0.608607 | 0.253795 | 1.325472 | 0.601759 | 0.982635 | 1.042335 | 1.219227 | 1.265571 | ... | 0.774466 | 0.677173 | 0.498887 | 0.602810 | 0.579005 | 0.603801 | 0.663251 | 0.588113 | 0.537083 | 0.364999 |
Sncg | 1.0 | 0.402602 | 0.546298 | 0.251427 | 1.250350 | 1.081394 | 1.136336 | 1.014171 | 1.537153 | 1.924419 | ... | 0.721124 | 0.700234 | 0.611148 | 0.784875 | 0.690060 | 0.841363 | 0.740360 | 0.664454 | 0.594320 | 0.359005 |
Vip | 1.0 | 0.358805 | 0.494400 | 0.254932 | 1.173710 | 0.901838 | 1.023045 | 1.004033 | 1.327499 | 1.684392 | ... | 0.731962 | 0.694987 | 0.586643 | 0.722016 | 0.728619 | 0.858907 | 0.835467 | 0.704260 | 0.587966 | 0.404680 |
Pvalb | 1.0 | 0.470842 | 0.609634 | 0.227138 | 1.089242 | 0.469059 | 0.983036 | 0.705535 | 1.355733 | 1.528034 | ... | 0.765015 | 0.775923 | 0.620986 | 0.802344 | 0.772071 | 0.796700 | 0.711192 | 0.666182 | 0.607206 | 0.459098 |
Pvalb_ChC | 1.0 | 0.517545 | 0.846547 | 0.215440 | 1.205763 | 0.434411 | 0.905762 | 0.603844 | 1.370046 | 1.216711 | ... | 0.937215 | 0.902624 | 0.672443 | 0.843801 | 0.747351 | 0.867895 | 0.771968 | 0.695975 | 0.632434 | 0.469026 |
Sst | 1.0 | 0.506479 | 0.707526 | 0.234483 | 1.298698 | 0.511207 | 1.094510 | 1.015818 | 1.331275 | 1.546514 | ... | 0.739821 | 0.654896 | 0.548909 | 0.687845 | 0.799333 | 0.821752 | 0.786264 | 0.700508 | 0.615703 | 0.454094 |
CHD7 | 1.0 | 0.632120 | 0.663165 | 0.232783 | 0.855401 | 0.630320 | 0.934222 | 0.840777 | 1.288995 | 1.273929 | ... | 0.666504 | 0.639161 | 0.519633 | 0.630447 | 0.649252 | 0.685826 | 0.667158 | 0.617631 | 0.522935 | 0.374313 |
MSN_D1 | 1.0 | 0.487243 | 1.004119 | 0.194402 | 1.251101 | 0.874967 | 1.127812 | 1.248663 | 1.757706 | 1.507170 | ... | 0.694553 | 0.515377 | 0.509170 | 0.671571 | 0.681606 | 0.613367 | 0.553275 | 0.483214 | 0.479693 | 0.360556 |
MSN_D2 | 1.0 | 0.479048 | 1.051841 | 0.254269 | 1.213524 | 0.860425 | 1.126030 | 1.199805 | 1.700450 | 1.445411 | ... | 0.658539 | 0.485480 | 0.486422 | 0.626516 | 0.626566 | 0.647211 | 0.549901 | 0.478409 | 0.457948 | 0.376326 |
Foxp2 | 1.0 | 0.473911 | 0.857691 | 0.279863 | 1.141587 | 0.749200 | 0.834646 | 0.999043 | 1.433186 | 1.678098 | ... | 0.540286 | 0.512814 | 0.551306 | 0.632186 | 0.662397 | 0.750259 | 0.582253 | 0.539680 | 0.503252 | 0.441115 |
SubCtx | 1.0 | 0.487050 | 0.885348 | 0.259848 | 1.279375 | 0.810411 | 1.101160 | 0.991180 | 1.509706 | 1.677918 | ... | 0.674976 | 0.636079 | 0.524447 | 0.716531 | 0.676386 | 0.796614 | 0.708006 | 0.668927 | 0.566836 | 0.365590 |
21 rows × 287509 columns
loopch = (ratio[idx1].values + ratio[idx2].values).T / 2
loopch = pd.DataFrame(loopch, index=loopq.index, columns=leg)
loopall['mCH_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopch.values, loopq.values)]
ATAC#
sig = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_signal.hdf')
cov = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_cov.hdf')
bins = pd.DataFrame(index=sig.columns)
bins['chrom'] = bins.index.str.split('_').str[0]
bins['start'] = (bins.index.str.split('_').str[1].astype(int) - 1) * 5000
bins['chrom10k'] = bins['chrom'] + '_' + (bins['start'] // res).astype(str)
sig = sig.groupby(by=bins['chrom10k'], axis=1).sum()
cov = cov.groupby(by=bins['chrom10k']).sum()
atac = (sig/cov).fillna(0)
legatac = leg[leg.isin(atac.index)]
atac = atac.loc[legatac]
atac = atac / atac.sum(axis=1)[:, None]
loopatac = (atac[idx1].values + atac[idx2].values).T / 2
loopatac = pd.DataFrame(loopatac, index=loopq.index, columns=legatac)
loopall['ATAC_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopatac[legatac].values, loopq[legatac].values)]
loopall
0 | 1 | 2 | 3 | 4 | 5 | Qanova | Eanova | Tanova | mCG_corr | mCH_corr | ATAC_corr | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | chr1 | 900000 | 910000 | chr1 | 960000 | 970000 | 3.750881 | 6.097476 | 2.068213 | 0.031954 | 0.094070 | 0.253426 |
1 | chr1 | 900000 | 910000 | chr1 | 970000 | 980000 | 3.322128 | 6.001146 | 2.007495 | -0.090257 | 0.070470 | 0.265731 |
2 | chr1 | 910000 | 920000 | chr1 | 970000 | 980000 | 3.293559 | 5.439024 | 2.229271 | -0.018746 | -0.044665 | 0.376666 |
3 | chr1 | 910000 | 920000 | chr1 | 980000 | 990000 | 2.704021 | 5.648575 | 2.289167 | -0.105304 | -0.060823 | 0.306052 |
4 | chr1 | 910000 | 920000 | chr1 | 990000 | 1000000 | 2.819877 | 5.675182 | 1.669268 | -0.354732 | -0.320624 | 0.051738 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2873610 | chr22 | 50570000 | 50580000 | chr22 | 50670000 | 50680000 | 1.646674 | 11.822375 | 1.625390 | 0.009941 | -0.277535 | 0.078601 |
2873611 | chr22 | 50580000 | 50590000 | chr22 | 50670000 | 50680000 | 2.256175 | 11.555016 | 1.335000 | -0.049832 | -0.312835 | 0.082439 |
2873612 | chr22 | 50590000 | 50600000 | chr22 | 50670000 | 50680000 | 3.531459 | 11.165133 | 1.195543 | 0.075519 | -0.135215 | 0.085896 |
2873613 | chr22 | 50600000 | 50610000 | chr22 | 50670000 | 50680000 | 4.896728 | 11.926161 | 2.028210 | 0.152859 | -0.214300 | 0.119297 |
2873614 | chr22 | 50610000 | 50620000 | chr22 | 50670000 | 50680000 | 5.475712 | 13.031472 | 3.153314 | -0.041111 | -0.198155 | 0.250070 |
2873615 rows × 12 columns
loopall.to_hdf(f'{outdir}merged_loop.hdf', key='data')
loopcg.to_hdf(f'{outdir}loop_mCG.hdf', key='data')
loopch.to_hdf(f'{outdir}loop_mCH.hdf', key='data')
loopatac.to_hdf(f'{outdir}loop_ATAC.hdf', key='data')