Correlation between loop and other modalities

Correlation between loop and other modalities#

import cooler
import numpy as np
import pandas as pd
from scipy.sparse import triu
from scipy.stats import pearsonr, zscore, norm
from multiprocessing import Pool
from concurrent.futures import ProcessPoolExecutor, as_completed
from ALLCools.mcds import MCDS
from ALLCools.mcds.utilities import calculate_posterior_mc_frac
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib import cm as cm
import seaborn as sns

mpl.style.use('default')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica'
leg = ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'L5_ET', 'Amy', 
       'Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7', 
       'MSN_D1', 'MSN_D2', 'Foxp2', 'SubCtx', 
       'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
      ]
legname = ['L2/3-IT', 'L4-IT', 'L5-IT', 'L6-IT', 'L6-IT-Car3', 'L5/6-NP', 'L6-CT', 'L6b', 'L5-ET', 'Amy-Exc', 
       'Lamp5', 'Lamp5-Lhx6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb-ChC', 'Sst', 'Chd7', 
       'MSN-D1', 'MSN-D2', 'Foxp2', 'SubCtx-Cplx', 
       'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
      ]
leg2name = {xx:yy for xx,yy in zip(leg, legname)}
leg = {'exc': ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'Amy'], 
       'inh': ['Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7'], 
       'msn': ['MSN_D1', 'MSN_D2', 'Foxp2'], 
       'sub': ['SubCtx'], 
       'glia': ['ASC', 'ODC', 'OPC'], 
       'mgc': ['MGC'], 
       'smc': ['PC'], 
       'endo': ['EC'], 
       'fibro': ['VLMC'],
      }
leg['neu'] = leg['exc'] + leg['inh'] + leg['msn'] + leg['sub']
leg['all'] = leg['neu'] + leg['glia'] + leg['mgc'] + leg['smc'] + leg['endo'] + leg['fibro']
group_name = 'neu'
leg = pd.Index(leg[group_name])
legname = leg.map(leg2name)
res = 10000
indir = '/home/jzhou_salk_edu/sky_workdir/hba/loop_majortype/'
outdir = f'/home/jzhou_salk_edu/sky_workdir/hba/loop_majortype/diff/{group_name}/'
chrom_size_path = f'{indir}hg38_with_chrl.main.chrom.sizes'
chrom_sizes = cooler.read_chromsizes(chrom_size_path, all_names=True)
loopall = pd.read_hdf(f'{outdir}merged_loop.hdf', key='data')
loopall
0 1 2 3 4 5 Qanova Eanova Tanova mCG_corr mCH_corr ATAC_corr
0 chr1 900000 910000 chr1 960000 970000 3.750881 6.097476 2.068213 0.031954 0.094070 0.253426
1 chr1 900000 910000 chr1 970000 980000 3.322128 6.001146 2.007495 -0.090257 0.070470 0.265731
2 chr1 910000 920000 chr1 970000 980000 3.293559 5.439024 2.229271 -0.018746 -0.044665 0.376666
3 chr1 910000 920000 chr1 980000 990000 2.704021 5.648575 2.289167 -0.105304 -0.060823 0.306052
4 chr1 910000 920000 chr1 990000 1000000 2.819877 5.675182 1.669268 -0.354732 -0.320624 0.051738
... ... ... ... ... ... ... ... ... ... ... ... ...
2873610 chr22 50570000 50580000 chr22 50670000 50680000 1.646674 11.822375 1.625390 0.009941 -0.277535 0.078601
2873611 chr22 50580000 50590000 chr22 50670000 50680000 2.256175 11.555016 1.335000 -0.049832 -0.312835 0.082439
2873612 chr22 50590000 50600000 chr22 50670000 50680000 3.531459 11.165133 1.195543 0.075519 -0.135215 0.085896
2873613 chr22 50600000 50610000 chr22 50670000 50680000 4.896728 11.926161 2.028210 0.152859 -0.214300 0.119297
2873614 chr22 50610000 50620000 chr22 50670000 50680000 5.475712 13.031472 3.153314 -0.041111 -0.198155 0.250070

2873615 rows × 12 columns

loopq = pd.read_hdf(f'{outdir}loop_Q.hdf', key='data')
loopt = pd.read_hdf(f'{outdir}loop_T.hdf', key='data')

Load Loop mC#

idx1 = loopall[0] + '_' + (loopall[1] // res).astype(str)
idx2 = loopall[3] + '_' + (loopall[4] // res).astype(str)
mcds = MCDS.open('/data/hba/mc_majortype/MajorType.mcds', var_dim='chrom5k')
mcds['chrom10k'] = mcds['chrom5k_chrom'].to_pandas().astype(str) + '_' + (mcds['chrom5k_start'] // 10000).to_pandas().astype(str)
mcds
<xarray.MCDS>
Dimensions:        (cell: 40, chrom5k: 617669, count_type: 2, mc_type: 2)
Coordinates:
  * cell           (cell) <U10 'Amy' 'ASC' 'CA1' 'CA2' ... 'THM_MB' 'Vip' 'VLMC'
  * chrom5k        (chrom5k) object 'chr1_0' 'chr1_1' ... 'chrY_11445'
    chrom5k_chrom  (chrom5k) <U5 'chr1' 'chr1' 'chr1' ... 'chrY' 'chrY' 'chrY'
    chrom5k_end    (chrom5k) int64 5000 10000 15000 ... 57225000 57227415
    chrom5k_start  (chrom5k) int64 0 5000 10000 ... 57215000 57220000 57225000
  * count_type     (count_type) <U3 'mc' 'cov'
  * mc_type        (mc_type) <U3 'CGN' 'CHN'
Data variables:
    chrom5k_da     (cell, chrom5k, mc_type, count_type) uint32 dask.array<chunksize=(5, 77209, 1, 1), meta=np.ndarray>
    chrom10k       (chrom5k) object 'chr1_0' 'chr1_0' ... 'chrY_5722'
Attributes:
    obs_dim:  cell
    var_dim:  chrom5k

mCG#

mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CGN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CGN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287509
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k chr10_0 chr10_1 chr10_10 chr10_100 chr10_1000 chr10_10000 chr10_10001 chr10_10002 chr10_10003 chr10_10004 ... chr9_9990 chr9_9991 chr9_9992 chr9_9993 chr9_9994 chr9_9995 chr9_9996 chr9_9997 chr9_9998 chr9_9999
L23_IT 1.0 0.865750 1.098113 1.104957 0.970369 0.546823 0.765560 1.030258 1.122590 1.044473 ... 0.444581 1.120092 1.100522 1.093261 1.098141 1.069337 1.101062 1.070954 1.024206 0.910354
L4_IT 1.0 0.829846 1.079987 1.100184 1.031948 0.544228 0.771079 1.003114 1.103790 1.051402 ... 0.420759 1.109077 1.098732 1.061285 1.072169 1.095959 1.089031 1.092414 1.054224 0.718477
L5_IT 1.0 0.871156 1.086426 1.090221 0.994796 0.572899 0.733758 0.978924 1.100353 1.021597 ... 0.448407 1.106363 1.069185 1.078171 1.086915 1.044753 1.098709 1.076597 1.038894 1.003211
L6_IT 1.0 0.841410 1.096542 1.104306 1.026670 0.584123 0.744700 1.045663 1.108378 1.030944 ... 0.431908 1.115438 1.104424 1.086339 1.107761 1.018649 1.108945 1.097433 1.064725 0.993141
L6_IT_Car3 1.0 0.873949 1.090812 1.096528 1.091477 0.534639 0.701619 1.057742 1.096929 1.004430 ... 0.426434 1.112644 1.107946 1.083099 1.102255 1.067188 1.098333 1.078087 1.071479 0.834742
L56_NP 1.0 0.797610 1.057855 1.061449 1.023070 0.559595 0.698354 0.937548 1.072523 1.008295 ... 0.423151 1.075689 1.046400 1.038178 1.058969 1.048859 1.047916 1.030193 1.039942 1.069534
L6_CT 1.0 0.852024 1.091768 1.099437 1.044037 0.528753 0.785238 0.993313 1.105819 1.042183 ... 0.424429 1.108913 1.100481 1.087009 1.105778 0.998863 1.079483 1.073137 1.042358 1.088393
L6b 1.0 0.878122 1.102235 1.096597 0.910365 0.614620 0.712343 1.069397 1.109824 1.016281 ... 0.457184 1.109582 1.114130 1.063698 1.099670 0.983586 1.092634 1.062037 1.012311 0.950932
Amy 1.0 0.851257 1.097555 1.107771 0.990718 0.491762 0.724681 1.016296 1.116206 1.050156 ... 0.442131 1.127280 1.097345 1.108722 1.102675 1.047166 1.113539 1.068260 1.089860 1.074183
Lamp5 1.0 0.805150 1.033116 1.088847 1.096238 0.512818 0.724184 1.062630 1.091241 1.021778 ... 0.434840 1.096492 1.069307 1.090170 1.084815 1.073408 1.077023 1.072035 1.080043 1.079055
Lamp5_LHX6 1.0 0.792747 1.035162 1.076712 1.092387 0.434159 0.709179 1.022559 1.079085 1.031669 ... 0.452515 1.097232 1.090241 1.092998 1.078567 1.052278 1.078464 1.077278 1.076348 1.073098
Sncg 1.0 0.792520 1.025522 1.074973 1.079286 0.558658 0.731335 1.056605 1.080625 1.030201 ... 0.490980 1.092729 1.081799 1.092366 1.078335 1.064443 1.070893 1.057093 1.061662 1.079037
Vip 1.0 0.769253 1.024638 1.081912 0.938605 0.542403 0.752858 1.012633 1.081400 1.020987 ... 0.470599 1.084906 1.084526 1.089036 1.077619 1.087517 1.008403 1.040687 1.080248 1.071452
Pvalb 1.0 0.795581 1.034650 1.068094 0.949514 0.477211 0.736213 0.961686 1.072778 1.008399 ... 0.416954 1.078209 1.062200 1.082316 1.070744 1.064724 1.073725 1.063671 1.069274 1.056688
Pvalb_ChC 1.0 0.804534 1.056973 1.067313 1.018693 0.504919 0.760289 0.844227 1.076801 0.974374 ... 0.411450 1.080136 1.082128 1.081539 1.077761 1.038962 1.069850 1.075585 1.059912 1.074994
Sst 1.0 0.794229 1.039973 1.052752 0.998262 0.474805 0.739483 1.029493 1.077364 1.004946 ... 0.425891 1.075167 1.052733 1.077323 1.075246 1.072911 1.072097 1.072932 1.059704 1.069832
CHD7 1.0 0.787437 1.037584 1.076422 0.952265 0.501408 0.713221 0.939310 1.084314 0.997800 ... 0.444873 1.093239 1.084173 1.093810 1.079436 1.086668 1.047163 1.077643 1.076318 1.076364
MSN_D1 1.0 0.808276 1.077796 1.078513 1.074961 0.493063 0.726965 1.061647 1.096675 0.997394 ... 0.424403 1.089656 1.098023 1.091237 1.077489 1.073559 1.086470 1.064815 1.075967 1.080365
MSN_D2 1.0 0.810564 1.083761 1.083081 1.079663 0.530576 0.782879 1.067903 1.090645 1.019102 ... 0.414780 1.099974 1.086474 1.100330 1.081124 1.062736 1.075620 1.079292 1.086640 1.086207
Foxp2 1.0 0.782796 1.075557 1.091417 1.070874 0.521455 0.734907 1.058666 1.099930 1.028032 ... 0.413546 1.106870 1.105477 1.097594 1.088425 1.075589 1.045977 1.039735 1.081181 1.087157
SubCtx 1.0 0.783712 1.050213 1.070455 1.045838 0.544439 0.765156 1.025895 1.071964 1.012209 ... 0.420346 1.082754 1.083586 1.088390 1.073200 1.054467 1.025523 1.034854 1.050823 1.065440

21 rows × 287509 columns

loopcg = (ratio[idx1].values + ratio[idx2].values).T / 2
loopcg = pd.DataFrame(loopcg, index=loopq.index, columns=leg)
loopall['mCG_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopcg.values, loopq.values)]

mCH#

mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CHN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CHN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k chr10_0 chr10_1 chr10_10 chr10_100 chr10_1000 chr10_10000 chr10_10001 chr10_10002 chr10_10003 chr10_10004 ... chr9_9990 chr9_9991 chr9_9992 chr9_9993 chr9_9994 chr9_9995 chr9_9996 chr9_9997 chr9_9998 chr9_9999
L23_IT 1.0 0.581408 1.048868 0.242306 0.609380 0.868303 1.127298 1.081475 1.683425 1.620382 ... 0.759118 0.580684 0.614478 0.674427 0.580364 0.553518 0.636553 0.463157 0.426187 0.392909
L4_IT 1.0 0.495013 0.795514 0.245440 0.963888 1.094922 1.168507 1.088782 1.546328 1.765103 ... 0.846806 0.786174 0.756176 0.789065 0.802313 0.947783 0.883354 0.705005 0.579375 0.484728
L5_IT 1.0 0.455561 1.015350 0.228945 0.840501 1.270735 1.163866 1.093905 1.705293 1.773222 ... 0.864993 0.658063 0.572586 0.707412 0.744451 0.576037 0.699209 0.611104 0.495135 0.455101
L6_IT 1.0 0.650953 1.057721 0.240116 0.898931 1.362157 1.445102 1.237765 1.786124 1.641940 ... 0.741295 0.624843 0.609334 0.696892 0.714934 0.526063 0.674852 0.523097 0.412630 0.391960
L6_IT_Car3 1.0 0.575051 1.077855 0.224761 1.325802 0.861500 1.221765 1.217034 1.706987 1.594311 ... 0.756624 0.621904 0.665758 0.670322 0.707093 0.612832 0.649896 0.507689 0.448870 0.378274
L56_NP 1.0 0.402756 0.881177 0.187907 0.880132 1.413384 1.596061 1.131252 2.004234 2.065186 ... 0.636043 0.541412 0.470409 0.574378 0.622158 0.716971 0.701500 0.593755 0.432142 0.351776
L6_CT 1.0 0.444559 0.978495 0.277735 1.018575 1.440508 1.389699 1.128223 1.888419 1.825062 ... 0.643229 0.474623 0.578430 0.663219 0.629395 0.454445 0.595501 0.479590 0.393849 0.387964
L6b 1.0 0.445212 1.214124 0.392053 0.693349 1.660831 1.387080 1.277147 1.901074 1.448106 ... 0.657112 0.503805 0.520558 0.580309 0.642889 0.478172 0.602957 0.457603 0.336922 0.341128
Amy 1.0 0.516281 1.099986 0.236326 0.984687 0.700906 1.150781 0.997126 1.717197 1.658379 ... 0.732941 0.549465 0.526880 0.701293 0.601888 0.488402 0.583821 0.471135 0.456757 0.394323
Lamp5 1.0 0.396334 0.570967 0.270190 1.144927 0.668095 1.072014 1.080740 1.521001 1.666255 ... 0.675580 0.596514 0.467924 0.610111 0.579131 0.587864 0.589750 0.568379 0.550557 0.388556
Lamp5_LHX6 1.0 0.404816 0.608607 0.253795 1.325472 0.601759 0.982635 1.042335 1.219227 1.265571 ... 0.774466 0.677173 0.498887 0.602810 0.579005 0.603801 0.663251 0.588113 0.537083 0.364999
Sncg 1.0 0.402602 0.546298 0.251427 1.250350 1.081394 1.136336 1.014171 1.537153 1.924419 ... 0.721124 0.700234 0.611148 0.784875 0.690060 0.841363 0.740360 0.664454 0.594320 0.359005
Vip 1.0 0.358805 0.494400 0.254932 1.173710 0.901838 1.023045 1.004033 1.327499 1.684392 ... 0.731962 0.694987 0.586643 0.722016 0.728619 0.858907 0.835467 0.704260 0.587966 0.404680
Pvalb 1.0 0.470842 0.609634 0.227138 1.089242 0.469059 0.983036 0.705535 1.355733 1.528034 ... 0.765015 0.775923 0.620986 0.802344 0.772071 0.796700 0.711192 0.666182 0.607206 0.459098
Pvalb_ChC 1.0 0.517545 0.846547 0.215440 1.205763 0.434411 0.905762 0.603844 1.370046 1.216711 ... 0.937215 0.902624 0.672443 0.843801 0.747351 0.867895 0.771968 0.695975 0.632434 0.469026
Sst 1.0 0.506479 0.707526 0.234483 1.298698 0.511207 1.094510 1.015818 1.331275 1.546514 ... 0.739821 0.654896 0.548909 0.687845 0.799333 0.821752 0.786264 0.700508 0.615703 0.454094
CHD7 1.0 0.632120 0.663165 0.232783 0.855401 0.630320 0.934222 0.840777 1.288995 1.273929 ... 0.666504 0.639161 0.519633 0.630447 0.649252 0.685826 0.667158 0.617631 0.522935 0.374313
MSN_D1 1.0 0.487243 1.004119 0.194402 1.251101 0.874967 1.127812 1.248663 1.757706 1.507170 ... 0.694553 0.515377 0.509170 0.671571 0.681606 0.613367 0.553275 0.483214 0.479693 0.360556
MSN_D2 1.0 0.479048 1.051841 0.254269 1.213524 0.860425 1.126030 1.199805 1.700450 1.445411 ... 0.658539 0.485480 0.486422 0.626516 0.626566 0.647211 0.549901 0.478409 0.457948 0.376326
Foxp2 1.0 0.473911 0.857691 0.279863 1.141587 0.749200 0.834646 0.999043 1.433186 1.678098 ... 0.540286 0.512814 0.551306 0.632186 0.662397 0.750259 0.582253 0.539680 0.503252 0.441115
SubCtx 1.0 0.487050 0.885348 0.259848 1.279375 0.810411 1.101160 0.991180 1.509706 1.677918 ... 0.674976 0.636079 0.524447 0.716531 0.676386 0.796614 0.708006 0.668927 0.566836 0.365590

21 rows × 287509 columns

loopch = (ratio[idx1].values + ratio[idx2].values).T / 2
loopch = pd.DataFrame(loopch, index=loopq.index, columns=leg)
loopall['mCH_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopch.values, loopq.values)]

ATAC#

sig = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_signal.hdf')
cov = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_cov.hdf')
bins = pd.DataFrame(index=sig.columns)
bins['chrom'] = bins.index.str.split('_').str[0]
bins['start'] = (bins.index.str.split('_').str[1].astype(int) - 1) * 5000
bins['chrom10k'] = bins['chrom'] + '_' + (bins['start'] // res).astype(str)
sig = sig.groupby(by=bins['chrom10k'], axis=1).sum()
cov = cov.groupby(by=bins['chrom10k']).sum()
atac = (sig/cov).fillna(0)
legatac = leg[leg.isin(atac.index)]
atac = atac.loc[legatac]
atac = atac / atac.sum(axis=1)[:, None]
loopatac = (atac[idx1].values + atac[idx2].values).T / 2
loopatac = pd.DataFrame(loopatac, index=loopq.index, columns=legatac)
loopall['ATAC_corr'] = [pearsonr(xx,yy)[0] for xx,yy in zip(loopatac[legatac].values, loopq[legatac].values)]
loopall
0 1 2 3 4 5 Qanova Eanova Tanova mCG_corr mCH_corr ATAC_corr
0 chr1 900000 910000 chr1 960000 970000 3.750881 6.097476 2.068213 0.031954 0.094070 0.253426
1 chr1 900000 910000 chr1 970000 980000 3.322128 6.001146 2.007495 -0.090257 0.070470 0.265731
2 chr1 910000 920000 chr1 970000 980000 3.293559 5.439024 2.229271 -0.018746 -0.044665 0.376666
3 chr1 910000 920000 chr1 980000 990000 2.704021 5.648575 2.289167 -0.105304 -0.060823 0.306052
4 chr1 910000 920000 chr1 990000 1000000 2.819877 5.675182 1.669268 -0.354732 -0.320624 0.051738
... ... ... ... ... ... ... ... ... ... ... ... ...
2873610 chr22 50570000 50580000 chr22 50670000 50680000 1.646674 11.822375 1.625390 0.009941 -0.277535 0.078601
2873611 chr22 50580000 50590000 chr22 50670000 50680000 2.256175 11.555016 1.335000 -0.049832 -0.312835 0.082439
2873612 chr22 50590000 50600000 chr22 50670000 50680000 3.531459 11.165133 1.195543 0.075519 -0.135215 0.085896
2873613 chr22 50600000 50610000 chr22 50670000 50680000 4.896728 11.926161 2.028210 0.152859 -0.214300 0.119297
2873614 chr22 50610000 50620000 chr22 50670000 50680000 5.475712 13.031472 3.153314 -0.041111 -0.198155 0.250070

2873615 rows × 12 columns

loopall.to_hdf(f'{outdir}merged_loop.hdf', key='data')
loopcg.to_hdf(f'{outdir}loop_mCG.hdf', key='data')
loopch.to_hdf(f'{outdir}loop_mCH.hdf', key='data')
loopatac.to_hdf(f'{outdir}loop_ATAC.hdf', key='data')