Correlation between boundary and other modalities#
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from matplotlib import cm as cm
import seaborn as sns
from matplotlib.colors import LogNorm
from ALLCools.mcds import MCDS
from ALLCools.mcds.utilities import calculate_posterior_mc_frac
mpl.style.use('default')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica'
leg = ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'L5_ET', 'Amy',
'Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7',
'MSN_D1', 'MSN_D2', 'Foxp2', 'SubCtx',
'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
]
legname = ['L2/3-IT', 'L4-IT', 'L5-IT', 'L6-IT', 'L6-IT-Car3', 'L5/6-NP', 'L6-CT', 'L6b', 'L5-ET', 'Amy-Exc',
'Lamp5', 'Lamp5-Lhx6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb-ChC', 'Sst', 'Chd7',
'MSN-D1', 'MSN-D2', 'Foxp2', 'SubCtx-Cplx',
'ASC', 'ODC', 'OPC', 'MGC', 'PC', 'EC', 'VLMC'
]
leg2name = {xx:yy for xx,yy in zip(leg, legname)}
leg = {'exc': ['L23_IT', 'L4_IT', 'L5_IT', 'L6_IT', 'L6_IT_Car3', 'L56_NP', 'L6_CT', 'L6b', 'Amy'],
'inh': ['Lamp5', 'Lamp5_LHX6', 'Sncg', 'Vip', 'Pvalb', 'Pvalb_ChC', 'Sst', 'CHD7'],
'msn': ['MSN_D1', 'MSN_D2', 'Foxp2'],
'sub': ['SubCtx'],
'glia': ['ASC', 'ODC', 'OPC'],
'mgc': ['MGC'],
'smc': ['PC'],
'endo': ['EC'],
'fibro': ['VLMC'],
}
leg['neu'] = leg['exc'] + leg['inh'] + leg['msn'] + leg['sub']
leg['all'] = leg['neu'] + leg['glia'] + leg['mgc'] + leg['smc'] + leg['endo'] + leg['fibro']
group_name = 'neu'
leg = pd.Index(leg[group_name])
legname = leg.map(leg2name)
res = 25000
indir = '/data/hba/domain_majortype/'
outdir = f'/home/jzhou_salk_edu/sky_workdir/hba/domain_majortype/diff/{group_name}/'
binall = pd.read_hdf(f'{outdir}bin_stats.hdf', key='data')
mcds = MCDS.open('/data/hba/mc_majortype/MajorType.mcds', var_dim='chrom5k')
mcds['chrom10k_even'] = mcds['chrom5k_chrom'].to_pandas().astype(str) + '_' + (mcds['chrom5k_start'] // 10000 * 2).to_pandas().astype(str)
mcds['chrom10k_odd'] = mcds['chrom5k_chrom'].to_pandas().astype(str) + '_' + ((mcds['chrom5k_start'] + 5000) // 10000 * 2 - 1).to_pandas().astype(str)
mcds
<xarray.MCDS> Dimensions: (cell: 40, chrom5k: 617669, count_type: 2, mc_type: 2) Coordinates: * cell (cell) <U10 'Amy' 'ASC' 'CA1' 'CA2' ... 'THM_MB' 'Vip' 'VLMC' * chrom5k (chrom5k) object 'chr1_0' 'chr1_1' ... 'chrY_11445' chrom5k_chrom (chrom5k) <U5 'chr1' 'chr1' 'chr1' ... 'chrY' 'chrY' 'chrY' chrom5k_end (chrom5k) int64 5000 10000 15000 ... 57225000 57227415 chrom5k_start (chrom5k) int64 0 5000 10000 ... 57215000 57220000 57225000 * count_type (count_type) <U3 'mc' 'cov' * mc_type (mc_type) <U3 'CGN' 'CHN' Data variables: chrom5k_da (cell, chrom5k, mc_type, count_type) uint32 dask.array<chunksize=(5, 77209, 1, 1), meta=np.ndarray> chrom10k_even (chrom5k) object 'chr1_0' 'chr1_0' ... 'chrY_11444' chrom10k_odd (chrom5k) object 'chr1_-1' 'chr1_1' ... 'chrY_11445' Attributes: obs_dim: cell var_dim: chrom5k
mCG#
boundcg = pd.DataFrame(index=binall.index, columns=leg)
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CGN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k_odd'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CGN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k_odd'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287523
(21, 287523) (21, 287523)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_-1 | chr10_1 | chr10_10001 | chr10_10003 | chr10_10005 | chr10_10007 | chr10_10009 | chr10_1001 | chr10_10011 | chr10_10013 | ... | chr9_9983 | chr9_9985 | chr9_9987 | chr9_9989 | chr9_999 | chr9_9991 | chr9_9993 | chr9_9995 | chr9_9997 | chr9_9999 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 0.860457 | 1.081336 | 0.925333 | 1.009707 | 1.112735 | 1.108767 | 1.073129 | 0.951386 | 0.582522 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.103275 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L4_IT | 1.0 | 0.823696 | 1.075727 | 0.914209 | 0.981365 | 1.105991 | 1.093843 | 1.056361 | 0.940196 | 0.554439 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.074466 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L5_IT | 1.0 | 0.866910 | 1.095000 | 0.987629 | 1.034343 | 1.093693 | 1.091953 | 1.070210 | 1.022007 | 0.581350 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.068948 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT | 1.0 | 0.832396 | 1.101325 | 0.912712 | 1.015612 | 1.102152 | 1.099118 | 1.065947 | 0.907962 | 0.593821 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.079558 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT_Car3 | 1.0 | 0.870886 | 1.103143 | 0.981731 | 1.015656 | 1.099449 | 1.027843 | 1.066831 | 0.961772 | 0.537093 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.054659 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L56_NP | 1.0 | 0.789294 | 1.065180 | 1.019229 | 1.034690 | 1.069006 | 1.045871 | 1.053416 | 1.031039 | 0.558070 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.073882 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_CT | 1.0 | 0.850089 | 1.095890 | 0.944332 | 1.006207 | 1.098835 | 1.085214 | 1.042651 | 1.021171 | 0.540605 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.096978 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6b | 1.0 | 0.874737 | 1.082416 | 0.972364 | 1.032669 | 1.104728 | 1.072531 | 1.067300 | 1.029268 | 0.539889 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.102754 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Amy | 1.0 | 0.849339 | 1.121909 | 0.955589 | 1.021628 | 1.111045 | 1.108338 | 1.034604 | 0.975217 | 0.498814 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.098151 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5 | 1.0 | 0.803441 | 1.099491 | 0.901131 | 0.996747 | 1.084012 | 1.084341 | 1.061091 | 1.065331 | 0.573551 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.029678 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5_LHX6 | 1.0 | 0.788889 | 1.095696 | 0.948651 | 1.009161 | 1.064117 | 1.076356 | 1.044956 | 1.053349 | 0.559807 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.043798 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sncg | 1.0 | 0.790187 | 1.092085 | 0.943028 | 1.013903 | 1.075181 | 1.074456 | 1.025663 | 1.041255 | 0.578329 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.041308 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Vip | 1.0 | 0.764841 | 1.079621 | 0.954340 | 0.988894 | 1.074535 | 1.076771 | 1.030072 | 1.030965 | 0.523459 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.077833 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb | 1.0 | 0.791781 | 1.078261 | 0.957770 | 0.996808 | 1.065400 | 1.062728 | 1.062498 | 1.035391 | 0.573805 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.066142 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb_ChC | 1.0 | 0.799670 | 1.079489 | 1.042405 | 1.039103 | 0.965079 | 1.065501 | 1.047256 | 0.947254 | 0.535809 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.072020 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sst | 1.0 | 0.790550 | 1.009991 | 0.967094 | 1.003426 | 1.075078 | 1.063624 | 1.052106 | 1.037828 | 0.567196 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.068358 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
CHD7 | 1.0 | 0.783001 | 1.091096 | 0.973865 | 0.987348 | 1.059799 | 1.079257 | 1.049891 | 1.006733 | 0.535868 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.069605 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D1 | 1.0 | 0.805578 | 1.081088 | 0.949242 | 0.987770 | 1.095029 | 1.077659 | 1.031337 | 1.028073 | 0.541634 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.070032 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D2 | 1.0 | 0.807129 | 1.078072 | 0.925217 | 0.991441 | 1.091273 | 1.082277 | 1.035319 | 1.031940 | 0.531349 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.079601 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Foxp2 | 1.0 | 0.779720 | 1.100584 | 0.911965 | 0.957330 | 1.087619 | 1.084313 | 1.057260 | 1.002953 | 0.547599 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.082638 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
SubCtx | 1.0 | 0.778561 | 1.064634 | 0.979620 | 0.998362 | 1.063654 | 1.059708 | 1.040602 | 0.996573 | 0.525402 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.063075 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
21 rows × 287523 columns
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000!=0)# & idx1.isin(ratio.columns)
boundcg.loc[selb] = (ratio.loc[:, idx1[selb]].values + ratio.loc[:, idx2[selb]].values).T / 2
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CGN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k_even'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CGN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k_even'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287509
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_0 | chr10_10 | chr10_100 | chr10_1000 | chr10_10000 | chr10_10002 | chr10_10004 | chr10_10006 | chr10_10008 | chr10_10010 | ... | chr9_9980 | chr9_9982 | chr9_9984 | chr9_9986 | chr9_9988 | chr9_9990 | chr9_9992 | chr9_9994 | chr9_9996 | chr9_9998 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 1.030605 | 0.938468 | 1.079237 | 1.087932 | 1.036463 | 0.933259 | 1.085677 | 1.113516 | 1.092081 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L4_IT | 1.0 | 1.008030 | 0.893732 | 1.061923 | 1.084499 | 1.066191 | 0.883820 | 1.083390 | 1.100948 | 1.046365 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L5_IT | 1.0 | 1.035229 | 0.968205 | 1.064959 | 1.097828 | 1.078424 | 0.974695 | 1.077973 | 1.091472 | 1.080833 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT | 1.0 | 1.043728 | 1.006064 | 1.036545 | 1.108225 | 1.035341 | 0.927944 | 1.081138 | 1.100847 | 1.058464 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT_Car3 | 1.0 | 1.031033 | 1.024065 | 0.972414 | 1.106033 | 1.069932 | 0.960716 | 1.080387 | 1.014772 | 1.059353 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L56_NP | 1.0 | 1.024043 | 1.067959 | 1.031688 | 1.077790 | 1.064895 | 1.004588 | 1.056444 | 1.054735 | 1.060728 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_CT | 1.0 | 1.025605 | 1.000952 | 1.033709 | 1.105316 | 1.079336 | 0.916573 | 1.080173 | 1.081485 | 1.098547 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6b | 1.0 | 1.051203 | 1.039012 | 1.055885 | 1.094319 | 1.048304 | 0.974152 | 1.092275 | 1.058137 | 1.089253 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Amy | 1.0 | 1.037157 | 0.883397 | 0.945863 | 1.126014 | 1.067755 | 0.954871 | 1.078414 | 1.115085 | 1.086908 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5 | 1.0 | 0.945802 | 1.048737 | 1.020667 | 1.102167 | 0.995533 | 0.926336 | 1.069959 | 1.081162 | 1.089796 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5_LHX6 | 1.0 | 0.980062 | 1.057993 | 0.982959 | 1.096893 | 1.008295 | 0.969985 | 1.048721 | 1.079863 | 1.075894 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sncg | 1.0 | 0.925705 | 1.048671 | 0.978772 | 1.098617 | 1.045065 | 0.951006 | 1.055583 | 1.074915 | 1.075155 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Vip | 1.0 | 0.962763 | 0.984013 | 0.947534 | 1.089664 | 1.054085 | 0.932628 | 1.041100 | 1.083444 | 1.070266 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb | 1.0 | 0.998853 | 1.010985 | 1.007189 | 1.083609 | 1.071767 | 0.926349 | 1.047639 | 1.067686 | 1.051676 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb_ChC | 1.0 | 1.014795 | 0.930437 | 1.039565 | 1.079252 | 1.072438 | 1.024667 | 0.993707 | 1.042559 | 0.973670 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sst | 1.0 | 0.975179 | 1.061164 | 0.932726 | 1.079956 | 1.027505 | 0.942059 | 1.056591 | 1.069491 | 1.061776 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
CHD7 | 1.0 | 0.970671 | 0.875364 | 0.964086 | 1.093977 | 1.072723 | 0.940073 | 1.037374 | 1.080247 | 1.056002 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D1 | 1.0 | 1.039511 | 1.045804 | 1.058324 | 1.081883 | 1.071348 | 0.917046 | 1.072203 | 1.087172 | 1.079323 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D2 | 1.0 | 1.044885 | 1.037520 | 1.043545 | 1.076334 | 1.076735 | 0.896778 | 1.073562 | 1.088832 | 1.081124 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Foxp2 | 1.0 | 0.991555 | 0.970998 | 1.050386 | 1.104645 | 1.080997 | 0.862161 | 1.056843 | 1.089961 | 1.080012 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
SubCtx | 1.0 | 1.004029 | 1.038828 | 0.960336 | 1.073671 | 1.061120 | 0.947283 | 1.052452 | 1.068439 | 1.040170 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
21 rows × 287509 columns
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000==0) & idx1.isin(ratio.columns)
boundcg.loc[selb] = (ratio.loc[:, idx1[selb]].values + ratio.loc[:, idx2[selb]].values).T / 2
mCH#
boundch = pd.DataFrame(index=binall.index, columns=leg)
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CHN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k_odd'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CHN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k_odd'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287523
(21, 287523) (21, 287523)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_-1 | chr10_1 | chr10_10001 | chr10_10003 | chr10_10005 | chr10_10007 | chr10_10009 | chr10_1001 | chr10_10011 | chr10_10013 | ... | chr9_9983 | chr9_9985 | chr9_9987 | chr9_9989 | chr9_999 | chr9_9991 | chr9_9993 | chr9_9995 | chr9_9997 | chr9_9999 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 0.510464 | 0.441238 | 0.856999 | 1.229206 | 1.541424 | 1.589255 | 1.146245 | 1.419087 | 0.385546 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.911612 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L4_IT | 1.0 | 0.424107 | 0.397881 | 0.681891 | 0.994451 | 1.350999 | 1.335008 | 1.133499 | 1.108866 | 0.333833 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.764278 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L5_IT | 1.0 | 0.382084 | 0.450242 | 0.937998 | 1.271952 | 1.531300 | 1.513247 | 1.199643 | 1.342807 | 0.344674 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.742818 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT | 1.0 | 0.516017 | 0.407969 | 0.842039 | 1.121085 | 1.496506 | 1.548475 | 1.164626 | 1.119029 | 0.315417 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.696133 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT_Car3 | 1.0 | 0.501950 | 0.430480 | 0.856359 | 1.082338 | 1.393629 | 1.143643 | 1.204057 | 1.161465 | 0.339972 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.426450 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L56_NP | 1.0 | 0.320368 | 0.407804 | 0.852860 | 1.271590 | 1.428042 | 1.314577 | 1.145670 | 1.175423 | 0.216557 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.719350 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_CT | 1.0 | 0.377183 | 0.380987 | 0.793518 | 1.104818 | 1.291011 | 1.302317 | 1.083256 | 1.146751 | 0.319666 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.708590 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6b | 1.0 | 0.395342 | 0.428573 | 0.888223 | 1.155397 | 1.554271 | 1.391349 | 1.151900 | 1.298691 | 0.288390 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.760155 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Amy | 1.0 | 0.395040 | 0.504294 | 0.836117 | 1.225487 | 1.352221 | 1.581872 | 0.951827 | 1.300263 | 0.360894 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.557824 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5 | 1.0 | 0.344861 | 1.379219 | 1.571598 | 1.727956 | 1.958670 | 2.215138 | 1.033501 | 1.897049 | 0.497977 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.426988 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5_LHX6 | 1.0 | 0.359777 | 0.825219 | 1.361178 | 1.353167 | 1.376417 | 1.741107 | 1.057578 | 1.594113 | 0.422394 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.363146 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sncg | 1.0 | 0.366004 | 0.774885 | 1.467858 | 1.327428 | 1.357707 | 1.649965 | 0.977765 | 1.407841 | 0.460822 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.447523 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Vip | 1.0 | 0.333840 | 0.717739 | 1.288908 | 1.187511 | 1.293638 | 1.458470 | 1.100303 | 1.258915 | 0.321260 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.472692 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb | 1.0 | 0.390031 | 0.494385 | 1.010168 | 1.122271 | 1.336544 | 1.427473 | 1.252491 | 1.124361 | 0.274971 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.431932 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb_ChC | 1.0 | 0.443974 | 0.441581 | 0.945737 | 1.075615 | 0.896663 | 1.206834 | 1.309925 | 1.030457 | 0.238352 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.433659 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sst | 1.0 | 0.397211 | 0.458702 | 0.957316 | 1.135977 | 1.442911 | 1.558147 | 1.219512 | 1.375276 | 0.348379 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.430488 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
CHD7 | 1.0 | 0.523041 | 0.463368 | 1.046665 | 1.033602 | 1.223731 | 1.368707 | 1.172668 | 1.056110 | 0.315515 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.350804 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D1 | 1.0 | 0.402915 | 0.426668 | 0.792652 | 1.103806 | 1.378452 | 1.359629 | 1.314098 | 1.248070 | 0.301343 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.638274 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D2 | 1.0 | 0.366594 | 0.363376 | 0.736952 | 1.094653 | 1.371585 | 1.365391 | 1.173767 | 1.160560 | 0.292648 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.597558 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Foxp2 | 1.0 | 0.416216 | 0.392466 | 0.756189 | 0.891100 | 1.083624 | 1.261196 | 1.087753 | 1.003878 | 0.346461 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.527072 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
SubCtx | 1.0 | 0.418725 | 0.433070 | 0.996978 | 1.124348 | 1.283384 | 1.400723 | 1.181628 | 1.103102 | 0.315080 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 0.412262 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
21 rows × 287523 columns
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000!=0)# & idx1.isin(ratio.columns)
boundch.loc[selb] = (ratio.loc[:, idx1[selb]].values + ratio.loc[:, idx2[selb]].values).T / 2
mc = mcds['chrom5k_da'].sel(count_type='mc', mc_type='CHN').to_pandas().T
mc['chrom10k'] = mcds['chrom10k_even'].to_pandas()
mc = mc.groupby('chrom10k').sum().T
cov = mcds['chrom5k_da'].sel(count_type='cov', mc_type='CHN').to_pandas().T
cov['chrom10k'] = mcds['chrom10k_even'].to_pandas()
cov = cov.groupby('chrom10k').sum().T
binfilter = ['_'.join(xx.split('_')[:-1]) for xx in mc.columns]
binfilter = [(len(xx)<6) and (xx not in ['chrM','chrX','chrY']) for xx in binfilter]
print(np.sum(binfilter))
mc = mc.loc[leg, binfilter]
cov = cov.loc[leg, binfilter]
print(mc.shape, cov.shape)
287509
(21, 287509) (21, 287509)
ratio = calculate_posterior_mc_frac(mc.values, cov.values)
ratio = pd.DataFrame(ratio, index=leg, columns=mc.columns)
ratio
chrom10k | chr10_0 | chr10_10 | chr10_100 | chr10_1000 | chr10_10000 | chr10_10002 | chr10_10004 | chr10_10006 | chr10_10008 | chr10_10010 | ... | chr9_9980 | chr9_9982 | chr9_9984 | chr9_9986 | chr9_9988 | chr9_9990 | chr9_9992 | chr9_9994 | chr9_9996 | chr9_9998 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
L23_IT | 1.0 | 1.004191 | 0.493460 | 1.104673 | 0.378159 | 0.577510 | 1.104026 | 1.432413 | 1.759355 | 1.492050 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L4_IT | 1.0 | 0.869427 | 0.593259 | 1.191464 | 0.377823 | 0.468765 | 0.875656 | 1.254744 | 1.432864 | 1.190639 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L5_IT | 1.0 | 0.901131 | 0.664278 | 1.144501 | 0.397166 | 0.651486 | 1.175162 | 1.473311 | 1.663474 | 1.392828 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT | 1.0 | 1.011073 | 0.535467 | 1.080717 | 0.416892 | 0.529509 | 1.052912 | 1.361006 | 1.674774 | 1.247187 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_IT_Car3 | 1.0 | 0.990890 | 0.659824 | 0.899510 | 0.376149 | 0.579968 | 0.991147 | 1.359062 | 1.279046 | 1.186164 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L56_NP | 1.0 | 0.890467 | 1.008560 | 1.115885 | 0.375578 | 0.576387 | 1.110340 | 1.440122 | 1.478495 | 1.246266 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6_CT | 1.0 | 0.909796 | 0.825940 | 1.015736 | 0.392367 | 0.510480 | 1.034966 | 1.274706 | 1.381530 | 1.202989 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
L6b | 1.0 | 0.965625 | 0.488996 | 1.060917 | 0.460871 | 0.616292 | 1.076651 | 1.473702 | 1.472755 | 1.357385 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Amy | 1.0 | 1.032535 | 0.450361 | 0.911437 | 0.443585 | 0.582688 | 1.088852 | 1.387712 | 1.625620 | 1.352506 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5 | 1.0 | 0.762560 | 0.543802 | 0.997103 | 1.148783 | 1.356227 | 1.704878 | 2.037967 | 2.222906 | 1.870465 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Lamp5_LHX6 | 1.0 | 0.785061 | 0.497714 | 0.911998 | 0.693598 | 1.087251 | 1.364749 | 1.501270 | 1.703855 | 1.493310 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sncg | 1.0 | 0.658161 | 0.646019 | 0.839545 | 0.711791 | 1.164374 | 1.429084 | 1.381591 | 1.653368 | 1.397234 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Vip | 1.0 | 0.719938 | 0.567544 | 0.780902 | 0.622367 | 1.032879 | 1.248919 | 1.291014 | 1.519195 | 1.297617 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb | 1.0 | 0.910356 | 0.440534 | 1.016046 | 0.521019 | 0.761039 | 1.071845 | 1.290787 | 1.508756 | 1.246113 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Pvalb_ChC | 1.0 | 0.952738 | 0.426413 | 1.269375 | 0.409346 | 0.731215 | 0.976371 | 1.081925 | 1.207088 | 0.965896 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Sst | 1.0 | 0.778337 | 0.570330 | 0.841826 | 0.462577 | 0.692936 | 1.060044 | 1.373523 | 1.617611 | 1.445655 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
CHD7 | 1.0 | 0.796231 | 0.386411 | 0.874622 | 0.459980 | 0.814569 | 1.034546 | 1.179762 | 1.449169 | 1.097513 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D1 | 1.0 | 1.071710 | 0.493949 | 1.378706 | 0.434703 | 0.542109 | 0.974037 | 1.322889 | 1.533243 | 1.281728 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
MSN_D2 | 1.0 | 1.101675 | 0.485418 | 1.271987 | 0.351280 | 0.503255 | 0.967607 | 1.286954 | 1.513391 | 1.265431 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
Foxp2 | 1.0 | 0.851022 | 0.559014 | 1.094504 | 0.414223 | 0.556357 | 0.857242 | 1.030724 | 1.313589 | 1.098945 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
SubCtx | 1.0 | 0.984603 | 0.638502 | 0.929416 | 0.436759 | 0.740556 | 1.077906 | 1.282124 | 1.497868 | 1.163109 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 |
21 rows × 287509 columns
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000==0) & idx1.isin(ratio.columns)
boundch.loc[selb] = (ratio.loc[:, idx1[selb]].values + ratio.loc[:, idx2[selb]].values).T / 2
ATAC#
sig = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_signal.hdf')
cov = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_cov.hdf')
bins = pd.DataFrame(index=sig.columns)
bins['chrom'] = bins.index.str.split('_').str[0]
bins['start'] = (bins.index.str.split('_').str[1].astype(int) - 1) * 5000
bins['chrom10k_even'] = bins['chrom'] + '_' + (bins['start'] // 10000 * 2).astype(str)
bins['chrom10k_odd'] = bins['chrom'] + '_' + ((bins['start'] + 5000) // 10000 * 2 - 1).astype(str)
sig = sig.groupby(by=bins['chrom10k_odd'], axis=1).sum()
cov = cov.groupby(by=bins['chrom10k_odd']).sum()
atac = (sig/cov).fillna(0)
legatac = leg[leg.isin(atac.index)]
atac = atac.loc[legatac].T
atac = atac / atac.sum(axis=0)
boundatac = pd.DataFrame(index=binall.index, columns=legatac)
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000!=0)# & idx1.isin(atac.index)
boundatac.loc[selb] = (atac.loc[idx1[selb]].values + atac.loc[idx2[selb]].values) / 2
sig = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_signal.hdf')
cov = pd.read_hdf('/home/jzhou_salk_edu/sky_workdir/hba/atac_majortype/cluster_atac_cov.hdf')
sig = sig.groupby(by=bins['chrom10k_even'], axis=1).sum()
cov = cov.groupby(by=bins['chrom10k_even']).sum()
atac = (sig/cov).fillna(0)
legatac = leg[leg.isin(atac.index)]
atac = atac.loc[legatac].T
atac = atac / atac.sum(axis=0)
idx1 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000 - 2).astype(str)
idx2 = binall['chrom'].astype(str) + '_' + (binall['start'] // 5000).astype(str)
selb = (binall['start']%10000==0) & idx1.isin(atac.index)
boundatac.loc[selb] = (atac.loc[idx1[selb]].values + atac.loc[idx2[selb]].values) / 2
boundcg.to_hdf(f'{outdir}bound_mCG.hdf', key='data')
boundch.to_hdf(f'{outdir}bound_mCH.hdf', key='data')
boundatac.to_hdf(f'{outdir}bound_ATAC.hdf', key='data')
binfilter = (boundcg.isna().sum(axis=1)==0).values
binall.loc[~binfilter, ['mCG_corr', 'mCH_corr', 'ATAC_corr']] = 0
binall
chrom | start | end | bklfilter | chi2filter | ins_lm | probdiff | chi2_sc | insfc | diff_sc | mCG_corr | mCH_corr | ATAC_corr | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chr1_0 | chr1 | 0 | 25000 | False | False | 0 | 0.001333 | 18.816754 | 1.020000 | 0 | 0.0 | 0.0 | 0.0 |
chr1_1 | chr1 | 25000 | 50000 | False | False | 0 | 0.000000 | 0.000000 | inf | 0 | NaN | NaN | NaN |
chr1_2 | chr1 | 50000 | 75000 | False | False | 0 | 0.000000 | 0.000000 | inf | 0 | NaN | NaN | NaN |
chr1_3 | chr1 | 75000 | 100000 | False | False | 0 | 0.000000 | 0.000000 | inf | 0 | NaN | NaN | NaN |
chr1_4 | chr1 | 100000 | 125000 | False | False | 0 | 0.000000 | 0.000000 | inf | 0 | NaN | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
chr22_2028 | chr22 | 50700000 | 50725000 | False | False | 0 | 0.028816 | 21.417634 | 0.322651 | 0 | NaN | NaN | NaN |
chr22_2029 | chr22 | 50725000 | 50750000 | False | False | 0 | 0.023924 | 21.869962 | 0.297184 | 0 | NaN | NaN | NaN |
chr22_2030 | chr22 | 50750000 | 50775000 | False | False | 0 | 0.019718 | 20.685304 | 0.263324 | 0 | NaN | NaN | NaN |
chr22_2031 | chr22 | 50775000 | 50800000 | False | False | 0 | 0.016411 | 11.303940 | 0.238471 | 0 | NaN | NaN | NaN |
chr22_2032 | chr22 | 50800000 | 50818468 | False | False | 0 | 0.001982 | 22.931718 | 0.337864 | 0 | NaN | NaN | NaN |
115009 rows × 13 columns
bound_count_ct = pd.read_hdf(f'{indir}MajorType_boundcount.hdf', key='data').loc[leg]
cell_count_ct = pd.read_csv(f'{indir}MajorType_cellcount.csv.gz', index_col=0, header=0, squeeze=True).loc[leg]
bound_prob_ct = (bound_count_ct / cell_count_ct[:,None]).T
binall.loc[binfilter, 'mCG_corr'] = [pearsonr(xx, yy)[0] for xx,yy in zip(bound_prob_ct.values[binfilter], boundcg.values[binfilter])]
binall.loc[binfilter, 'mCH_corr'] = [pearsonr(xx, yy)[0] for xx,yy in zip(bound_prob_ct.values[binfilter], boundch.values[binfilter])]
binall.loc[binfilter, 'ATAC_corr'] = [pearsonr(xx, yy)[0] for xx,yy in zip(bound_prob_ct[legatac].values[binfilter], boundatac[legatac].values[binfilter])]
binall.to_hdf(f'{outdir}bin_stats.hdf', key='data')
fig, axes = plt.subplots(1, 2, figsize=(4,2), sharey='all', dpi=300)
ax = axes[0]
sns.histplot(binall, x='mCG_corr', y='chi2_sc', bins=100, ax=ax, log_scale=(False, 10))
ax = axes[1]
sns.histplot(binall, x='mCH_corr', y='chi2_sc', bins=100, ax=ax, log_scale=(False, 10))
<AxesSubplot:xlabel='mCH_corr', ylabel='chi2_sc'>
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: Helvetica
fig, axes = plt.subplots(1, 2, figsize=(4,2), sharey='all', dpi=300)
ax = axes[0]
sns.histplot(binall, y='chi2_sc', x='probdiff', bins=100, ax=ax, log_scale=(False, False))
ax = axes[1]
sns.histplot(binall, y='chi2_sc', x='insfc', bins=100, ax=ax, log_scale=(10, False))
<AxesSubplot:xlabel='insfc', ylabel='chi2_sc'>