Plot example

Plot example#

import cooler
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.stats import pearsonr

mpl.style.use('default')
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.sans-serif'] = 'Helvetica'
indir = '/data/hba/compartment_majortype/'
outdir = '/home/jzhou_salk_edu/sky_workdir/hba/compartment_majortype/'
binall = np.load(f'{outdir}binfilter_raw.npy', allow_pickle=True)
res = 100000
chrom_size_path = f'/home/jzhou_salk_edu/sky_workdir/hba/ref/hg38.main.chrom.sizes'
chrom_sizes = cooler.read_chromsizes(chrom_size_path, all_names=True)
chrom_sizes = chrom_sizes.iloc[:23]
cpg = pd.read_csv('/home/jzhou_salk_edu/sky_workdir/hba/ref/hg38.100kbin.CpG.txt', header=0, index_col=3, sep='\t')
cpg['CpG_density'] = cpg['14_user_patt_count'] / (cpg['13_seq_len'] - cpg['11_num_N'])
chrom = 'chr5'
ll, rr = 800, 1200
n_bins = (int(chrom_sizes.loc[chrom]) // res + 1)
binfilter = binall[4]
Qall = []
for ct in ['L5_IT', 'Sst', 'ASC', 'MGC']:
    cool = cooler.Cooler(f'{indir}{ct}/{ct}.Q.cool')
    Q = cool.matrix(balance=False, sparse=True).fetch(chrom).toarray()
    Q = Q - np.diag(np.diag(Q))
    rowsum = Q.sum(axis=0)
    # thres = np.percentile(rowsum[rowsum>0], 50) * 2 - np.percentile(rowsum[rowsum>0], 99)
    # binfilter = (rowsum>thres)
    # binall.append(binfilter)
    Q = Q[binfilter][:, binfilter]
    Qall.append(Q)
    print(ct)
L5_IT
Sst
ASC
MGC
fig, axes = plt.subplots(1, 4, figsize=(12,3))
for i in range(4):
    ax = axes[i]
    tmp = np.zeros((n_bins, n_bins))
    tmp[np.ix_(binfilter, binfilter)] = Qall[i]
    ax.imshow(tmp, cmap='bwr', vmin=-0.0015, vmax=0.0015)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_Q_decay.pdf', transparent=True)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: Helvetica
../../_images/da25200b24cf6fd158667221567b9c3fb805a571ea60328ef6113998bd07eff6.png
fig, axes = plt.subplots(1, 4, figsize=(12,3))
for i in range(4):
    ax = axes[i]
    tmp = np.zeros((n_bins, n_bins))
    tmp[np.ix_(binfilter, binfilter)] = Qall[i]
    ax.imshow(tmp, cmap='afmhot_r', vmin=0, vmax=0.0008)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_Q_decay.pdf', transparent=True)
../../_images/9bdb18d116b02267a75c14c25c9b4c865644ee0e691577e3f128cb5ee07564e1.png
comp = pd.read_hdf(f'{indir}comp_impute_mergerawpca.hdf', key='data')

Call = []
pcall = []
for k,ct in enumerate(['L5_IT', 'Sst', 'ASC', 'MGC']):
    Q = Qall[k].copy()
    decay = np.array([np.mean(np.diag(Q, i)) for i in range(Q.shape[0])])
    E = np.zeros(Q.shape)
    row, col = np.diag_indices(E.shape[0])
    E[row, col] = 1
    for i in range(1, E.shape[0]):
        E[row[:-i], col[i:]] = (Q[row[:-i], col[i:]] + 1e-5) / (decay[i] + 1e-5)
    E = E + E.T
    C = np.corrcoef(np.log2(E + 0.001))
    Call.append(C)
    tmp = comp.loc[(comp.index.str.split('-').str[0]==chrom), ct]
    tmp.index = [int(xx.split('-')[1]) for xx in tmp.index]
    pcall.append(tmp[np.where(binfilter)[0]].values)
    print(ct)
L5_IT
Sst
ASC
MGC
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col', sharey='row')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins))
    tmp[np.ix_(binfilter, binfilter)] = Call[i]
    ax.imshow(tmp, cmap='bwr', vmin=-0.2, vmax=1)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = pcall[i]# / np.std(pcall[i])
    x, y = np.arange(n_bins), tmp
    # x, y = np.arange(pcall[i].shape[0]), pcall[i]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([-15, 15])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_compimpute.pdf', transparent=True)
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: Helvetica
../../_images/bd4beba8bbf20079cea8e75f6cd9c67320cb67712cb694aa6160843a30e8761e.png
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col', sharey='row')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins))
    tmp[np.ix_(binfilter, binfilter)] = Call[i]    
    ax.imshow(tmp[ll:rr,ll:rr], cmap='bwr', vmin=0.2, vmax=0.8)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, left=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = pcall[i]
    x, y = np.arange(rr-ll), tmp[ll:rr]
    # x, y = np.arange(rr-ll), pcall[i][ll:rr]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_compimpute_zoomin.pdf', transparent=True)
../../_images/f8ff592fba84f0a4580bb3a6267992841b29eb3c253348d80f6ac300fdc61c9d.png
pca = PCA(n_components=2)
Gall = []
Gpc = []
for i,ct in enumerate(['L5_IT', 'Sst', 'ASC', 'MGC']):
    cool = cooler.Cooler(f'{indir}{ct}/{ct}.CGcorr.cool')
    Q = cool.matrix(balance=False, sparse=True).fetch(chrom).toarray()
    Q = Q - np.diag(np.diag(Q))
    Q = Q[binfilter][:, binfilter]
    Gall.append(Q)
    pc = pca.fit_transform(Q)
    cpgtmp = cpg.loc[cpg['#1_usercol']==chrom, 'CpG_density'].values[binfilter]
    if pearsonr(cpgtmp, pc[:,0])[0]>0:
        pc = pc[:,0]
    else:
        pc = -pc[:,0]
    Gpc.append(pc)
    print(ct)
L5_IT
Sst
ASC
MGC
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins)) + (np.percentile(Gall[i], 5) + np.percentile(Gall[i], 95)) / 2
    tmp[np.ix_(binfilter, binfilter)] = Gall[i]
    ax.imshow(tmp, cmap='bwr', vmin=np.percentile(Gall[i], 30), vmax=np.percentile(Gall[i], 95))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, left=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = Gpc[i]
    x, y = np.arange(n_bins), tmp
    # x, y = np.arange(Gpc[i].shape[0]), Gpc[i]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_CGcorr_impute.pdf', transparent=True)
../../_images/21f38d887eb16c49fa3ce43815f98c523544077fb456c06b2012e621d7d4aeca.png
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins)) + 0.06
    tmp[np.ix_(binfilter, binfilter)] = Gall[i]    
    ax.imshow(tmp[ll:rr, ll:rr], cmap='bwr', vmin=0, vmax=0.12)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, left=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = Gpc[i]
    x, y = np.arange(rr-ll), tmp[ll:rr]
    # x, y = np.arange(Gpc[i][ll:rr].shape[0]), Gpc[i][ll:rr]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_CGcorr_impute_zoomin.pdf', transparent=True)
../../_images/c5392f2cd0a6ea28d457853bbf59335971eb9b48c75575bc13575257107f2656.png
Hall = []
Hpc = []
for i,ct in enumerate(['L5_IT', 'Sst', 'ASC', 'MGC']):
    cool = cooler.Cooler(f'{indir}{ct}/{ct}.CHcorr.cool')
    Q = cool.matrix(balance=False, sparse=True).fetch(chrom).toarray()
    Q = Q - np.diag(np.diag(Q))
    Q = Q[binfilter][:, binfilter]
    Hall.append(Q)
    pc = pca.fit_transform(Q)
    cpgtmp = cpg.loc[cpg['#1_usercol']==chrom, 'CpG_density'].values[binfilter]
    if pearsonr(cpgtmp, pc[:,0])[0]>0:
        pc = pc[:,0]
    else:
        pc = -pc[:,0]
    Hpc.append(pc)
    print(ct)
L5_IT
Sst
ASC
MGC
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins)) + 0.05
    tmp[np.ix_(binfilter, binfilter)] = Hall[i]
    ax.imshow(tmp, cmap='bwr', vmin=-0.1, vmax=0.2)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, left=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = Hpc[i]
    x, y = np.arange(n_bins), tmp
    # x, y = np.arange(Hpc[i].shape[0]), Hpc[i]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_comp.pdf', transparent=True)
../../_images/c65c2d048ea727d26f7004a24cd566e7ee161e04949727a1c19db9bfc9504202.png
fig, axes = plt.subplots(2, 4, figsize=(12,4), gridspec_kw={'height_ratios':[4,0.5]}, sharex='col')
for i in range(4):
    ax = axes[0,i]
    tmp = np.zeros((n_bins, n_bins)) + 0.05
    tmp[np.ix_(binfilter, binfilter)] = Hall[i]    
    ax.imshow(tmp[ll:rr, ll:rr], cmap='bwr', vmin=-0.1, vmax=0.2)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(['L5_IT', 'Sst', 'ASC', 'MGC'][i], fontsize=15)

    ax = axes[1,i]
    # ax.set_title('PC1', fontsize=10)
    sns.despine(bottom=True, left=True, ax=ax)
    tmp = np.zeros(n_bins)
    tmp[binfilter] = Hpc[i]
    x, y = np.arange(rr-ll), tmp[ll:rr]
    # x, y = np.arange(Gpc[i][ll:rr].shape[0]), Gpc[i][ll:rr]
    ax.fill_between(x, y, 0, where=y >= 0, facecolor='C3', interpolate=True)
    ax.fill_between(x, y, 0, where=y <= 0, facecolor='C0', interpolate=True)
    ax.set_yticks([])
    ax.set_ylim([np.percentile(y, 1), np.percentile(y, 99)])

plt.tight_layout()
# plt.savefig(f'{indir}/plot/celltype_CGcorr_impute_zoomin.pdf', transparent=True)
../../_images/5736bdd3f314609cf05b64ae765840ae20f52e8507037f6997c2baec5cd7348e.png