{ "cells": [ { "cell_type": "markdown", "id": "075c71f7-bb14-4a49-b993-d8146a12ba46", "metadata": {}, "source": [ "# DipC clustering at 100kb resolution" ] }, { "cell_type": "code", "execution_count": 1, "id": "a202dbbe-f24a-44d4-91cb-50cfc64ee4e9", "metadata": { "execution": { "iopub.execute_input": "2023-06-26T17:20:04.583003Z", "iopub.status.busy": "2023-06-26T17:20:04.582607Z", "iopub.status.idle": "2023-06-26T17:20:19.491607Z", "shell.execute_reply": "2023-06-26T17:20:19.490889Z", "shell.execute_reply.started": "2023-06-26T17:20:04.582969Z" }, "tags": [] }, "outputs": [], "source": [ "import time\n", "import numpy as np\n", "import pandas as pd\n", "from glob import glob\n", "import anndata\n", "import scanpy as sc\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "from matplotlib.colors import LogNorm\n", "from matplotlib import cm as cm\n", "import seaborn as sns\n", "from scipy.sparse import csr_matrix\n", "from ALLCools.plot import *\n", "from ALLCools.clustering import *\n", "from sklearn.decomposition import TruncatedSVD\n", "from sklearn.preprocessing import normalize\n", "\n", "# Plot styling: reset to matplotlib's default style, then override the font settings in one call.\n", "# fonttype 42 selects TrueType embedding for PDF/PS output (per matplotlib's rcParams documentation).\n", "mpl.style.use('default')\n", "mpl.rcParams.update({\n", "    'pdf.fonttype': 42,\n", "    'ps.fonttype': 42,\n", "    'font.family': 'sans-serif',\n", "    'font.sans-serif': 'Helvetica',\n", "})\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "c15f1725-3a7d-4ac7-912b-f4c7e139ec44", "metadata": { "execution": { "iopub.execute_input": "2023-06-26T17:20:19.498477Z", "iopub.status.busy": "2023-06-26T17:20:19.498313Z", "iopub.status.idle": "2023-06-26T17:20:19.501468Z", "shell.execute_reply": "2023-06-26T17:20:19.500907Z", "shell.execute_reply.started": "2023-06-26T17:20:19.498461Z" }, "tags": [] }, "outputs": [], "source": [ "# Input directory for this analysis.\n", "# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.\n", "indir = '/data/test_schicluster/Tan2021/scool/dataset/'" ] }, { "cell_type": "code", "execution_count": 3, "id": "fe9fbfb5-9b8a-4810-891f-eaf7baf7db20", "metadata": { "execution": { "iopub.execute_input": 
"2023-06-26T17:20:19.502968Z", "iopub.status.busy": "2023-06-26T17:20:19.502787Z", "iopub.status.idle": "2023-06-26T17:20:19.813653Z", "shell.execute_reply": "2023-06-26T17:20:19.813127Z", "shell.execute_reply.started": "2023-06-26T17:20:19.502949Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0\n", "chr1 195471971\n", "chr2 182113224\n", "chr3 160039680\n", "chr4 156508116\n", "chr5 151834684\n", "chr6 149736546\n", "chr7 145441459\n", "chr8 129401213\n", "chr9 124595110\n", "chr10 130694993\n", "chr11 122082543\n", "chr12 120129022\n", "chr13 120421639\n", "chr14 124902244\n", "chr15 104043685\n", "chr16 98207768\n", "chr17 94987271\n", "chr18 90702639\n", "chr19 61431566\n", "Name: 1, dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the headerless, tab-separated mm10 autosome sizes file: column 0 becomes the\n", "# index and the column labelled 1 is kept as a Series.\n", "chrom_sizes = pd.read_csv(\n", "    '/data/ref/mm10/genome/mm10.autosome.chrom.sizes',\n", "    sep='\\t',\n", "    header=None,\n", "    index_col=0,\n", ").loc[:, 1]\n", "chrom_sizes" ] }, { "cell_type": "code", "execution_count": 4, "id": "a38d08c2-29f1-4607-adb8-7b66845601cc", "metadata": { "execution": { "iopub.execute_input": "2023-06-26T17:20:19.814579Z", "iopub.status.busy": "2023-06-26T17:20:19.814391Z", "iopub.status.idle": "2023-06-26T17:20:20.117090Z", "shell.execute_reply": "2023-06-26T17:20:20.116554Z", "shell.execute_reply.started": "2023-06-26T17:20:19.814562Z" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | 1 | \n", "
---|---|
0 | \n", "\n", " |
cortex-p028-cb_116 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-visual-control-p007-b6_182 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-p028-cb_112 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-visual-control-p001-b6_061 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-p056-cb_216 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
... | \n", "... | \n", "
cortex-visual-control-p021-b6_090 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-visual-control-p021-b6_012 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
hippocampus-p007-cb_046 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-visual-dark-p014-b6_106 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
cortex-visual-control-p021-b6_174 | \n", "/anvil/scratch/x-zhou/Tan2021/scool/impute/100... | \n", "
3646 rows × 1 columns
\n", "\n", " | tissue | \n", "treatment | \n", "age | \n", "sex | \n", "father | \n", "mother | \n", "restriction enzyme | \n", "cell-type cluster | \n", "reads | \n", "read length (bp) | \n", "raw throughput (Gb) | \n", "raw contacts | \n", "raw intra (%) | \n", "dup rate (%) | \n", "contacts | \n", "intra (%) | \n", "phased legs (%) | \n", "raw contacts per read (%) | \n", "20kb RMS RMSD | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
cell | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
cortex-p001-cb_001 | \n", "cortex | \n", "control | \n", "P1 | \n", "female | \n", "C57BL/6J | \n", "CAST/EiJ | \n", "MboI | \n", "Neonatal Astrocyte | \n", "13,675,687 | \n", "150 | \n", "4.1 | \n", "1,703,101 | \n", "70.8 | \n", "70.4 | \n", "503,696 | \n", "68.1 | \n", "47.0 | \n", "12.5 | \n", "0.50 | \n", "
cortex-p001-cb_002 | \n", "cortex | \n", "control | \n", "P1 | \n", "female | \n", "C57BL/6J | \n", "CAST/EiJ | \n", "MboI | \n", "Neonatal Neuron 1 | \n", "10,057,130 | \n", "150 | \n", "3.0 | \n", "1,202,070 | \n", "76.5 | \n", "68.6 | \n", "377,718 | \n", "73.6 | \n", "47.0 | \n", "12.0 | \n", "10.07 | \n", "
cortex-p001-cb_003 | \n", "cortex | \n", "control | \n", "P1 | \n", "female | \n", "C57BL/6J | \n", "CAST/EiJ | \n", "MboI | \n", "Neonatal Neuron 2 | \n", "12,673,668 | \n", "150 | \n", "3.8 | \n", "1,163,722 | \n", "84.2 | \n", "68.6 | \n", "365,317 | \n", "80.2 | \n", "46.5 | \n", "9.2 | \n", "11.35 | \n", "
cortex-p001-cb_004 | \n", "cortex | \n", "control | \n", "P1 | \n", "female | \n", "C57BL/6J | \n", "CAST/EiJ | \n", "MboI | \n", "Unknown | \n", "12,936,354 | \n", "150 | \n", "3.9 | \n", "1,289,148 | \n", "84.6 | \n", "69.2 | \n", "396,869 | \n", "81.0 | \n", "46.5 | \n", "10.0 | \n", "14.27 | \n", "
cortex-p001-cb_005 | \n", "cortex | \n", "control | \n", "P1 | \n", "female | \n", "C57BL/6J | \n", "CAST/EiJ | \n", "MboI | \n", "Neonatal Neuron 2 | \n", "14,086,574 | \n", "150 | \n", "4.2 | \n", "1,510,084 | \n", "76.9 | \n", "71.2 | \n", "434,395 | \n", "73.4 | \n", "46.9 | \n", "10.7 | \n", "0.97 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
cortex-visual-dark-p028-b6_188 | \n", "visual cortex | \n", "dark rearing | \n", "P28 | \n", "male | \n", "C57BL/6N | \n", "C57BL/6N | \n", "NlaIII | \n", "Neuron | \n", "5,796,965 | \n", "150 | \n", "1.7 | \n", "603,448 | \n", "92.2 | \n", "57.7 | \n", "255,561 | \n", "90.4 | \n", "0.0 | \n", "10.4 | \n", "NaN | \n", "
cortex-visual-dark-p028-b6_189 | \n", "visual cortex | \n", "dark rearing | \n", "P28 | \n", "male | \n", "C57BL/6N | \n", "C57BL/6N | \n", "NlaIII | \n", "Neuron | \n", "6,520,182 | \n", "150 | \n", "2.0 | \n", "736,632 | \n", "90.6 | \n", "59.8 | \n", "296,289 | \n", "88.7 | \n", "0.0 | \n", "11.3 | \n", "NaN | \n", "
cortex-visual-dark-p028-b6_190 | \n", "visual cortex | \n", "dark rearing | \n", "P28 | \n", "male | \n", "C57BL/6N | \n", "C57BL/6N | \n", "NlaIII | \n", "Neuron | \n", "10,724,783 | \n", "150 | \n", "3.2 | \n", "1,296,262 | \n", "90.6 | \n", "70.6 | \n", "381,381 | \n", "87.9 | \n", "0.0 | \n", "12.1 | \n", "NaN | \n", "
cortex-visual-dark-p028-b6_191 | \n", "visual cortex | \n", "dark rearing | \n", "P28 | \n", "male | \n", "C57BL/6N | \n", "C57BL/6N | \n", "NlaIII | \n", "Unknown | \n", "6,720,196 | \n", "150 | \n", "2.0 | \n", "473,515 | \n", "85.4 | \n", "58.8 | \n", "195,243 | \n", "83.0 | \n", "0.0 | \n", "7.0 | \n", "NaN | \n", "
cortex-visual-dark-p028-b6_192 | \n", "visual cortex | \n", "dark rearing | \n", "P28 | \n", "male | \n", "C57BL/6N | \n", "C57BL/6N | \n", "NlaIII | \n", "Neuron | \n", "6,859,779 | \n", "150 | \n", "2.1 | \n", "909,985 | \n", "91.5 | \n", "62.6 | \n", "340,432 | \n", "89.8 | \n", "0.0 | \n", "13.3 | \n", "NaN | \n", "
4272 rows × 19 columns
\n", "