Spatial gradients in Xenium breast cancer tumor

import sys
import os
from collections import defaultdict
import pandas as pd
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
from glmpca import glmpca
from itertools import combinations
import torch

import sys
from importlib import reload

import gaston
from gaston import neural_net,cluster_plotting, dp_related, segmented_fit, restrict_spots, model_selection
from gaston import binning_and_plotting, isodepth_scaling, run_slurm_scripts, parse_adata
from gaston import spatial_gene_classification, plot_cell_types, filter_genes, process_NN_output

import seaborn as sns
import math

Step 1: pre-processing

Download data from here: https://cf.10xgenomics.com/samples/xenium/1.0.1/Xenium_FFPE_Human_Breast_Cancer_Rep1/Xenium_FFPE_Human_Breast_Cancer_Rep1_outs.zip

!mkdir -p xenium_brca_tutorial_outputs
# CHANGE TO: location of Xenium data folder
folder='/n/fs/ragr-data/datasets/xenium/Xenium_FFPE_Human_Breast_Cancer_Rep1_outs/outs/'
counts_mat,coords_mat,gene_labels,cell_types=parse_adata.get_gaston_input_xenium(folder, filter_zero_cells=True)
/n/fs/ragr-data/users/uchitra/miniconda3/envs/gaston-package/lib/python3.11/site-packages/anndata/_core/anndata.py:1113: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(df_full[k]):

Option 2: use PCs of analytic Pearson residuals

num_dims=20
clip=0.01 # have to clip values to be very small!

A = parse_adata.get_top_pearson_residuals(num_dims,counts_mat,coords_mat,clip=clip)

np.save('xenium_tumor_data/analytic_pearson.npy', A)
/n/fs/ragr-data/users/uchitra/miniconda3/envs/gaston-package/lib/python3.11/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
/n/fs/ragr-data/users/uchitra/miniconda3/envs/gaston-package/lib/python3.11/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)
/n/fs/ragr-data/users/uchitra/miniconda3/envs/gaston-package/lib/python3.11/site-packages/anndata/_core/anndata.py:1113: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(df_full[k]):
/n/fs/ragr-data/users/uchitra/miniconda3/envs/gaston-package/lib/python3.11/site-packages/anndata/_core/anndata.py:1113: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(df_full[k]):
# visualize top GLM-PCs
R=5
C=4
fig,axs=plt.subplots(R,C,figsize=(5*C,5*R))
for r in range(R):
    for c in range(C):
        i=r*C+c
        axs[r,c].scatter(-1*coords_mat[:,0], -1*coords_mat[:,1], c=A[:,i],cmap='Reds',s=0.5)
        axs[r,c].set_title(f'PC{i}')
../../_images/92bc60053abb1d2d7ac414ff5730b1ae66cc9214da231525b5278c604391b2a7.png

Step 1.5 (dataset-specific): extract atypical ductal hyperplasia (ADH) crop of tissue

We will extract a crop of the tissue corresponding to the ADH region of the tissue. This is custom code for this specific dataset.

# choose x and y limits
xmax=3250
xmin=1500

ymin=2000
ymax=4000

# visualize cropped region
plt.scatter(-1*coords_mat[:,0], coords_mat[:,1], c=A[:,1],cmap='Reds',s=1)

plt.axvline(-1*xmax,ls='--',color='black')
plt.axvline(-1*xmin,ls='--',color='black')

plt.axhline(ymax,ls='--',color='black')
plt.axhline(ymin,ls='--',color='black')
<matplotlib.lines.Line2D at 0x7f7ba88abf50>
../../_images/d885fec23042e3528f97fe4a0acd10fcf6220ef45bc4a2fd3e104df952223002.png
# save cropped region

pts_bounded=np.where( (coords_mat[:,0] > xmin) & (coords_mat[:,0] < xmax) & 
                     (coords_mat[:,1] > ymin) & (coords_mat[:,1] < ymax) )[0]

counts_mat_bounded=counts_mat[pts_bounded,:]
coords_mat_bounded=coords_mat[pts_bounded,:]
pcs_bounded=A[pts_bounded,:]
cell_types_bounded=cell_types[pts_bounded]

np.save('xenium_tumor_data/coords_mat_bounded.npy', coords_mat_bounded)
np.save('xenium_tumor_data/glmpca_bounded.npy', pcs_bounded)
np.save('xenium_tumor_data/cell_labels_bounded.npy', cell_types_bounded)
np.save('xenium_tumor_data/counts_mat_bounded.npy', counts_mat[pts_bounded,:])
np.save('xenium_tumor_data/gene_labels.npy', gene_labels)

Step 2: Train GASTON network

We include how to train the neural network with two options: (1) a command line script and (2) in a notebook. We typically train the neural network 30 different times, each with a different seed, and we use the NN with lowest loss.

Option 1: Slurm

For option (1), the code below creates 30 different Slurm jobs, one for each initialization. To train the NN for a single initialization run the following command:

gaston -i /path/to/coords.npy -o /path/to/glmpca.npy -d /path/to/output_dir -e 10000 -c 500 -p 20 20 -x 20 20 -z adam -s SEED

for a given SEED value (integer)

NOTE: please wait for models to finish training before running below code. You can check on their status by running squeue -u uchitra (replacing uchitra with your username)

# Load data generated above
path_to_coords='xenium_tumor_data/coords_mat_bounded.npy'
path_to_glmpca='xenium_tumor_data/glmpca_bounded.npy'

# NN parameters
epochs = 100000 # number of epochs to train NN, lower for shorter runtime (eg use 20000 if you want to run in 5-10 min)
time="0-01:00:00" # limit slurm jobs to 1hr

isodepth_arch=[20,20] # architecture (two hidden layers of size 20) for isodepth neural network d(x,y)
expression_arch=[20,20] # architecture (two hidden layers of size 20) for 1-D expression function
checkpoint = 500 # save model after number of epochs = multiple of checkpoint
optimizer = "adam"
num_restarts=30 # number of initializations

output_dir='xenium_brca_tutorial_outputs' # folder to save model runs
conda_environment='gaston-package'
path_to_conda_folder='/n/fs/ragr-data/users/uchitra/miniconda3/bin/activate'

run_slurm_scripts.train_NN_parallel(path_to_coords, path_to_glmpca, isodepth_arch, expression_arch, 
                      output_dir, conda_environment, path_to_conda_folder,
                      epochs=epochs, checkpoint=checkpoint, 
                      num_seeds=num_restarts,time=time)
jobId: 20575781
jobId: 20575782
jobId: 20575783
jobId: 20575784
jobId: 20575785
jobId: 20575786
jobId: 20575787
jobId: 20575788
jobId: 20575789
jobId: 20575790
jobId: 20575791
jobId: 20575792
jobId: 20575793
jobId: 20575794
jobId: 20575795
jobId: 20575796
jobId: 20575797
jobId: 20575798
jobId: 20575799
jobId: 20575800
jobId: 20575801
jobId: 20575802
jobId: 20575803
jobId: 20575804
jobId: 20575805
jobId: 20575806
jobId: 20575807
jobId: 20575808
jobId: 20575809
jobId: 20575810

Option 2: train in notebook

path_to_coords='xenium_tumor_data/coords_mat_bounded.npy'
path_to_glmpca='xenium_tumor_data/glmpca_bounded.npy'

A=np.load(path_to_glmpca)
S=np.load(path_to_coords)

# z-score normalize S and A
S_torch, A_torch = neural_net.load_rescale_input_data(S,A)
######################################
# NEURAL NET PARAMETERS (USER CAN CHANGE)
# architectures are encoded as list, eg [20,20] means two hidden layers of size 20 hidden neurons
isodepth_arch=[20,20] # architecture for isodepth neural network d(x,y) : R^2 -> R 
expression_fn_arch=[20,20] # architecture for 1-D expression function h(w) : R -> R^G

num_epochs = 200000 # number of epochs to train NN (NOTE: it is sometimes beneficial to train longer)
checkpoint = 500 # save model after number of epochs = multiple of checkpoint
out_dir='xenium_brca_tutorial_outputs' # folder to save model runs
optimizer = "adam"
num_restarts=30
device='cuda' # change to 'cpu' if you don't have a GPU

######################################

seed_list=range(num_restarts)
for seed in seed_list:
    print(f'training neural network for seed {seed}')
    out_dir_seed=f"{out_dir}/rep{seed}"
    os.makedirs(out_dir_seed, exist_ok=True)
    mod, loss_list = neural_net.train(S_torch, A_torch,
                          S_hidden_list=isodepth_arch, A_hidden_list=expression_fn_arch, 
                          epochs=num_epochs, checkpoint=checkpoint, device=device,
                          save_dir=out_dir_seed, optim=optimizer, seed=seed, save_final=True)

Step 3: process GASTON output

We note that plots may differ from those in the main text due to non-determinism of PyTorch training.

gaston_model, A, S= process_NN_output.process_files('xenium_brca_tutorial_outputs') # model trained above


# May need to re-load data
counts_mat=np.load('xenium_tumor_data/counts_mat_bounded.npy',allow_pickle=True)
coords_mat=np.load('xenium_tumor_data/coords_mat_bounded.npy',allow_pickle=True)
gene_labels=np.load('xenium_tumor_data/gene_labels.npy',allow_pickle=True)
cell_labels_bounded=np.load('xenium_tumor_data/cell_labels_bounded.npy',allow_pickle=True)
best model: xenium_brca_tutorial_outputs/seed11

Model selection for choosing number of domains

model_selection.plot_ll_curve(gaston_model, A, S, max_domain_num=10, start_from=2)
Kneedle number of domains: 4
../../_images/8bda86e4f1da486593b64f3f2ba6491cb478f377e66af103d468a41868cd8616.png
num_layers=4 # CHANGE FOR YOUR APPLICATION (use number of layers from above)
gaston_isodepth, gaston_labels=dp_related.get_isodepth_labels(gaston_model,A,S,num_layers)

# DATASET-SPECIFIC: so domains are ordered outer to inner
gaston_isodepth=np.max(gaston_isodepth) - gaston_isodepth
gaston_labels=(num_layers-1)-gaston_labels
# WITH VISUALIZATION
gaston_isodepth,scaling_factors=isodepth_scaling.adjust_isodepth(gaston_isodepth, gaston_labels, coords_mat, num_domains=num_layers,
                                 q_vals=[0.1,0.1,0.1,0.05], visualize=True, figsize=(12,12),num_rows=2,
                                                return_scaling_factors=True)
../../_images/a0a95ed5299f7d0c75eb7f62dfe070fdd02aba8fbbf1d6927751b02e81d4efb1.png
show_streamlines=True

linear_transform=np.array([[-1,0],[0,1]]) # reflect points across x axis
cluster_plotting.plot_isodepth(gaston_isodepth, S, gaston_model, 
                               figsize=(7,6), streamlines=show_streamlines, s=1,
                              scaling_factors=scaling_factors,
                              gaston_labels_for_scaling=gaston_labels,
                              streamlines_lw=2, contour_levels=10, 
                               cbar_fs=20, contour_fs=15,
                               linear_transform=linear_transform)
../../_images/f2a9754f69a1b882d75ab0949211653eafb12fb9cc639648ccc83e21f8e542a0.png
layer_colors=['darksalmon', 'deepskyblue', 'limegreen', 'red']
labels=['Invasive tumor domain', 'Stromal domain', 'Immune domain', 'DCIS domain']

cluster_plotting.plot_clusters(gaston_labels, S, figsize=(5.5,6), colors=layer_colors, 
                               s=8, lgd=True, labels=labels,
                              bbox_to_anchor=(1.05,1),linear_transform=linear_transform)
../../_images/222b1aecfd7acbeb0907fee1fe2a38a24ffb43c8b19b1afb9de1cefd45564e3c.png

Cell types vs isodepth

cell_type_df=process_NN_output.create_cell_type_df(cell_labels_bounded)
cell_type_df
B_Cells CD4+_T_Cells CD8+_T_Cells DCIS_1 DCIS_2 Endothelial IRF7+_DCs Invasive_Tumor LAMP3+_DCs Macrophages_1 Macrophages_2 Mast_Cells Myoepi_ACTA2+ Myoepi_KRT15+ Perivascular-Like Prolif_Invasive_Tumor Stromal Stromal_&_T_Cell_Hybrid T_Cell_&_Tumor_Hybrid Unlabeled
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16616 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
16617 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
16618 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
16619 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
16620 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

16621 rows × 20 columns

ct_colors={'CD8+_T_Cells': 'gold',
           'Invasive_Tumor': 'darkorange',
           'B_Cells': 'forestgreen',
           'Myoepi_ACTA2+': 'darkturquoise',
           'DCIS_2': 'orangered',
           'CD4+_T_Cells': 'darkviolet',
           'Endothelial': 'palegreen',
           'Stromal': 'dodgerblue'}

plot_cell_types.plot_ct_props(cell_type_df, gaston_labels, gaston_isodepth, 
                              num_bins_per_domain=[7,10,7,7], ct_colors=None,
                              include_lgd=True, figsize=(15,7), ticksize=30, width1=8, width2=2, 
                              domain_ct_threshold=0.7)
plt.xlabel('Isodepth (µm)', fontsize=40)
plt.ylabel('Cell type proportion', fontsize=40)
(7, 31) ['CD8+_T_Cells', 'CD4+_T_Cells', 'Invasive_Tumor', 'DCIS_2', 'Myoepi_ACTA2+', 'Stromal', 'B_Cells']
Text(0, 0.5, 'Cell type proportion')
../../_images/e003bd7bbf8970e4a1259a4087e2492a1e9fe58883c1a158627502a5be6838ef.png

Spatially varying gene analysis

# Cell types for which to compute cell type-specific expression functions.
# ct_list=[] # USE THIS EITHER: (1) for speed if you don't care about cell type specific effects or (2) you don't have cell type dict
ct_list=plot_cell_types.domain_cts_svg(cell_type_df, gaston_labels, gaston_isodepth, domain_ct_threshold=0.7)

print(f'Cell types to compute expression functions for: {ct_list}')

# if you want to get rid of warnings
import warnings
warnings.filterwarnings("ignore")

# Piecewise linear fit parameters
t=0.1 # set slope=0 if LLR p-value > 0.1
umi_threshold=500 # only compute fit for genes with total UMI > 500
zero_fit_threshold=0 # xenium is not sparse so can compute for all cell types

pw_fit_dict=segmented_fit.pw_linear_fit(counts_mat, gaston_labels, gaston_isodepth, 
                                          cell_type_df, ct_list, zero_fit_threshold=zero_fit_threshold,
                                        t=t,umi_threshold=umi_threshold,
                                       isodepth_mult_factor=0.1) # isodepth_mult_factor for stability, if range of isodepth values is too large
Cell types to compute expression functions for: ['B_Cells' 'CD4+_T_Cells' 'CD8+_T_Cells' 'DCIS_2' 'Invasive_Tumor'
 'Myoepi_ACTA2+' 'Stromal']
Poisson regression for ALL cell types
100%|██████████| 280/280 [00:09<00:00, 28.34it/s]
Poisson regression for cell type: B_Cells
100%|██████████| 280/280 [00:05<00:00, 54.34it/s]
Poisson regression for cell type: CD4+_T_Cells
100%|██████████| 280/280 [00:03<00:00, 72.85it/s]
Poisson regression for cell type: CD8+_T_Cells
100%|██████████| 280/280 [00:06<00:00, 41.25it/s]
Poisson regression for cell type: DCIS_2
100%|██████████| 280/280 [00:02<00:00, 133.76it/s]
Poisson regression for cell type: Invasive_Tumor
100%|██████████| 280/280 [00:04<00:00, 58.01it/s]
Poisson regression for cell type: Myoepi_ACTA2+
100%|██████████| 280/280 [00:02<00:00, 131.37it/s]
Poisson regression for cell type: Stromal
100%|██████████| 280/280 [00:08<00:00, 34.14it/s]
binning_output=binning_and_plotting.bin_data(counts_mat.T, gaston_labels, gaston_isodepth, 
                         cell_type_df, gene_labels, num_bins_per_domain=[7,10,7,7], umi_threshold=umi_threshold)
domain_cts=plot_cell_types.get_domain_cts(binning_output, 0.7)
discont_genes_layer=spatial_gene_classification.get_discont_genes(pw_fit_dict, binning_output)

cont_genes_layer=spatial_gene_classification.get_cont_genes(pw_fit_dict, binning_output, q=0.9)

cont_genes_ct=spatial_gene_classification.get_cont_genes(pw_fit_dict, binning_output, ct_attributable=True, domain_cts=domain_cts,q=0.9)
cont_genes_ct
{'ABCC11': [(1, 'Stromal')],
 'ADH1B': [(2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (2, 'B_Cells')],
 'AQP1': [(0, 'Other')],
 'AR': [(1, 'Stromal')],
 'AVPR1A': [(1, 'Stromal')],
 'BANK1': [(0, 'Other'), (2, 'Stromal'), (2, 'CD4+_T_Cells')],
 'BTNL9': [(0, 'Invasive_Tumor')],
 'C15orf48': [(2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (2, 'B_Cells')],
 'C5orf46': [(2, 'Stromal')],
 'CCDC80': [(3, 'Other')],
 'CCL5': [(3, 'Other')],
 'CCL8': [(0, 'Other')],
 'CD14': [(2, 'Stromal'), (2, 'CD8+_T_Cells')],
 'CD19': [(2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (3, 'Other')],
 'CD1C': [(3, 'Myoepi_ACTA2+'), (3, 'DCIS_2')],
 'CD247': [(3, 'Stromal')],
 'CD27': [(3, 'DCIS_2')],
 'CD3D': [(3, 'Myoepi_ACTA2+'), (3, 'Stromal')],
 'CD3E': [(3, 'Stromal')],
 'CD3G': [(3, 'Other')],
 'CD69': [(3, 'Stromal')],
 'CD79A': [(0, 'Other'),
  (2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (3, 'Stromal')],
 'CD79B': [(0, 'Other'), (2, 'Stromal')],
 'CD8A': [(3, 'Stromal')],
 'CD9': [(1, 'Stromal')],
 'CD93': [(0, 'Other')],
 'CENPF': [(1, 'Stromal')],
 'CLEC14A': [(0, 'Other'), (1, 'Other')],
 'CPA3': [(0, 'Other')],
 'CRISPLD2': [(2, 'CD8+_T_Cells')],
 'CTSG': [(0, 'Invasive_Tumor')],
 'CXCL12': [(3, 'Stromal')],
 'DERL3': [(0, 'Other')],
 'DPT': [(3, 'Myoepi_ACTA2+'), (3, 'Stromal')],
 'EDN1': [(0, 'Other')],
 'EDNRB': [(3, 'Stromal')],
 'ELF3': [(1, 'Stromal')],
 'ENAH': [(2, 'Stromal'), (2, 'CD8+_T_Cells')],
 'EPCAM': [(1, 'Stromal')],
 'ESM1': [(1, 'Stromal')],
 'FASN': [(1, 'Stromal')],
 'FBLN1': [(3, 'Other')],
 'FOXA1': [(1, 'Stromal')],
 'FSTL3': [(2, 'CD8+_T_Cells'), (2, 'B_Cells')],
 'GATA3': [(1, 'Stromal')],
 'GJB2': [(2, 'Stromal'), (2, 'CD8+_T_Cells'), (2, 'B_Cells')],
 'GNLY': [(0, 'Other')],
 'HOXD9': [(0, 'Other')],
 'IGF1': [(3, 'Stromal')],
 'IL2RA': [(2, 'Stromal'), (2, 'CD8+_T_Cells'), (2, 'B_Cells')],
 'IL2RG': [(3, 'Stromal')],
 'IL3RA': [(0, 'Other'), (2, 'Stromal'), (2, 'CD8+_T_Cells'), (2, 'B_Cells')],
 'IL7R': [(3, 'Myoepi_ACTA2+')],
 'KLF5': [(1, 'Stromal')],
 'KRT6B': [(2, 'Stromal'), (2, 'CD4+_T_Cells')],
 'KRT7': [(1, 'Stromal')],
 'KRT8': [(1, 'Stromal')],
 'LILRA4': [(2, 'Stromal'), (2, 'CD4+_T_Cells'), (2, 'CD8+_T_Cells')],
 'LRRC15': [(0, 'Invasive_Tumor'),
  (2, 'Stromal'),
  (2, 'CD8+_T_Cells'),
  (3, 'Myoepi_ACTA2+')],
 'LTB': [(3, 'Stromal')],
 'LUM': [(3, 'Myoepi_ACTA2+')],
 'MLPH': [(1, 'Stromal')],
 'MMP1': [(2, 'Stromal')],
 'MMP2': [(3, 'Other')],
 'MMRN2': [(0, 'Other')],
 'MS4A1': [(0, 'Other'),
  (2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (3, 'Stromal')],
 'MYH11': [(0, 'Other'), (1, 'Stromal')],
 'MYO5B': [(1, 'Stromal')],
 'MZB1': [(2, 'Stromal'), (2, 'CD4+_T_Cells')],
 'NOSTRIN': [(0, 'Other')],
 'PCLAF': [(1, 'Stromal')],
 'PCOLCE': [(2, 'Stromal')],
 'PECAM1': [(0, 'Invasive_Tumor')],
 'POSTN': [(2, 'CD8+_T_Cells')],
 'PRDM1': [(0, 'Other')],
 'PTGDS': [(1, 'Stromal'), (1, 'Invasive_Tumor'), (3, 'Stromal')],
 'PTPRC': [(3, 'Other')],
 'RAMP2': [(0, 'Other')],
 'SCD': [(1, 'Stromal')],
 'SELL': [(2, 'Stromal'), (2, 'CD8+_T_Cells')],
 'SERHL2': [(1, 'Stromal')],
 'SFRP1': [(1, 'Stromal'), (1, 'Invasive_Tumor')],
 'SFRP4': [(3, 'Myoepi_ACTA2+'), (3, 'Stromal')],
 'SLAMF7': [(0, 'Other')],
 'SOX17': [(1, 'Other')],
 'SPIB': [(2, 'CD4+_T_Cells'), (2, 'CD8+_T_Cells')],
 'STC1': [(1, 'Other')],
 'TAC1': [(0, 'Invasive_Tumor')],
 'TCL1A': [(2, 'Stromal'),
  (2, 'CD4+_T_Cells'),
  (2, 'CD8+_T_Cells'),
  (2, 'B_Cells')],
 'TENT5C': [(2, 'Stromal'), (2, 'CD8+_T_Cells')],
 'TFAP2A': [(1, 'Stromal')],
 'TIMP4': [(0, 'Other')],
 'TNFRSF17': [(0, 'Other'),
  (1, 'Stromal'),
  (2, 'Stromal'),
  (2, 'CD8+_T_Cells'),
  (3, 'Stromal')],
 'TOP2A': [(1, 'Stromal')],
 'TPD52': [(2, 'Stromal')],
 'VWF': [(0, 'Other')]}

Plot two examples of genes with continuous gradients in layer 2, from above dictionary: SELL and TCL1A

gene_name='SELL'
print(f'gene {gene_name}, continuous gradients {cont_genes_ct[gene_name]}')

binning_and_plotting.plot_gene_pwlinear(gene_name, pw_fit_dict, gaston_labels, gaston_isodepth, 
                                        binning_output, cell_type_list=None, pt_size=50, colors=None, 
                                        linear_fit=True, ticksize=15, figsize=(4,2.5), offset=10**6, lw=3,
                                       domain_boundary_plotting=True)
# plt.ylim((1.5,8.5))

# show cell type-specific functions in domain 1 (granule)
domain=2
cts=domain_cts[domain]
#remove stromal
cts=[x for x in cts if x != 'Stromal']

binning_and_plotting.plot_gene_pwlinear(gene_name, pw_fit_dict, gaston_labels, gaston_isodepth, binning_output,
                                        cell_type_list=cts,ct_colors=None, spot_threshold=0.05, pt_size=60, 
                                        colors=None, linear_fit=True, domain_list=[domain],ticksize=15, 
                                        figsize=(1.5,2.5), offset=10**6, lw=3, alpha=0.8,show_lgd=True)
cts_list=' '.join(map(str, cts))
plt.title(f'{cts_list} only')
gene SELL, continuous gradients [(2, 'Stromal'), (2, 'CD8+_T_Cells')]
Text(0.5, 1.0, 'CD4+_T_Cells CD8+_T_Cells B_Cells only')
../../_images/b382a47a7923c34c99228efe857ea2c06525a27a3c06ad535fd19296b3866356.png ../../_images/668505a0fdd63e540e09e5374cc28ffb255348e11879f1a4935ffe68d26e9264.png
gene_name='TCL1A'
print(f'gene {gene_name}, continuous gradients {cont_genes_ct[gene_name]}')

binning_and_plotting.plot_gene_pwlinear(gene_name, pw_fit_dict, gaston_labels, gaston_isodepth, 
                                        binning_output, cell_type_list=None, pt_size=50, colors=None, 
                                        linear_fit=True, ticksize=15, figsize=(4,2.5), offset=10**6, lw=3,
                                       domain_boundary_plotting=True)
# plt.ylim((1.5,8.5))

# show cell type-specific functions in domain 1 (granule)
domain=2
cts=domain_cts[domain]
#remove stromal
cts=[x for x in cts if x != 'Stromal']

binning_and_plotting.plot_gene_pwlinear(gene_name, pw_fit_dict, gaston_labels, gaston_isodepth, binning_output,
                                        cell_type_list=cts,ct_colors=None, spot_threshold=0.05, pt_size=60, 
                                        colors=None, linear_fit=True, domain_list=[domain],ticksize=15, 
                                        figsize=(1.5,2.5), offset=10**6, lw=3, alpha=0.8,show_lgd=True)
cts_list=' '.join(map(str, cts))
plt.title(f'{cts_list} only')
gene TCL1A, continuous gradients [(2, 'Stromal'), (2, 'CD4+_T_Cells'), (2, 'CD8+_T_Cells'), (2, 'B_Cells')]
Text(0.5, 1.0, 'CD4+_T_Cells CD8+_T_Cells B_Cells only')
../../_images/4d92e98de418c5da948f97ff6c7d112c143dd12a2314bf2b0483d81c1cbf23bf.png ../../_images/2941b0453e933e5edae6e449b0f4919e55d2a11e141247a62ffe81db41c25a11.png