Simulation¶

We'll explore three key scenarios: Independent, Common, and Divergent responses.

Setup¶

First, let's import the necessary libraries and C-score functions:

In [ ]:

Copied!





import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

# Import C-score functionality
try:
    from cscore.core import compute_score
    print("C-score module loaded successfully")
except ImportError:
    print("please install cscore: pip install -e .")
    print("   or run this notebook from the cscore repository root")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

# Import C-score functionality
try:
    from cscore.core import compute_score
    print("C-score module loaded successfully")
except ImportError:
    print("please install cscore: pip install -e .")
    print("   or run this notebook from the cscore repository root")

Data Generation Functions¶

Let's create functions to simulate different biological scenarios:

In [5]:

Copied!





def generate_gene_data(n_genes=1000, scenario="common", seed=42, noise_level=0.3):
    """
    Generate simulated differential expression data for two comparisons.
    
    Parameters:
    -----------
    n_genes : int
        Number of genes to simulate
    scenario : str
        Type of relationship ("common", "divergent", "mixed", "independent")
    seed : int
        Random seed for reproducibility
    noise_level : float
        Amount of noise to add (0 = perfect correlation, 1 = high noise)
    
    Returns:
    --------
    tuple: (comparison1_data, comparison2_data) as numpy arrays
           Each array has shape (n_genes, 2) with columns [fold_change, fdr]
    """
    rng = np.random.default_rng(seed)
    
    # Generate base fold changes
    fc1 = rng.normal(0, 1.0, n_genes)
    
    if scenario == "common":
        # Genes with similar responses in both comparisons
        fc2 = fc1 + rng.normal(0, noise_level, n_genes)
        description = "Genes respond similarly in both comparisons"
        
    elif scenario == "divergent":
        # Genes with opposite responses
        fc2 = -fc1 + rng.normal(0, noise_level, n_genes)
        description = "Genes respond oppositely in the two comparisons"
        
    elif scenario == "mixed":
        # Mix of common and divergent responses
        mask = rng.random(n_genes) < 0.5
        fc2 = np.where(mask, 
                      fc1 + rng.normal(0, noise_level, n_genes),  # Common
                      -fc1 + rng.normal(0, noise_level, n_genes))  # Divergent
        description = "Mix of common and divergent responses"
        
    elif scenario == "independent":
        # Independent responses (no relationship)
        fc2 = rng.normal(0, 1.0, n_genes)
        description = "Independent responses (no relationship)"
        
    else:
        raise ValueError(f"Unknown scenario: {scenario}")
    
    # Generate FDR values (20% significant genes, 80% non-significant)
    sig_fraction = 0.2
    sig_mask = rng.random(n_genes) < sig_fraction
    
    fdr1 = np.where(sig_mask, 
                   rng.uniform(1e-6, 0.05, n_genes),   # Significant
                   rng.uniform(0.05, 1.0, n_genes))    # Non-significant
    
    fdr2 = np.where(sig_mask, 
                   rng.uniform(1e-6, 0.05, n_genes),   # Significant
                   rng.uniform(0.05, 1.0, n_genes))    # Non-significant
    
    # Format as required by compute_score: [fold_change, fdr]
    comp1 = np.column_stack([fc1, fdr1])
    comp2 = np.column_stack([fc2, fdr2])
    
    print(f"📊 {scenario.title()} scenario: {description}")
    print(f"   Generated {n_genes:,} genes ({sig_fraction*100:.0f}% significant)")
    
    return comp1, comp2
def generate_gene_data(n_genes=1000, scenario="common", seed=42, noise_level=0.3):
    """
    Generate simulated differential expression data for two comparisons.
    
    Parameters:
    -----------
    n_genes : int
        Number of genes to simulate
    scenario : str
        Type of relationship ("common", "divergent", "mixed", "independent")
    seed : int
        Random seed for reproducibility
    noise_level : float
        Amount of noise to add (0 = perfect correlation, 1 = high noise)
    
    Returns:
    --------
    tuple: (comparison1_data, comparison2_data) as numpy arrays
           Each array has shape (n_genes, 2) with columns [fold_change, fdr]
    """
    rng = np.random.default_rng(seed)
    
    # Generate base fold changes
    fc1 = rng.normal(0, 1.0, n_genes)
    
    if scenario == "common":
        # Genes with similar responses in both comparisons
        fc2 = fc1 + rng.normal(0, noise_level, n_genes)
        description = "Genes respond similarly in both comparisons"
        
    elif scenario == "divergent":
        # Genes with opposite responses
        fc2 = -fc1 + rng.normal(0, noise_level, n_genes)
        description = "Genes respond oppositely in the two comparisons"
        
    elif scenario == "mixed":
        # Mix of common and divergent responses
        mask = rng.random(n_genes) < 0.5
        fc2 = np.where(mask, 
                      fc1 + rng.normal(0, noise_level, n_genes),  # Common
                      -fc1 + rng.normal(0, noise_level, n_genes))  # Divergent
        description = "Mix of common and divergent responses"
        
    elif scenario == "independent":
        # Independent responses (no relationship)
        fc2 = rng.normal(0, 1.0, n_genes)
        description = "Independent responses (no relationship)"
        
    else:
        raise ValueError(f"Unknown scenario: {scenario}")
    
    # Generate FDR values (20% significant genes, 80% non-significant)
    sig_fraction = 0.2
    sig_mask = rng.random(n_genes) < sig_fraction
    
    fdr1 = np.where(sig_mask, 
                   rng.uniform(1e-6, 0.05, n_genes),   # Significant
                   rng.uniform(0.05, 1.0, n_genes))    # Non-significant
    
    fdr2 = np.where(sig_mask, 
                   rng.uniform(1e-6, 0.05, n_genes),   # Significant
                   rng.uniform(0.05, 1.0, n_genes))    # Non-significant
    
    # Format as required by compute_score: [fold_change, fdr]
    comp1 = np.column_stack([fc1, fdr1])
    comp2 = np.column_stack([fc2, fdr2])
    
    print(f"📊 {scenario.title()} scenario: {description}")
    print(f"   Generated {n_genes:,} genes ({sig_fraction*100:.0f}% significant)")
    
    return comp1, comp2

Scenario 1: Independent Responses¶

Let's start with the random scenario - genes that respond independently in the two comparisons (no relationship):

In [7]:

Copied!





# Generate independent response data
comp1_indep, comp2_indep = generate_gene_data(n_genes=2000, scenario="independent", seed=789)

# Compute C-scores
scores_independent = compute_score(comp1_indep, comp2_indep)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_independent):.3f}")
print(f"Std:  {np.std(scores_independent):.3f}")
print(f"Range: [{np.min(scores_independent):.3f}, {np.max(scores_independent):.3f}]")

positive_pct = np.mean(scores_independent > 0) * 100
negative_pct = np.mean(scores_independent < 0) * 100
zero_pct = np.mean(scores_independent == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_independent, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")
# Generate independent response data
comp1_indep, comp2_indep = generate_gene_data(n_genes=2000, scenario="independent", seed=789)

# Compute C-scores
scores_independent = compute_score(comp1_indep, comp2_indep)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_independent):.3f}")
print(f"Std:  {np.std(scores_independent):.3f}")
print(f"Range: [{np.min(scores_independent):.3f}, {np.max(scores_independent):.3f}]")

positive_pct = np.mean(scores_independent > 0) * 100
negative_pct = np.mean(scores_independent < 0) * 100
zero_pct = np.mean(scores_independent == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_independent, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")

📊 Independent scenario: Independent responses (no relationship)
   Generated 2,000 genes (20% significant)
C-score Statistics:
Mean: -0.003
Std:  0.882
Range: [-4.307, 8.530]

Score Distribution:
Positive (common): 49.6%
Negative (divergent): 50.4%
Zero: 0.0%

Quantiles:
     1%:  -2.506
     5%:  -1.339
    25%:  -0.301
    50%:  -0.003
    75%:   0.264
    95%:   1.323
    99%:   2.856

In [8]:

Copied!





# Visualize the independent response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_indep[:, 0], comp2_indep[:, 0], alpha=0.6, s=20, color='orange')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Independent Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add reference lines
lims = ax1.get_xlim()
ax1.plot(lims, [0, 0], 'k-', alpha=0.3, linewidth=0.5)  # Horizontal line
ax1.plot([0, 0], lims, 'k-', alpha=0.3, linewidth=0.5)  # Vertical line
ax1.text(0.02, 0.98, 'No correlation\n(random scatter)', transform=ax1.transAxes,
         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Histogram of C-scores with quantile markers
ax2.hist(scores_independent, bins=50, alpha=0.7, color='orange', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['darkorange', 'gold', 'blue', 'gold', 'darkorange']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_independent, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Independent Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()
# Visualize the independent response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_indep[:, 0], comp2_indep[:, 0], alpha=0.6, s=20, color='orange')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Independent Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add reference lines
lims = ax1.get_xlim()
ax1.plot(lims, [0, 0], 'k-', alpha=0.3, linewidth=0.5)  # Horizontal line
ax1.plot([0, 0], lims, 'k-', alpha=0.3, linewidth=0.5)  # Vertical line
ax1.text(0.02, 0.98, 'No correlation\n(random scatter)', transform=ax1.transAxes,
         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

# Histogram of C-scores with quantile markers
ax2.hist(scores_independent, bins=50, alpha=0.7, color='orange', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['darkorange', 'gold', 'blue', 'gold', 'darkorange']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_independent, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Independent Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()

No description has been provided for this image

Scenario 2: Common Responses¶

Now let's examine genes that respond similarly in both comparisons:

In [9]:

Copied!





# Generate common response data
comp1_common, comp2_common = generate_gene_data(n_genes=2000, scenario="common", seed=42)

# Compute C-scores
scores_common = compute_score(comp1_common, comp2_common)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_common):.3f}")
print(f"Std:  {np.std(scores_common):.3f}")
print(f"Range: [{np.min(scores_common):.3f}, {np.max(scores_common):.3f}]")

positive_pct = np.mean(scores_common > 0) * 100
negative_pct = np.mean(scores_common < 0) * 100
zero_pct = np.mean(scores_common == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_common, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")
# Generate common response data
comp1_common, comp2_common = generate_gene_data(n_genes=2000, scenario="common", seed=42)

# Compute C-scores
scores_common = compute_score(comp1_common, comp2_common)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_common):.3f}")
print(f"Std:  {np.std(scores_common):.3f}")
print(f"Range: [{np.min(scores_common):.3f}, {np.max(scores_common):.3f}]")

positive_pct = np.mean(scores_common > 0) * 100
negative_pct = np.mean(scores_common < 0) * 100
zero_pct = np.mean(scores_common == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_common, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")

📊 Common scenario: Genes respond similarly in both comparisons
   Generated 2,000 genes (20% significant)
C-score Statistics:
Mean: 0.768
Std:  1.409
Range: [-0.425, 14.915]

Score Distribution:
Positive (common): 89.5%
Negative (divergent): 10.5%
Zero: 0.0%

Quantiles:
     1%:  -0.140
     5%:  -0.027
    25%:   0.053
    50%:   0.252
    75%:   0.855
    95%:   3.284
    99%:   6.854

In [10]:

Copied!





# Visualize the common response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_common[:, 0], comp2_common[:, 0], alpha=0.6, s=20, color='green')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Common Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add diagonal line (perfect correlation)
lims = ax1.get_xlim()
ax1.plot(lims, lims, 'r--', alpha=0.8, label='Perfect correlation')
ax1.legend()

# Histogram of C-scores with quantile markers
ax2.hist(scores_common, bins=50, alpha=0.7, color='green', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['red', 'orange', 'blue', 'orange', 'red']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_common, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Common Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()
# Visualize the common response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_common[:, 0], comp2_common[:, 0], alpha=0.6, s=20, color='green')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Common Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add diagonal line (perfect correlation)
lims = ax1.get_xlim()
ax1.plot(lims, lims, 'r--', alpha=0.8, label='Perfect correlation')
ax1.legend()

# Histogram of C-scores with quantile markers
ax2.hist(scores_common, bins=50, alpha=0.7, color='green', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['red', 'orange', 'blue', 'orange', 'red']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_common, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Common Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()

Scenario 3: Divergent Responses¶

Finally, let's examine genes that respond oppositely in the two comparisons:

In [11]:

Copied!





# Generate divergent response data
comp1_div, comp2_div = generate_gene_data(n_genes=2000, scenario="divergent", seed=123)

# Compute C-scores
scores_divergent = compute_score(comp1_div, comp2_div)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_divergent):.3f}")
print(f"Std:  {np.std(scores_divergent):.3f}")
print(f"Range: [{np.min(scores_divergent):.3f}, {np.max(scores_divergent):.3f}]")

positive_pct = np.mean(scores_divergent > 0) * 100
negative_pct = np.mean(scores_divergent < 0) * 100
zero_pct = np.mean(scores_divergent == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_divergent, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")
# Generate divergent response data
comp1_div, comp2_div = generate_gene_data(n_genes=2000, scenario="divergent", seed=123)

# Compute C-scores
scores_divergent = compute_score(comp1_div, comp2_div)

# Display statistics
print(f"C-score Statistics:")
print(f"Mean: {np.mean(scores_divergent):.3f}")
print(f"Std:  {np.std(scores_divergent):.3f}")
print(f"Range: [{np.min(scores_divergent):.3f}, {np.max(scores_divergent):.3f}]")

positive_pct = np.mean(scores_divergent > 0) * 100
negative_pct = np.mean(scores_divergent < 0) * 100
zero_pct = np.mean(scores_divergent == 0) * 100

print(f"\nScore Distribution:")
print(f"Positive (common): {positive_pct:.1f}%")
print(f"Negative (divergent): {negative_pct:.1f}%")
print(f"Zero: {zero_pct:.1f}%")

# Print quantiles
quantiles = [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]
print(f"\nQuantiles:")
for q in quantiles:
    value = np.quantile(scores_divergent, q)
    print(f"  {q*100:4.0f}%: {value:7.3f}")

📊 Divergent scenario: Genes respond oppositely in the two comparisons
   Generated 2,000 genes (20% significant)
C-score Statistics:
Mean: -0.713
Std:  1.092
Range: [-8.748, 0.343]

Score Distribution:
Positive (common): 8.9%
Negative (divergent): 91.1%
Zero: 0.0%

Quantiles:
     1%:  -5.301
     5%:  -2.952
    25%:  -0.868
    50%:  -0.289
    75%:  -0.069
    95%:   0.016
    99%:   0.083

In [13]:

Copied!





# Visualize the divergent response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_div[:, 0], comp2_div[:, 0], alpha=0.6, s=20, color='red')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Divergent Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add anti-diagonal line (perfect anti-correlation)
lims = ax1.get_xlim()
ax1.plot(lims, [-lims[0], -lims[1]], 'r--', alpha=0.8, label='Perfect anti-correlation')
ax1.legend()

# Histogram of C-scores with quantile markers
ax2.hist(scores_divergent, bins=50, alpha=0.7, color='red', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['darkred', 'orange', 'blue', 'orange', 'darkred']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_divergent, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Divergent Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()
# Visualize the divergent response scenario
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Scatter plot of fold changes
ax1.scatter(comp1_div[:, 0], comp2_div[:, 0], alpha=0.6, s=20, color='red')
ax1.set_xlabel('Comparison 1 (log2 fold change)')
ax1.set_ylabel('Comparison 2 (log2 fold change)')
ax1.set_title('Divergent Response: Fold Changes')
ax1.grid(True, alpha=0.3)

# Add anti-diagonal line (perfect anti-correlation)
lims = ax1.get_xlim()
ax1.plot(lims, [-lims[0], -lims[1]], 'r--', alpha=0.8, label='Perfect anti-correlation')
ax1.legend()

# Histogram of C-scores with quantile markers
ax2.hist(scores_divergent, bins=50, alpha=0.7, color='red', edgecolor='white', density=True)
ax2.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=2, label='Zero')

# Add quantile lines
quantiles_to_show = [0.05, 0.25, 0.5, 0.75, 0.95]
colors = ['darkred', 'orange', 'blue', 'orange', 'darkred']
for q, color in zip(quantiles_to_show, colors):
    value = np.quantile(scores_divergent, q)
    ax2.axvline(value, color=color, linestyle=':', alpha=0.8, 
                label=f'{q*100:.0f}% = {value:.2f}')

ax2.set_xlabel('C-score')
ax2.set_ylabel('Density')
ax2.set_title('Divergent Response: C-score Distribution')
ax2.grid(True, alpha=0.3)
ax2.legend()

plt.tight_layout()
plt.show()

Scenario Comparison¶

Let's compare all three scenarios side by side:

In [14]:

Copied!





# Create comprehensive comparison
scenarios = {
    'Independent': scores_independent,
    'Common': scores_common,
    'Divergent': scores_divergent
}

colors = ['orange', 'green', 'red']

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for i, (name, scores) in enumerate(scenarios.items()):
    ax = axes[i]
    
    # Create histogram
    ax.hist(scores, bins=40, alpha=0.7, color=colors[i], edgecolor='white', linewidth=0.5, density=True)
    
    # Add vertical line at zero
    ax.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=1)
    
    # Add quantile lines
    quantiles_to_show = [0.05, 0.5, 0.95]
    for q in quantiles_to_show:
        value = np.quantile(scores, q)
        ax.axvline(value, color='darkblue', linestyle=':', alpha=0.6, linewidth=1)
    
    # Formatting
    ax.set_title(f'{name} Response\n(n={len(scores):,} genes)', fontweight='bold')
    ax.set_xlabel('C-score')
    ax.set_ylabel('Density')
    ax.grid(True, alpha=0.3)
    
    # Add statistics text box
    mean_score = np.mean(scores)
    pos_pct = np.mean(scores > 0) * 100
    
    stats_text = f'Mean: {mean_score:.2f}\nPositive: {pos_pct:.0f}%'
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, 
            verticalalignment='top', 
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

plt.suptitle('C-score Distributions: Independent → Common → Divergent', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
# Create comprehensive comparison
scenarios = {
    'Independent': scores_independent,
    'Common': scores_common,
    'Divergent': scores_divergent
}

colors = ['orange', 'green', 'red']

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for i, (name, scores) in enumerate(scenarios.items()):
    ax = axes[i]
    
    # Create histogram
    ax.hist(scores, bins=40, alpha=0.7, color=colors[i], edgecolor='white', linewidth=0.5, density=True)
    
    # Add vertical line at zero
    ax.axvline(0, color='black', linestyle='--', alpha=0.5, linewidth=1)
    
    # Add quantile lines
    quantiles_to_show = [0.05, 0.5, 0.95]
    for q in quantiles_to_show:
        value = np.quantile(scores, q)
        ax.axvline(value, color='darkblue', linestyle=':', alpha=0.6, linewidth=1)
    
    # Formatting
    ax.set_title(f'{name} Response\n(n={len(scores):,} genes)', fontweight='bold')
    ax.set_xlabel('C-score')
    ax.set_ylabel('Density')
    ax.grid(True, alpha=0.3)
    
    # Add statistics text box
    mean_score = np.mean(scores)
    pos_pct = np.mean(scores > 0) * 100
    
    stats_text = f'Mean: {mean_score:.2f}\nPositive: {pos_pct:.0f}%'
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, 
            verticalalignment='top', 
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

plt.suptitle('C-score Distributions: Independent → Common → Divergent', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

In [15]:

Copied!





# Create detailed summary statistics table
summary_data = []

for name, scores in scenarios.items():
    # Calculate quantiles
    q01, q05, q25, q50, q75, q95, q99 = np.quantile(scores, [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])
    
    summary_data.append({
        'Scenario': name,
        'Mean': f"{np.mean(scores):.3f}",
        'Std': f"{np.std(scores):.3f}",
        'Min': f"{np.min(scores):.3f}",
        'Max': f"{np.max(scores):.3f}",
        '1%': f"{q01:.3f}",
        '5%': f"{q05:.3f}",
        '25%': f"{q25:.3f}",
        'Median': f"{q50:.3f}",
        '75%': f"{q75:.3f}",
        '95%': f"{q95:.3f}",
        '99%': f"{q99:.3f}",
        'Positive %': f"{np.mean(scores > 0) * 100:.1f}%",
        'Negative %': f"{np.mean(scores < 0) * 100:.1f}%"
    })

summary_df = pd.DataFrame(summary_data)

print("Comprehensive C-score Summary Statistics")
print("=" * 100)
print(summary_df.to_string(index=False))

print("\nKey Insights:")
print("• Independent scenario: Near-zero mean, symmetric distribution around zero (baseline)")
print("• Common scenario: Strongly positive mean, 89%+ positive scores")
print("• Divergent scenario: Strongly negative mean, 89%+ negative scores") 
print("• Range of C-scores: Typically between -3 to +3 for these simulations")
# Create detailed summary statistics table
summary_data = []

for name, scores in scenarios.items():
    # Calculate quantiles
    q01, q05, q25, q50, q75, q95, q99 = np.quantile(scores, [0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])
    
    summary_data.append({
        'Scenario': name,
        'Mean': f"{np.mean(scores):.3f}",
        'Std': f"{np.std(scores):.3f}",
        'Min': f"{np.min(scores):.3f}",
        'Max': f"{np.max(scores):.3f}",
        '1%': f"{q01:.3f}",
        '5%': f"{q05:.3f}",
        '25%': f"{q25:.3f}",
        'Median': f"{q50:.3f}",
        '75%': f"{q75:.3f}",
        '95%': f"{q95:.3f}",
        '99%': f"{q99:.3f}",
        'Positive %': f"{np.mean(scores > 0) * 100:.1f}%",
        'Negative %': f"{np.mean(scores < 0) * 100:.1f}%"
    })

summary_df = pd.DataFrame(summary_data)

print("Comprehensive C-score Summary Statistics")
print("=" * 100)
print(summary_df.to_string(index=False))

print("\nKey Insights:")
print("• Independent scenario: Near-zero mean, symmetric distribution around zero (baseline)")
print("• Common scenario: Strongly positive mean, 89%+ positive scores")
print("• Divergent scenario: Strongly negative mean, 89%+ negative scores") 
print("• Range of C-scores: Typically between -3 to +3 for these simulations")

Comprehensive C-score Summary Statistics
====================================================================================================
   Scenario   Mean   Std    Min    Max     1%     5%    25% Median    75%   95%   99% Positive % Negative %
Independent -0.003 0.882 -4.307  8.530 -2.506 -1.339 -0.301 -0.003  0.264 1.323 2.856      49.6%      50.4%
     Common  0.768 1.409 -0.425 14.915 -0.140 -0.027  0.053  0.252  0.855 3.284 6.854      89.5%      10.5%
  Divergent -0.713 1.092 -8.748  0.343 -5.301 -2.952 -0.868 -0.289 -0.069 0.016 0.083       8.9%      91.1%

Key Insights:
• Independent scenario: Near-zero mean, symmetric distribution around zero (baseline)
• Common scenario: Strongly positive mean, 89%+ positive scores
• Divergent scenario: Strongly negative mean, 89%+ negative scores
• Range of C-scores: Typically between -3 to +3 for these simulations