// Case studies — three layer-scoped pages, all wired to live /api endpoints.
// Case 01: Geometry annotates features missing DB labels (cross_family_geometry.json)
// Case 02: Geometry is more granular than biology (subdomain_case_study.json)
// Case 03: Transfer to metagenomic proteins (nmpfam_case_study.json)
// ────────────────────────────────────────────────────────────────────────
// Case study 01 — DB-silent, geometry-significant features.
// ────────────────────────────────────────────────────────────────────────
// Page component for the geometry case study.
// Props (as destructured below):
//   layer         — forwarded to API.crossFamilyGeometry and used as the
//                   dependency list entry for useFetch.
//   onPickFeature — not referenced in the visible body; presumably handed to
//                   child rows as a click handler — TODO confirm.
// NOTE(review): in this view the return statements carry no element markup and
// the text below mixes the Case 01 and Case 02 descriptions — verify the JSX
// is intact in the working copy.
function CaseStudyGeometry({ layer, onPickFeature }) {
// Request the cross-family geometry payload for the current layer.
const cs = useFetch(() => API.crossFamilyGeometry(layer), [layer]);
// Bail out early while the request is still loading.
if (cs.loading) return
Features whose four database / sequence methods (InterPro residue + protein, CATH residue, MEME motif,
sequence position) are all not significant, but where the geometric Cα classifier reaches
BH q < 0.05. This is the population the paper calls "geometry-primary".
Groups of features that share an InterPro-residue or CATH-residue annotation, but split into distinct
geometric sub-signatures (mean pairwise cosine of the 44-dim importance vector < 0.5). Where the
database says one thing, geometry can resolve sub-structure.
);
}
// One row for a group in the Case 02 listing.
// Props (as destructured below):
//   g      — group record; the fields read here are mean_cosine_similarity,
//            mean_cos, mean_residue_f1, ipr_f1 and cath_f1.
//   source — compared against "interpro" to choose the F1 fallback field.
//   onPick — not referenced in the visible body; presumably a click handler — TODO confirm.
// NOTE(review): the return body is empty in this view — verify the markup.
function CS2GroupRow({ g, source, onPick }) {
// Two field names are accepted for the cosine; the second is used only when
// the first is null/undefined.
const cosine = g.mean_cosine_similarity ?? g.mean_cos;
// mean_residue_f1 wins when present; otherwise fall back to ipr_f1 for the
// "interpro" source and cath_f1 for any other source.
const f1 = source === "interpro" ? (g.mean_residue_f1 ?? g.ipr_f1) : (g.mean_residue_f1 ?? g.cath_f1);
return (
);
}
// ────────────────────────────────────────────────────────────────────────
// Case study 03 — NMPFam metagenomic transfer (paper §4.3 + Table 4 + Fig. 4).
//
// Data flow:
// /api/layers/{L}/nmpfam-transfer-summary returns
// { table4: {...}, features: [{feature_id, max_prauc, median_prauc,
// n_hits, n_strong, sequences_annotated, top_hits[]} ...] }
// The features list is *already gated* server-side to Table 4 column 3
// (geometry q-significant AND median per-family PR-AUC > 0.5) and sorted
// by max_prauc descending. We just render it.
// ────────────────────────────────────────────────────────────────────────
// Case study 03 landing page: summary copy plus the list of gated features.
// Props (as destructured below):
//   layer         — forwarded to API.nmpfamTransferSummary, used as the
//                   useFetch dependency, and interpolated into the page text.
//   onPickFeature — not referenced in the visible body; presumably forwarded
//                   to the feature table — TODO confirm.
function CaseStudyMetagenomic({ layer, onPickFeature }) {
// A rejected request is converted into a resolved { __error } object so the
// "not built" branch below can render instead of the fetch failing outright.
const cs = useFetch(
() => API.nmpfamTransferSummary(layer).catch(e => ({ __error: e })),
[layer]
);
// Field name used to order the features; starts at "max_prauc".
const [sortKey, setSortKey] = React.useState("max_prauc");
// ALL hooks must run on every render — keep useMemo above any early
// returns (React's rules-of-hooks: same hook order every render).
// Each lookup falls back to an empty container so the code below never
// dereferences undefined while the payload is absent.
const data = cs.data || {};
const t = data.table4 || {};
const features = data.features || [];
// Sort a copy (the fetched array is left untouched) in descending order of
// the selected key; missing or falsy values are compared as 0.
// NOTE(review): when data.features is absent the `|| []` fallback yields a new
// array on each render, so this memo recomputes every time in that state.
const sorted = React.useMemo(
() => [...features].sort((a, b) => (b[sortKey] || 0) - (a[sortKey] || 0)),
[features, sortKey]
);
if (cs.loading) return
;
if (data.__error) {
return (
Case study · 03 · Layer {layer}
Transfer to metagenomic proteins
Not built for layer {layer}.
Run python scripts/build_nmpfam_transfer_summary.py --analysis-dir analysis/l{layer}
{' '}to generate the per-feature transfer aggregates.
);
}
return (
Case study · 03 · Layer {layer} · §4.3
Geometric annotation transfers to metagenomic proteins
For every SAE feature that fires on NMPFams metagenomic clusters, we run the pre-trained
Swiss-Prot geometric classifier on each metagenomic protein and report PR-AUC. A feature
whose median per-family PR-AUC > 0.5 indicates the geometric annotation
generalises beyond the training distribution.
{/* Table 4 stats — exactly the columns from the paper */}
Transferring features
{features.length.toLocaleString()} features pass the Table 4 column 3 gate
Geometry q < 0.05 AND median per-family PR-AUC > 0.5.
Default sort by max PR-AUC. Click a row to open the per-feature transfer view.
);
}
// Compact table of gated features. Each row is one feature; columns are
// the per-feature transfer aggregates from the precompute step.
// Table of gated features for case study 03.
// Props (as destructured below):
//   rows          — array of feature records; an empty array renders the
//                   "no features" message instead of the table.
//   onPickFeature — not referenced in the visible body; presumably the row
//                   click handler — TODO confirm.
// NOTE(review): after the rows.map callback opens, the text below refers to
// `features`, `cosine`, `group` and `m`, none of which are bound in this
// function. It looks like the end of this component and the start of another
// one are missing from this view — verify against the working copy.
function CS3FeatureTable({ rows, onPickFeature }) {
// Nothing to tabulate: show the empty-state message.
if (!rows.length) {
return (
No features pass the Table 4 column 3 gate for this layer.
);
}
return (
{/* Explicit column widths — without these the inherited `max-width: 0`
on .ftable tbody td clips everything (e.g. f/337 → f/3). */}
ID
Hits
Strong
Max PR-AUC
Median PR-AUC
Sequences annotated
Top family · max PR-AUC
{rows.map((r) => {
const top = r.top_hits?.[0];
return (
{features.length} member features
{cosine != null ? ` · mean pairwise cosine ${fmt(cosine, 3)}` : ''}
{cosine != null ? (cosine < 0.5
? GEOM split (cos < 0.5)
: not split by geom) : null}
{group.mean_geom_pr_auc != null ? ` · mean geom PR-AUC ${fmt(group.mean_geom_pr_auc, 3)}` : ''}
{group.mean_residue_f1 != null ? ` · mean residue F1 ${fmt(group.mean_residue_f1, 3)}` : ''}
Members
Feature list
Click a feature to open its full page. Top descriptor names and category come from the per-feature geometric classifier.
{features.map((m) => (
))}
Pairwise cosine
How similar are these features in the 44-dim importance space?
Cosine similarity between every pair of member features' GBM importance vectors.
Lighter cells = members lean on different geometric descriptors.
m.feature_id)} />
44-dim importance
Where each member sits in the geometric descriptor space
Rows are member features, columns are the 44 Cα descriptors used by the geometric GBM.
A bright row signals a few dominant descriptors; a flat row means importance is spread out.
Shared protein
Different members hit different residues on the same protein
Pick a UniProt accession that ≥ 2 members evaluate on. We overlay each covering feature's
SAE activation (solid) and geometric probability (dashed) along that protein's residues.
);
}
// Row component for a single member of a Case 02 group.
// Props (as destructured below): m, layer, onPickFeature.
//   presumably m is one member-feature record and onPickFeature the click
//   handler — none of the props are referenced in the visible body, so this
//   cannot be confirmed from here.
// NOTE(review): the return body is empty in this view — verify the markup.
function CS2MemberRow({ m, layer, onPickFeature }) {
return (
);
}
// Pairwise cosine matrix as a Plotly heatmap. Falls back to a placeholder
// if the case study payload didn't include cosine_matrix for this group.
// Props (as destructured below):
//   matrix — the cosine matrix; a missing or zero-length value takes the
//            placeholder path instead of plotting.
//   ids    — not referenced in the visible body; presumably the axis labels — TODO confirm.
// NOTE(review): the string literal assigned to innerHTML below is cut off in
// this view and is followed by text from an unrelated "Shared protein" panel —
// the rest of this component appears to be missing; verify the working copy.
function CS2CosineHeatmap({ matrix, ids }) {
// Handle to the DOM node the effect draws into.
const ref = React.useRef(null);
React.useEffect(() => {
// Nothing to do until the node is mounted and the Plotly global is available.
if (!ref.current || !window.Plotly) return;
// No matrix in the payload: write placeholder markup instead of a plot.
if (!matrix || !matrix.length) {
ref.current.innerHTML = '
No protein appears in the top-activating sample of ≥ 2 features in this family.
);
}
return (
Shared protein
);
}
// ────────────────────────────────────────────────────────────────────────
// Case study 03 — per-feature side-by-side SwissProt vs NMPFam detail
// (the visual idiom in the paper's Figure 4: a Swiss-Prot reference
// protein on the left, a metagenomic NMPFam family on the right, each
// with structure on top and per-residue activation + geom probability
// on the bottom). Picks the strongest hit by default and lets the user
// scrub to other strong hits via dropdowns.
// ────────────────────────────────────────────────────────────────────────
// Per-feature detail page for case study 03.
// Props (as destructured below):
//   layer              — forwarded to all four API calls.
//   featureId          — forwarded to the geometry, nmpfam and significance
//                        calls and matched against feature_id in the summary.
//   onBack, onOpenFeaturePage — not referenced in the visible body;
//                        presumably navigation callbacks — TODO confirm.
function CaseStudyMetagenomicDetail({ layer, featureId, onBack, onOpenFeaturePage }) {
// Four independent requests. Each rejection is converted to null so a single
// failed endpoint leaves `.data` empty rather than failing the whole page;
// the optional chaining further down tolerates the null.
const transfer = useFetch(() => API.nmpfamTransferSummary(layer).catch(() => null), [layer]);
const geom = useFetch(() => API.geometry(layer, featureId).catch(() => null), [layer, featureId]);
const nmp = useFetch(() => API.nmpfam(layer, featureId).catch(() => null), [layer, featureId]);
const sig = useFetch(() => API.significance(layer, featureId).catch(() => null), [layer, featureId]);
// Hold the loading state until every request has settled.
if (transfer.loading || geom.loading || nmp.loading || sig.loading) {
return
;
}
// This feature's entry in the transfer summary, or undefined when it is not
// listed. The match is strict (===), so feature_id and featureId must have
// the same type.
const featAgg = (transfer.data?.features || []).find(f => f.feature_id === featureId);
const swissProteins = geom.data?.plot_data?.top_proteins || [];
const nmpHits = nmp.data?.nmpfam_hits || [];
// Rank a copy of the NMPFam hits by max_geom_prob, highest first; hits with
// no value are compared as 0.
// NOTE(review): this orders by max_geom_prob, not PR-AUC, and has no fallback
// to raw enrichment order — confirm that is the intended ranking.
const ranked = [...nmpHits].sort((a, b) => (b.max_geom_prob || 0) - (a.max_geom_prob || 0));
return (
Case study · 03 · Layer {layer} · §4.3f/{featureId}
Geometric annotation transfer · f/{featureId}
Side-by-side: a Swiss-Prot reference protein the SAE feature was originally trained on,
and an NMPFam metagenomic family it transfers to. Each panel shows the predicted
structure (coloured by activation intensity) and the per-residue SAE activation against
the Swiss-Prot-trained GBM's geometric probability.
{/* Per-feature transfer aggregates */}
{featAgg && (
Max PR-AUC
{fmt(featAgg.max_prauc, 3)}
best metagenomic transfer
Median PR-AUC
{fmt(featAgg.median_prauc, 3)}
across {featAgg.n_hits.toLocaleString()} NMPFam hits ({featAgg.n_strong} strong)
No SwissProt top proteins or NMPFam hits available for this feature.
);
}
// The SwissProt threshold lives on the geometry payload (geometric_residue_level
// .activation_threshold) but we only need it to draw the truth band; the Swiss
// panel here just shows act vs geom_prob from the pre-computed plot_data, so
// we don't need to recompute confusion.
return (
);
}
function CS3Panel({ kind, title, proteins, idx, setIdx, protein, featureMaxAct, feat, nmpThreshold }) {
const containerRef = React.useRef(null);
const plotRef = React.useRef(null);
// Lazy 3D viewer (white→orange activation intensity, same scheme used elsewhere).
React.useEffect(() => {
if (!protein || !containerRef.current) return;
const acc = kind === "swiss" ? protein.accession : protein.family_id;
if (!acc) return;
let observer, initialized = false;
const init = () => {
if (initialized || !containerRef.current) return;
initialized = true;
const acts = (kind === "swiss" ? protein.sae_activation_profile : protein.sae_activation_profile) || [];
const norm = featureMaxAct && featureMaxAct > 0
? featureMaxAct
: Math.max(...acts, 0.001);
const colorMap = buildActivationColorMap(acts, norm);
const url = kind === "swiss"
? `/api/pdb/${acc}`
: `/api/nmpfam-pdb/${acc}`;
init3DViewerWithMap(containerRef.current, url, colorMap);
};
if ("IntersectionObserver" in window) {
observer = new IntersectionObserver((entries) => {
for (const e of entries) if (e.isIntersecting) { init(); observer.disconnect(); break; }
}, { rootMargin: "120px" });
observer.observe(containerRef.current);
} else {
init();
}
return () => { if (observer) observer.disconnect(); };
}, [protein, kind, featureMaxAct]);
// Per-residue dual-axis line plot: SAE activation (orange, left axis) +
// geom probability (blue, right axis). Vertical band on residues above
// the SAE truth threshold so the reader sees where geometry should fire.
React.useEffect(() => {
if (!plotRef.current || !window.Plotly) return;
if (!protein) { plotRef.current.innerHTML = ''; return; }
const acts = protein.sae_activation_profile || [];
const probs = protein.geom_prob_profile || [];
const x = Array.from({ length: acts.length }, (_, i) => i + 1);
const traces = [
{
x, y: acts, name: 'SAE activation',
type: 'scatter', mode: 'lines',
line: { color: '#C97A00', width: 1.6 },
yaxis: 'y',
},
{
x, y: probs, name: 'Geom probability',
type: 'scatter', mode: 'lines',
line: { color: '#5C82BD', width: 1.6, dash: 'dash' },
yaxis: 'y2',
},
];
// Truth band: residues above the SAE threshold get a faint orange shade.
let threshold = 0;
if (kind === 'swiss') {
// SwissProt threshold isn't in the geometry payload's plot_data,
// approximate with feature-level max × 0.5 (consistent with other places
// we don't have the exact value).
threshold = (featureMaxAct || 0) * 0.5;
} else {
threshold = nmpThreshold ?? 0;
}
const shapes = [];
if (threshold > 0) {
let bandStart = null;
for (let i = 0; i < acts.length; i++) {
const above = acts[i] > threshold;
if (above && bandStart === null) bandStart = i;
if ((!above || i === acts.length - 1) && bandStart !== null) {
const bandEnd = above ? i : i - 1;
shapes.push({
type: 'rect', xref: 'x', yref: 'paper',
x0: bandStart + 1, x1: bandEnd + 1,
y0: 0, y1: 1,
fillcolor: '#C97A00', opacity: 0.10, line: { width: 0 },
});
bandStart = null;
}
}
}
const layout = {
height: 220,
margin: { l: 50, r: 50, t: 10, b: 36 },
xaxis: { title: 'Residue', tickfont: { family: 'IBM Plex Mono', size: 10 } },
yaxis: { title: 'SAE act', side: 'left',
tickfont: { family: 'IBM Plex Mono', size: 10 } },
yaxis2: { title: 'Geom prob', side: 'right', overlaying: 'y',
range: [0, 1],
tickfont: { family: 'IBM Plex Mono', size: 10 } },
legend: { orientation: 'h', x: 0, y: -0.32, font: { size: 10, family: 'IBM Plex Mono' } },
font: { family: 'IBM Plex Sans' },
shapes,
paper_bgcolor: 'rgba(0,0,0,0)',
plot_bgcolor: 'rgba(0,0,0,0)',
};
Plotly.react(plotRef.current, traces, layout, { displayModeBar: false, responsive: true });
}, [protein, kind, featureMaxAct, nmpThreshold]);
if (!protein) {
return (
{title}
No {kind === 'swiss' ? 'SwissProt' : 'NMPFam'} protein available.