bbarad/split_surface.py

## split_surface.py
#!/usr/bin/env python
"""
Split a PyCurv GT surface into two membrane components (e.g. inner/outer phagophore).

Strategy:
  1. For each triangle, compute the dot product of the voted normal (n_v) with the
     radial outward direction from the surface centroid. One membrane's normals
     point radially inward, the other's point outward — the dot product cleanly
     separates them with near-perfectly connected initial halves.
  2. The largest connected component of each initial half is used as seeds for
     BFS region growing to cover the full surface.
  3. Each output is a single connected component by construction.

Usage:
  python split_surface.py surface.gt [--threshold 50] [--seed-percentile 50] [--method centroid_dot]
"""

import argparse
import os
from collections import deque

import numpy as np
from graph_tool import Graph, load_graph
from graph_tool.topology import label_components
from pycurv import TriangleGraph
from pycurv import pycurv_io as io


def largest_component_mask(g, mask):
    """Return mask restricted to the largest connected component within mask."""
    if not mask.any():
        return mask.copy()
    vfilt = g.new_vertex_property("bool")
    vfilt.a = mask
    g.set_vertex_filter(vfilt)
    comp_labels, hist = label_components(g)
    g.clear_filters()
    largest = int(np.argmax(hist))
    return mask & (comp_labels.a == largest)


def bfs_fill(g, pos_seeds, neg_seeds, n_triangles):
    """
    BFS region growing from two seed sets.
    Each unlabelled triangle takes the label of the first labelled neighbour found.
    """
    labels = np.full(n_triangles, -1, dtype=np.int8)
    labels[pos_seeds] = 1
    labels[neg_seeds] = 0

    queue = deque()
    for v_idx in np.where(pos_seeds | neg_seeds)[0]:
        for nb in g.get_all_neighbors(int(v_idx)):
            if labels[nb] == -1:
                queue.append(int(nb))

    while queue:
        v = queue.popleft()
        if labels[v] != -1:
            continue
        for nb in g.get_all_neighbors(v):
            nb = int(nb)
            if labels[nb] != -1:
                labels[v] = labels[nb]
                break
        if labels[v] != -1:
            for nb in g.get_all_neighbors(v):
                nb = int(nb)
                if labels[nb] == -1:
                    queue.append(nb)

    unassigned = labels == -1
    if unassigned.any():
        print(f"  Note: {unassigned.sum()} vertices unreachable by BFS; "
              f"assigning by direct score sign")
        # Fallback: assign by score sign
        pass

    return labels.astype(bool)


def split_surface(gt_path, seed_percentile=0, threshold_percentile=50, method="centroid_dot"):
    """
    Split a GT surface into two membrane components.

    Parameters
    ----------
    gt_path : str
        Path to the input .gt file.
    seed_percentile : float
        Percentile of |score − threshold| within each sign class above which triangles
        become seeds. The largest connected component of each seed set is used.
        Default 0 (use all triangles in each initial half as seeds).
    threshold_percentile : float
        Percentile of the score distribution used as the split threshold. Default 50
        (median). Lowering this (e.g. to 45 or 40) shifts the threshold toward more-
        negative scores, expanding the positive (inner membrane) seed region into the
        rim zone and moving the cut toward the outer membrane. Raising it does the
        opposite.
    method : str
        Signal used to separate the membranes:
        'centroid_dot' (default): dot product of voted normal with the radial
            outward direction from the surface centroid. The most robust signal
            for cup-shaped double membranes — directly encodes which membrane
            faces inward vs outward relative to the phagophore center.
        'mean_curvature': mean_curvature_VV from pycurv.
        'normal_pca': first PCA component of the voted normals (useful when the
            membranes are roughly planar and face each other, not cup-shaped).
        'kappa1': raw kappa_1 principal curvature.
    """
    tg = TriangleGraph()
    tg.graph = load_graph(gt_path)
    g = tg.graph
    n_triangles = g.num_vertices()
    print(f"Loaded {gt_path}: {n_triangles} triangles, {g.num_edges()} edges")

    # --- Get positions and voted normals ---
    xyz = g.vertex_properties["xyz"].get_2d_array([0, 1, 2]).T  # (n, 3)
    nv = g.vertex_properties["n_v"].get_2d_array([0, 1, 2]).T   # (n, 3)
    centroid = xyz.mean(axis=0)

    # --- Compute splitting scores ---
    if method == "centroid_dot":
        r = xyz - centroid
        r_hat = r / (np.linalg.norm(r, axis=1, keepdims=True) + 1e-12)
        scores = (nv * r_hat).sum(axis=1)
        print(f"Method: centroid_dot (n_v · r̂ from centroid {centroid.astype(int)})")
        print(f"  Score range [{scores.min():.4f}, {scores.max():.4f}], "
              f"median {np.median(scores):.4f}")
    elif method == "normal_pca":
        mean_nv = nv.mean(axis=0)
        _, _, Vt = np.linalg.svd(nv - mean_nv, full_matrices=False)
        axis = Vt[0]
        scores = nv @ axis
        if (scores > 0).sum() < (scores < 0).sum():
            scores = -scores
            axis = -axis
        print(f"Method: normal_pca; axis = [{axis[0]:.4f}, {axis[1]:.4f}, {axis[2]:.4f}]")
    elif method == "mean_curvature":
        scores = tg.get_vertex_property_array("mean_curvature_VV")
        print("Method: mean_curvature_VV")
    elif method == "kappa1":
        scores = tg.get_vertex_property_array("kappa_1")
        print("Method: kappa_1")
    else:
        raise ValueError(f"Unknown method: {method!r}")

    # Split at chosen percentile threshold
    threshold = float(np.percentile(scores, threshold_percentile))
    pos_mask = scores > threshold
    neg_mask = scores <= threshold
    print(f"  Split at p{threshold_percentile} ({threshold:.4f}): "
          f"{pos_mask.sum()} positive, {neg_mask.sum()} negative")

    # --- Select seeds: high-confidence, single connected component ---
    abs_dev = np.abs(scores - threshold)
    pos_thresh = np.percentile(abs_dev[pos_mask], seed_percentile) if pos_mask.any() else np.inf
    neg_thresh = np.percentile(abs_dev[neg_mask], seed_percentile) if neg_mask.any() else np.inf

    pos_seeds = largest_component_mask(g, pos_mask & (abs_dev >= pos_thresh))
    neg_seeds = largest_component_mask(g, neg_mask & (abs_dev >= neg_thresh))
    print(f"  Seeds (p{seed_percentile} by |score−median|, largest component): "
          f"{pos_seeds.sum()} positive, {neg_seeds.sum()} negative")

    if pos_seeds.sum() == 0 or neg_seeds.sum() == 0:
        raise RuntimeError("One seed set is empty — lower --seed-percentile")

    # --- BFS fill ---
    print("Filling rim via BFS...")
    positive_mask = bfs_fill(g, pos_seeds, neg_seeds, n_triangles)

    n_pos = int(positive_mask.sum())
    n_neg = n_triangles - n_pos
    print(f"Final partition: {n_pos} positive ({100*n_pos/n_triangles:.1f}%), "
          f"{n_neg} negative ({100*n_neg/n_triangles:.1f}%)")

    # Report which component is likely outer membrane (larger mean radius from centroid)
    radii = np.linalg.norm(xyz - centroid, axis=1)
    r_pos = radii[positive_mask].mean()
    r_neg = radii[~positive_mask].mean()
    outer_label = "positive" if r_pos > r_neg else "negative"
    print(f"  Mean radius from centroid: positive={r_pos:.1f}nm, negative={r_neg:.1f}nm "
          f"→ '{outer_label}' is likely the outer membrane")

    # --- Connectivity check ---
    for label, mask in [("positive", positive_mask), ("negative", ~positive_mask)]:
        vfilt = g.new_vertex_property("bool")
        vfilt.a = mask
        g.set_vertex_filter(vfilt)
        _, hist = label_components(g)
        g.clear_filters()
        if len(hist) != 1:
            sizes = sorted(hist, reverse=True)
            print(f"  WARNING: {label} has {len(hist)} components (sizes: {sizes[:6]})")
        else:
            print(f"  {label}: single connected component (OK)")

    # --- Save outputs ---
    base, _ = os.path.splitext(gt_path)
    outputs = {}
    for label, mask in [("positive", positive_mask), ("negative", ~positive_mask)]:
        if mask.sum() == 0:
            print(f"  {label}: 0 triangles (skipping)")
            continue

        vfilt = g.new_vertex_property("bool")
        vfilt.a = mask
        g.set_vertex_filter(vfilt)
        sub_g = Graph(g, prune=True)
        g.clear_filters()

        gt_out = f"{base}_{label}.gt"
        vtp_out = f"{base}_{label}.vtp"
        sub_g.save(gt_out)

        sub_tg = TriangleGraph()
        sub_tg.graph = sub_g
        surf = sub_tg.graph_to_triangle_poly()
        io.save_vtp(surf, vtp_out)

        print(f"  {label}: {sub_g.num_vertices()} triangles -> {gt_out}, {vtp_out}")
        outputs[label] = (gt_out, vtp_out)

    return outputs


def main():
    parser = argparse.ArgumentParser(
        description="Split a phagophore GT surface into two membrane components")
    parser.add_argument("gt_file", help="Path to input .gt file")
    parser.add_argument(
        "--method",
        choices=["centroid_dot", "normal_pca", "mean_curvature", "kappa1"],
        default="centroid_dot",
        help="Splitting signal. centroid_dot (default): n_v · radial outward direction "
             "from the surface centroid — robust for cup-shaped double membranes. "
             "normal_pca: first PC of voted normals (better for planar membranes). "
             "mean_curvature: mean_curvature_VV. kappa1: kappa_1.")
    parser.add_argument(
        "--threshold-percentile", type=float, default=50,
        help="Percentile of the score distribution used as the split threshold. "
             "Default 50 (median). Lower values (e.g. 45, 40) expand the positive "
             "(inner membrane) region into the rim, moving the cut toward the outer "
             "membrane. Raise to do the opposite.")
    parser.add_argument(
        "--seed-percentile", type=float, default=0,
        help="Percentile of |score−median| (within each initial half) above which "
             "triangles become region-growing seeds. Default 0 uses all triangles in "
             "the initial half — appropriate when the centroid_dot halves are already "
             "well-connected. Raise (e.g. 50) to restrict to more confident seeds "
             "if the initial split is noisy.")
    args = parser.parse_args()

    split_surface(args.gt_file,
                  seed_percentile=args.seed_percentile,
                  threshold_percentile=args.threshold_percentile,
                  method=args.method)


if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	"""
	Split a PyCurv GT surface into two membrane components (e.g. inner/outer phagophore).

	Strategy:
	1. For each triangle, compute the dot product of the voted normal (n_v) with the
	radial outward direction from the surface centroid. One membrane's normals
	point radially inward, the other's point outward — the dot product cleanly
	separates them with near-perfectly connected initial halves.
	2. The largest connected component of each initial half is used as seeds for
	BFS region growing to cover the full surface.
	3. Each output is a single connected component by construction.

	Usage:
	python split_surface.py surface.gt [--threshold 50] [--seed-percentile 50] [--method centroid_dot]
	"""

	import argparse
	import os
	from collections import deque

	import numpy as np
	from graph_tool import Graph, load_graph
	from graph_tool.topology import label_components
	from pycurv import TriangleGraph
	from pycurv import pycurv_io as io


	def largest_component_mask(g, mask):
	"""Return mask restricted to the largest connected component within mask."""
	if not mask.any():
	return mask.copy()
	vfilt = g.new_vertex_property("bool")
	vfilt.a = mask
	g.set_vertex_filter(vfilt)
	comp_labels, hist = label_components(g)
	g.clear_filters()
	largest = int(np.argmax(hist))
	return mask & (comp_labels.a == largest)


	def bfs_fill(g, pos_seeds, neg_seeds, n_triangles):
	"""
	BFS region growing from two seed sets.
	Each unlabelled triangle takes the label of the first labelled neighbour found.
	"""
	labels = np.full(n_triangles, -1, dtype=np.int8)
	labels[pos_seeds] = 1
	labels[neg_seeds] = 0

	queue = deque()
	for v_idx in np.where(pos_seeds \| neg_seeds)[0]:
	for nb in g.get_all_neighbors(int(v_idx)):
	if labels[nb] == -1:
	queue.append(int(nb))

	while queue:
	v = queue.popleft()
	if labels[v] != -1:
	continue
	for nb in g.get_all_neighbors(v):
	nb = int(nb)
	if labels[nb] != -1:
	labels[v] = labels[nb]
	break
	if labels[v] != -1:
	for nb in g.get_all_neighbors(v):
	nb = int(nb)
	if labels[nb] == -1:
	queue.append(nb)

	unassigned = labels == -1
	if unassigned.any():
	print(f" Note: {unassigned.sum()} vertices unreachable by BFS; "
	f"assigning by direct score sign")
	# Fallback: assign by score sign
	pass

	return labels.astype(bool)


	def split_surface(gt_path, seed_percentile=0, threshold_percentile=50, method="centroid_dot"):
	"""
	Split a GT surface into two membrane components.

	Parameters
	----------
	gt_path : str
	Path to the input .gt file.
	seed_percentile : float
	Percentile of \|score − threshold\| within each sign class above which triangles
	become seeds. The largest connected component of each seed set is used.
	Default 0 (use all triangles in each initial half as seeds).
	threshold_percentile : float
	Percentile of the score distribution used as the split threshold. Default 50
	(median). Lowering this (e.g. to 45 or 40) shifts the threshold toward more-
	negative scores, expanding the positive (inner membrane) seed region into the
	rim zone and moving the cut toward the outer membrane. Raising it does the
	opposite.
	method : str
	Signal used to separate the membranes:
	'centroid_dot' (default): dot product of voted normal with the radial
	outward direction from the surface centroid. The most robust signal
	for cup-shaped double membranes — directly encodes which membrane
	faces inward vs outward relative to the phagophore center.
	'mean_curvature': mean_curvature_VV from pycurv.
	'normal_pca': first PCA component of the voted normals (useful when the
	membranes are roughly planar and face each other, not cup-shaped).
	'kappa1': raw kappa_1 principal curvature.
	"""
	tg = TriangleGraph()
	tg.graph = load_graph(gt_path)
	g = tg.graph
	n_triangles = g.num_vertices()
	print(f"Loaded {gt_path}: {n_triangles} triangles, {g.num_edges()} edges")

	# --- Get positions and voted normals ---
	xyz = g.vertex_properties["xyz"].get_2d_array([0, 1, 2]).T # (n, 3)
	nv = g.vertex_properties["n_v"].get_2d_array([0, 1, 2]).T # (n, 3)
	centroid = xyz.mean(axis=0)

	# --- Compute splitting scores ---
	if method == "centroid_dot":
	r = xyz - centroid
	r_hat = r / (np.linalg.norm(r, axis=1, keepdims=True) + 1e-12)
	scores = (nv * r_hat).sum(axis=1)
	print(f"Method: centroid_dot (n_v · r̂ from centroid {centroid.astype(int)})")
	print(f" Score range [{scores.min():.4f}, {scores.max():.4f}], "
	f"median {np.median(scores):.4f}")
	elif method == "normal_pca":
	mean_nv = nv.mean(axis=0)
	_, _, Vt = np.linalg.svd(nv - mean_nv, full_matrices=False)
	axis = Vt[0]
	scores = nv @ axis
	if (scores > 0).sum() < (scores < 0).sum():
	scores = -scores
	axis = -axis
	print(f"Method: normal_pca; axis = [{axis[0]:.4f}, {axis[1]:.4f}, {axis[2]:.4f}]")
	elif method == "mean_curvature":
	scores = tg.get_vertex_property_array("mean_curvature_VV")
	print("Method: mean_curvature_VV")
	elif method == "kappa1":
	scores = tg.get_vertex_property_array("kappa_1")
	print("Method: kappa_1")
	else:
	raise ValueError(f"Unknown method: {method!r}")

	# Split at chosen percentile threshold
	threshold = float(np.percentile(scores, threshold_percentile))
	pos_mask = scores > threshold
	neg_mask = scores <= threshold
	print(f" Split at p{threshold_percentile} ({threshold:.4f}): "
	f"{pos_mask.sum()} positive, {neg_mask.sum()} negative")

	# --- Select seeds: high-confidence, single connected component ---
	abs_dev = np.abs(scores - threshold)
	pos_thresh = np.percentile(abs_dev[pos_mask], seed_percentile) if pos_mask.any() else np.inf
	neg_thresh = np.percentile(abs_dev[neg_mask], seed_percentile) if neg_mask.any() else np.inf

	pos_seeds = largest_component_mask(g, pos_mask & (abs_dev >= pos_thresh))
	neg_seeds = largest_component_mask(g, neg_mask & (abs_dev >= neg_thresh))
	print(f" Seeds (p{seed_percentile} by \|score−median\|, largest component): "
	f"{pos_seeds.sum()} positive, {neg_seeds.sum()} negative")

	if pos_seeds.sum() == 0 or neg_seeds.sum() == 0:
	raise RuntimeError("One seed set is empty — lower --seed-percentile")

	# --- BFS fill ---
	print("Filling rim via BFS...")
	positive_mask = bfs_fill(g, pos_seeds, neg_seeds, n_triangles)

	n_pos = int(positive_mask.sum())
	n_neg = n_triangles - n_pos
	print(f"Final partition: {n_pos} positive ({100*n_pos/n_triangles:.1f}%), "
	f"{n_neg} negative ({100*n_neg/n_triangles:.1f}%)")

	# Report which component is likely outer membrane (larger mean radius from centroid)
	radii = np.linalg.norm(xyz - centroid, axis=1)
	r_pos = radii[positive_mask].mean()
	r_neg = radii[~positive_mask].mean()
	outer_label = "positive" if r_pos > r_neg else "negative"
	print(f" Mean radius from centroid: positive={r_pos:.1f}nm, negative={r_neg:.1f}nm "
	f"→ '{outer_label}' is likely the outer membrane")

	# --- Connectivity check ---
	for label, mask in [("positive", positive_mask), ("negative", ~positive_mask)]:
	vfilt = g.new_vertex_property("bool")
	vfilt.a = mask
	g.set_vertex_filter(vfilt)
	_, hist = label_components(g)
	g.clear_filters()
	if len(hist) != 1:
	sizes = sorted(hist, reverse=True)
	print(f" WARNING: {label} has {len(hist)} components (sizes: {sizes[:6]})")
	else:
	print(f" {label}: single connected component (OK)")

	# --- Save outputs ---
	base, _ = os.path.splitext(gt_path)
	outputs = {}
	for label, mask in [("positive", positive_mask), ("negative", ~positive_mask)]:
	if mask.sum() == 0:
	print(f" {label}: 0 triangles (skipping)")
	continue

	vfilt = g.new_vertex_property("bool")
	vfilt.a = mask
	g.set_vertex_filter(vfilt)
	sub_g = Graph(g, prune=True)
	g.clear_filters()

	gt_out = f"{base}_{label}.gt"
	vtp_out = f"{base}_{label}.vtp"
	sub_g.save(gt_out)

	sub_tg = TriangleGraph()
	sub_tg.graph = sub_g
	surf = sub_tg.graph_to_triangle_poly()
	io.save_vtp(surf, vtp_out)

	print(f" {label}: {sub_g.num_vertices()} triangles -> {gt_out}, {vtp_out}")
	outputs[label] = (gt_out, vtp_out)

	return outputs


	def main():
	parser = argparse.ArgumentParser(
	description="Split a phagophore GT surface into two membrane components")
	parser.add_argument("gt_file", help="Path to input .gt file")
	parser.add_argument(
	"--method",
	choices=["centroid_dot", "normal_pca", "mean_curvature", "kappa1"],
	default="centroid_dot",
	help="Splitting signal. centroid_dot (default): n_v · radial outward direction "
	"from the surface centroid — robust for cup-shaped double membranes. "
	"normal_pca: first PC of voted normals (better for planar membranes). "
	"mean_curvature: mean_curvature_VV. kappa1: kappa_1.")
	parser.add_argument(
	"--threshold-percentile", type=float, default=50,
	help="Percentile of the score distribution used as the split threshold. "
	"Default 50 (median). Lower values (e.g. 45, 40) expand the positive "
	"(inner membrane) region into the rim, moving the cut toward the outer "
	"membrane. Raise to do the opposite.")
	parser.add_argument(
	"--seed-percentile", type=float, default=0,
	help="Percentile of \|score−median\| (within each initial half) above which "
	"triangles become region-growing seeds. Default 0 uses all triangles in "
	"the initial half — appropriate when the centroid_dot halves are already "
	"well-connected. Raise (e.g. 50) to restrict to more confident seeds "
	"if the initial split is noisy.")
	args = parser.parse_args()

	split_surface(args.gt_file,
	seed_percentile=args.seed_percentile,
	threshold_percentile=args.threshold_percentile,
	method=args.method)


	if __name__ == "__main__":
	main()
No results found