Created
March 9, 2026 20:45
-
-
Save phyllisstein/97d42f07b92562361ea79da2a3d59df5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //! Exploration of different strategies for handling optional/missing Neo4j node | |
| //! properties in neo4rs 0.8. | |
| //! | |
| //! The problem: Neo4j nodes seeded at different times may or may not have a | |
| //! given property. In Rust, we model this as `Option<String>`. But neo4rs's | |
| //! `Node::get::<Option<String>>("words")` fails with `NoSuchProperty` when the | |
| //! key is entirely absent from the node — the HashMap lookup fails before the | |
| //! `Option` type parameter ever gets a chance to run. | |
| //! | |
| //! This binary exercises seven different approaches against live Neo4j data, | |
| //! demonstrating where each one succeeds, fails, and gets weird. | |
| //! | |
| //! Run with: | |
| //! cargo run --bin explore_deser | |
| use anyhow::{Context, Result}; | |
| use neo4rs::{query, Graph, Node}; | |
| use serde::Deserialize; | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach 0: The Bug | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // This is the original broken code. It calls `node.get::<Option<String>>("words")` | |
| // and expects the `Option` to absorb missing properties. It doesn't. | |
| async fn approach_0_the_bug(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH 0: The Bug (node.get::<Option<String>>)"); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, p, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let uid: String = s.get("uid")?; | |
| // This is the line that blows up on st-002. Node::get does a HashMap | |
| // lookup first. If the key "words" isn't in the property map at all, | |
| // it returns Err(NoSuchProperty) immediately — before TryFrom<BoltType> | |
| // or serde's Option deserializer ever run. | |
| // | |
| // The type parameter Option<String> is *irrelevant* to the failure. | |
| // It might as well be Option<i64> or Option<Vec<u8>>. The error fires | |
| // at the map lookup layer, not the type conversion layer. | |
| let words: std::result::Result<Option<String>, _> = s.get("words"); | |
| match words { | |
| Ok(w) => println!( | |
| " {} => words: {:?}", | |
| uid, | |
| w.map(|s| s[..30.min(s.len())].to_string()) | |
| ), | |
| Err(e) => println!(" {} => ERROR: {}", uid, e), | |
| } | |
| // The root cause in neo4rs source (map.rs:45-53): | |
| // | |
| // pub fn get<'this, T>(&'this self, key: &str) -> Result<T, DeError> | |
| // where T: Deserialize<'this> | |
| // { | |
| // match self.value.get(key) { // <-- HashMap::get | |
| // Some(v) => v.to(), // <-- serde runs HERE | |
| // None => Err(DeError::NoSuchProperty), // <-- short-circuit | |
| // } | |
| // } | |
| // | |
| // Two completely different error semantics are collapsed: | |
| // 1. "key absent from map" (→ should be None for Option<T>) | |
| // 2. "key present but wrong type" (→ genuine error) | |
| // | |
| // The get() method can't distinguish them because it never sees the | |
| // type parameter in the None branch. | |
| } | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach A: Cypher Projection ("push nullability into the query") | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // Instead of returning whole nodes, project individual properties in the RETURN | |
| // clause. Cypher's property access (`s.words`) returns `null` for missing | |
| // properties — not an absent key. So `row.get::<Option<String>>("words")` | |
| // receives a BoltType::Null, which the serde Deserializer handles correctly | |
| // via `deserialize_option` → `visitor.visit_none()`. | |
| // | |
| // The key insight: this moves the "absent → null" coercion from Rust into | |
| // Cypher. The Cypher engine does the normalization for us. | |
| async fn approach_a_cypher_projection(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH A: Cypher Projection (s.words AS words)"); | |
| println!("{}", "=".repeat(72)); | |
| // Notice: we return `s.words AS words` instead of the whole node `s`. | |
| // Cypher evaluates `s.words` and, if the property doesn't exist on the | |
| // node, yields `null`. That null travels over Bolt as BoltType::Null, | |
| // arrives in the Row's BoltMap, and serde's Option deserializer handles it. | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s.uid AS uid, | |
| s.text AS text, | |
| s.words AS words, | |
| p.name AS speaker, | |
| c.startTime AS start_time, | |
| c.endTime AS end_time | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let uid: String = row.get("uid")?; | |
| let _text: String = row.get("text")?; | |
| let words: Option<String> = row.get("words")?; // Works! BoltType::Null → None | |
| let speaker: String = row.get("speaker")?; | |
| let start: f64 = row.get("start_time")?; | |
| println!( | |
| " {} [{}] @ {:.1}s => words: {}", | |
| uid, | |
| speaker, | |
| start, | |
| words.as_ref().map_or("None", |_| "Some(...)") | |
| ); | |
| } | |
| // Trade-offs: | |
| // | |
| // + Simplest mental model — the query tells you exactly what you get. | |
| // + No intermediate structs, no serde derives needed. | |
| // + Works with plain row.get() calls. | |
| // | |
| // - Breaks the "return whole nodes" convention. The query now returns a | |
| // flat bag of scalars, losing the structural information about which | |
| // values came from which node. | |
| // | |
| // - Column names become an implicit contract between Cypher and Rust. | |
| // Rename `AS words` to `AS word_timing` and Rust silently gets | |
| // NoSuchProperty at runtime — no compile-time safety net. | |
| // | |
| // - Scales poorly: as nodes gain properties, the RETURN clause grows into | |
| // a long list of `s.foo AS foo, s.bar AS bar, ...` projections. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach A': Cypher COALESCE — the weirder sibling | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // What if you don't want null at all? COALESCE lets you substitute a sentinel | |
| // value for missing properties. This can be useful if downstream code can't | |
| // handle Option at all, or if you want to distinguish "property was null" from | |
| // "property was absent" (though Neo4j itself doesn't store null — a null | |
| // property is just an absent property). | |
| async fn approach_a_prime_coalesce(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH A': Cypher COALESCE (sentinel substitution)"); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s.uid AS uid, | |
| s.text AS text, | |
| COALESCE(s.words, '[]') AS words, | |
| p.name AS speaker | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let uid: String = row.get("uid")?; | |
| let words: String = row.get("words")?; // Always a String — never None | |
| let speaker: String = row.get("speaker")?; | |
| // Now `words` is always a valid JSON array string. We can parse it | |
| // directly without an Option wrapper. | |
| let word_count: usize = serde_json::from_str::<Vec<serde_json::Value>>(&words) | |
| .map(|v| v.len()) | |
| .unwrap_or(0); | |
| println!(" {} [{}] => {} words", uid, speaker, word_count); | |
| } | |
| // This is weird because it pushes *business logic* into the query: | |
| // "an absent words property means an empty JSON array." That's a semantic | |
| // decision that lives in Cypher now, invisible to Rust's type system. | |
| // | |
| // It also means the Rust type is `String` (not `Option<String>`), which | |
| // loses the ability to distinguish "no word timing" from "word timing with | |
| // zero words." Probably fine here, but a real footgun in other contexts. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach B: The serde(default) Intermediate Struct (chosen solution) | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // Create a struct that covers only the node-native properties, with | |
| // `#[serde(default)]` on optional fields. Use `node.to::<StatementNode>()`. | |
| // | |
| // `node.to()` drives neo4rs's custom serde Deserializer. The Deserializer's | |
| // `deserialize_outer_struct` method (element.rs:60-93) iterates the struct's | |
| // declared fields. For fields NOT present in the node's property map, it | |
| // synthesizes an `AdditionalData::Element` entry — a phantom deserializer | |
| // that, when asked to `deserialize_option`, calls `visitor.visit_none()`, | |
| // and when asked to `deserialize_any`, returns `PropertyMissingButRequired`. | |
| // | |
| // So the path for `words: Option<String>` with `#[serde(default)]` on a node | |
| // that lacks the property: | |
| // | |
| // 1. serde's generated Deserialize impl sees "words" isn't in the data | |
| // 2. #[serde(default)] kicks in → calls Default::default() → None | |
| // 3. The AdditionalData deserializer is never consulted at all | |
| // | |
| // Without `#[serde(default)]`, serde would try to deserialize the | |
| // AdditionalData::Element, which calls `deserialize_option` → | |
| // `visitor.visit_none()` → Ok(None). So actually, for `Option<T>` fields | |
| // specifically, `#[serde(default)]` is redundant! The neo4rs Deserializer | |
| // already handles it. But `#[serde(default)]` is more explicit and protects | |
| // against changes in neo4rs's Deserializer implementation. | |
/// The node-native properties of a `Statement` node.
///
/// Used as the target of whole-node deserialization (`node.to()`), as a
/// nested field when deserializing a whole row, and as the target for a
/// Cypher map projection that carries the same keys.
#[derive(Deserialize, Debug)]
struct StatementNode {
    /// The statement's `uid` property (required).
    uid: String,
    /// The statement's `text` property (required).
    text: String,
    /// The optional `words` property; falls back to `None` via
    /// `#[serde(default)]` when the key is absent from the data.
    #[serde(default)]
    words: Option<String>,
}
| async fn approach_b_serde_default(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH B: #[serde(default)] Intermediate Struct"); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, p, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let sn: StatementNode = s.to()?; // Whole-node deserialization | |
| let p: Node = row.get("p")?; | |
| let speaker: String = p.get("name")?; | |
| println!( | |
| " {} [{}] => words: {:?}", | |
| sn.uid, | |
| speaker, | |
| sn.words | |
| .as_ref() | |
| .map(|s| format!("{}...", &s[..20.min(s.len())])) | |
| ); | |
| } | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach C: Row::to() — Flatten the Entire Row into a Single Struct | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // Row::to() (row.rs:170-178) tries two things: | |
| // 1. `to_strict()` — deserialize the row's BoltMap directly as a struct | |
| // 2. If that fails AND the row has exactly one column, try deserializing | |
| // that single value as the struct | |
| // | |
| // With `to_strict`, the row's column names become struct field names. If we | |
| // carefully name our RETURN columns to match struct fields, we can deserialize | |
| // the whole row in one shot — including nested nodes. | |
| // | |
| // This is the weirdest approach because it treats the Row itself as a | |
| // first-class map deserializer, blurring the line between "query result shape" | |
| // and "application data model." | |
/// One result row flattened into a single struct.
///
/// Each field name must match a column alias in the query's `RETURN` clause,
/// because the row is deserialized by column name.
#[derive(Deserialize, Debug)]
struct FlatRow {
    /// Column `uid` (projected from `s.uid`).
    uid: String,
    /// Column `text` (projected from `s.text`).
    text: String,
    /// Column `words` (projected from `s.words`); `None` when the property is
    /// missing on the node.
    #[serde(default)]
    words: Option<String>,
    /// Column `speaker` (projected from `p.name`).
    speaker: String,
    /// Column `start_time` (projected from `c.startTime`).
    start_time: f64,
    /// Column `end_time` (projected from `c.endTime`).
    end_time: f64,
}
| async fn approach_c_row_to(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH C: row.to::<FlatRow>() (full-row deserialization)"); | |
| println!("{}", "=".repeat(72)); | |
| // Column aliases in the RETURN must exactly match FlatRow field names. | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s.uid AS uid, | |
| s.text AS text, | |
| s.words AS words, | |
| p.name AS speaker, | |
| c.startTime AS start_time, | |
| c.endTime AS end_time | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let flat: FlatRow = row.to()?; | |
| println!( | |
| " {} [{}] @ {:.1}-{:.1}s => words: {}", | |
| flat.uid, | |
| flat.speaker, | |
| flat.start_time, | |
| flat.end_time, | |
| flat.words.as_ref().map_or("None", |_| "Some(...)") | |
| ); | |
| } | |
| // Trade-offs: | |
| // | |
| // + One deserialization call for the whole row. Very ergonomic. | |
| // + Compile-time struct fields enforce that all expected columns exist. | |
| // | |
| // - Same Cypher projection problem as Approach A — you're returning | |
| // scalar columns, not whole nodes. | |
| // - The query and the struct are tightly coupled by field name. Refactoring | |
| // either one requires changing the other. | |
| // - Can't nest: if you want `Person` as a sub-struct, you'd need to return | |
| // `p` as a column and somehow tell serde to deserialize a BoltNode into | |
| // a nested field. (Spoiler: Approach D does this.) | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach D: Row::to() with Nested Node Deserialization | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // Can we return whole nodes in the RETURN clause AND deserialize the entire | |
| // row into a struct with nested sub-structs? Yes — with a catch. | |
| // | |
| // row.to_strict() deserializes the Row's BoltMap, where each column is a key. | |
| // If a column's value is a BoltType::Node, neo4rs's Deserializer will call | |
| // BoltNodeDeserializer, which iterates the node's properties and feeds them | |
| // to the nested struct's Deserialize impl. | |
| // | |
| // So `RETURN s, p` gives a BoltMap with two entries: | |
| // "s" → BoltType::Node(...) | |
| // "p" → BoltType::Node(...) | |
| // | |
| // And row.to::<NestedRow>() can deserialize `s` into a StatementNode and | |
| // `p` into a PersonNode, as nested fields. | |
/// The properties of a `Person` node that this binary deserializes.
#[derive(Deserialize, Debug)]
struct PersonNode {
    // `uid` is deserialized (so it must be present in the data) but does not
    // appear to be read by the code visible in this file, hence the lint
    // suppression.
    #[allow(dead_code)]
    uid: String,
    /// The person's `name` property, printed as the speaker.
    name: String,
}
/// A result row that mixes whole nodes with a projected scalar.
///
/// Field names must match the query's column names (`s`, `p`,
/// `is_interviewer`).
#[derive(Deserialize, Debug)]
struct NestedRow {
    /// Column `s`: the whole `Statement` node.
    s: StatementNode,
    /// Column `p`: the whole `Person` node.
    p: PersonNode,
    // Scalars projected from the query still work alongside whole nodes:
    /// Column `is_interviewer`: a boolean computed in the query.
    is_interviewer: bool,
}
| async fn approach_d_nested_row(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH D: row.to::<NestedRow>() (nested node deserialization)"); | |
| println!("{}", "=".repeat(72)); | |
| // Mix of whole nodes and scalar projections in the RETURN clause.~ | |
| // The column names must match the struct field names. | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| MATCH (i:Interview)-[:HAS_TRANSCRIPT]->(t) | |
| OPTIONAL MATCH (i)-[:INTERVIEWED_BY]->(interviewer:Person) | |
| WHERE interviewer = p | |
| RETURN s, p, | |
| interviewer IS NOT NULL AS is_interviewer | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let nr: NestedRow = row.to()?; | |
| println!( | |
| " {} [{}{}] => words: {:?}", | |
| nr.s.uid, | |
| nr.p.name, | |
| if nr.is_interviewer { | |
| " (interviewer)" | |
| } else { | |
| "" | |
| }, | |
| nr.s.words | |
| .as_ref() | |
| .map(|s| format!("{}...", &s[..20.min(s.len())])) | |
| ); | |
| } | |
| // This is genuinely cool: | |
| // | |
| // + Whole nodes in the RETURN clause — structural information preserved. | |
| // + Single deserialization call for the entire row. | |
| // + Nested structs keep node data organized. | |
| // + #[serde(default)] on StatementNode.words still works — the node | |
| // deserializer handles missing properties the same way. | |
| // | |
| // - Column names must match struct field names. RETURN `s` means the struct | |
| // field must be named `s`, not `statement`. You can work around this with | |
| // `RETURN statement AS s` or `#[serde(rename)]`, but it's a constraint. | |
| // - Relationships (like :CONTAINS with timing data) are awkward. BoltRelation | |
| // has id, start_node_id, end_node_id, typ, and properties — it doesn't | |
| // deserialize as cleanly as nodes because of the extra structural fields. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach E: Extension Trait — Fallible get-or-default | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // Instead of changing the query or adding intermediate structs, patch the | |
| // behavior of Node::get itself with an extension trait. This wraps the | |
| // NoSuchProperty error and converts it to the type's Default value. | |
| // | |
| // This is philosophically different from the other approaches: instead of | |
| // working around the gap between "absent key" and "null value," it papers | |
| // over it at the API boundary with a trait that says "if the key is missing, | |
| // pretend it was the default." | |
/// Extension trait that adds a tolerant property getter to `Node`.
trait NodeExt {
    /// Like `node.get()`, but returns `T::default()` when the property
    /// is absent from the node. Only suitable for types where Default is
    /// a meaningful "not present" sentinel (e.g., Option<T>, Vec<T>, String).
    ///
    /// # Errors
    ///
    /// Implementations are expected to return an error when the lookup fails
    /// for any reason other than the property being absent.
    fn get_or_default<'a, T>(&'a self, key: &str) -> anyhow::Result<T>
    where
        T: serde::Deserialize<'a> + Default;
}
| impl NodeExt for Node { | |
| fn get_or_default<'a, T>(&'a self, key: &str) -> anyhow::Result<T> | |
| where | |
| T: serde::Deserialize<'a> + Default, | |
| { | |
| match self.get(key) { | |
| Ok(v) => Ok(v), | |
| Err(e) => { | |
| // neo4rs::DeError doesn't expose variant-level matching publicly, | |
| // so we match on the error's Display string. This is admittedly | |
| // fragile — but the alternative (reaching into neo4rs internals) | |
| // is worse. In production you'd want neo4rs to expose this variant | |
| // or provide a `get_opt()` method. | |
| let msg = e.to_string(); | |
| if msg.contains("does not exist") { | |
| // NoSuchProperty means the key wasn't in the map at all. | |
| // For Option<T>, Default::default() is None — exactly what we want. | |
| // For String, it's "" — probably fine as a sentinel. | |
| // For i64, it's 0 — almost certainly wrong and dangerous. | |
| // | |
| // This is why the trait bound says Default but the *wisdom* | |
| // of using it depends on the semantics of the type. The compiler | |
| // can't enforce "Default means absent" — that's a human contract. | |
| Ok(T::default()) | |
| } else { | |
| Err(anyhow::anyhow!(e)) | |
| } | |
| } | |
| } | |
| } | |
| } | |
| async fn approach_e_extension_trait(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH E: Extension Trait (get_or_default)"); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, p, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let uid: String = s.get("uid")?; | |
| let words: Option<String> = s.get_or_default("words")?; // NoSuchProperty → None | |
| let p: Node = row.get("p")?; | |
| let speaker: String = p.get("name")?; | |
| println!( | |
| " {} [{}] => words: {}", | |
| uid, | |
| speaker, | |
| words.as_ref().map_or("None", |_| "Some(...)") | |
| ); | |
| } | |
| // Trade-offs: | |
| // | |
| // + Minimal code change — just swap `get` for `get_or_default`. | |
| // + Keeps whole-node returns, no intermediate structs, no query changes. | |
| // + Explicit at the call site: you see `get_or_default` and know this | |
| // field tolerates absence. | |
| // | |
| // - The Default bound is too permissive. `get_or_default::<i64>("count")` | |
| // silently returns 0 for missing properties — a real value that could | |
| // mask bugs. Only safe for types where Default genuinely means "absent." | |
| // | |
| // - Matching on the error string is fragile — if neo4rs changes the | |
| // wording of NoSuchProperty, this breaks silently. | |
| // | |
| // - Mixes concerns: every call site decides individually whether absence | |
| // is OK. With serde(default), the struct itself declares its defaults — | |
| // the knowledge lives in one place. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach F: Cypher Map Projection — Reshape the Node in the Query | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // This is the weirdest Cypher-side approach. Instead of returning the raw node | |
| // or individual properties, use Cypher's map projection syntax to reshape the | |
| // node into a map *inside the query*. The projected map includes all properties | |
| // we care about, and absent properties become null keys in the map (rather than | |
| // absent keys). | |
| // | |
| // Syntax: `s { .uid, .text, .words }` produces a map like: | |
| // { uid: "st-001", text: "...", words: "[...]" } | |
| // or, for a node without `words`: | |
| // { uid: "st-002", text: "...", words: null } | |
| // | |
| // The crucial difference from `RETURN s`: the raw node's property map omits | |
| // keys for absent properties, but map projection *includes* them as null. | |
| async fn approach_f_map_projection(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!( | |
| "APPROACH F: Cypher Map Projection (s {{{{ .uid, .text, .words }}}})" | |
| ); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s { .uid, .text, .words } AS statement, | |
| p { .uid, .name } AS person, | |
| c.startTime AS start_time | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| // The projected map arrives as a BoltType::Map (not BoltType::Node!). | |
| // We can still deserialize it into a struct via row.get(), because | |
| // row.get() calls BoltType::to(), and BoltType::Map's Deserializer | |
| // uses serde's standard MapDeserializer over the map entries. | |
| // | |
| // Since the map projection *includes* the "words" key with a null | |
| // value (BoltType::Null), serde's Option deserializer handles it: | |
| // BoltType::Null → deserialize_option → visitor.visit_none() → None | |
| // | |
| // No #[serde(default)] needed — the key IS present. | |
| // Deserialize the projected map directly into our struct: | |
| let sn: StatementNode = row.get("statement")?; | |
| let pn: PersonNode = row.get("person")?; | |
| let start: f64 = row.get("start_time")?; | |
| println!( | |
| " {} [{}] @ {:.1}s => words: {:?}", | |
| sn.uid, | |
| pn.name, | |
| start, | |
| sn.words | |
| .as_ref() | |
| .map(|s| format!("{}...", &s[..20.min(s.len())])) | |
| ); | |
| } | |
| // Trade-offs: | |
| // | |
| // + The null is *in the data*, not inferred. The BoltMap has the key. | |
| // + No #[serde(default)] needed — the key IS present (with null value). | |
| // + You choose which properties to project — acts as a query-level | |
| // select list, like a SQL SELECT clause. | |
| // + Returns a BoltMap, not a BoltNode — no neo4j node ID, labels, or | |
| // other metadata. Lighter weight over the wire. | |
| // | |
| // - The projected map is a BoltMap, not a BoltNode. You lose labels, IDs, | |
| // and the ability to use neo4rs's node-specific deserialization features | |
| // (like extracting Labels or Id newtypes). | |
| // - You must enumerate every property in the projection. If the node gains | |
| // a new property later, you must update the query — it won't appear | |
| // automatically like it would with `RETURN s`. | |
| // | |
| // This is the "SQL mindset" approach: treat the query as a SELECT that | |
| // defines its output shape, and let the struct match that shape. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach G: The Relationship Problem — Why Edges Don't Nest Like Nodes | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
| // In Approach D, we saw that `row.to::<NestedRow>()` can deserialize whole | |
| // nodes into nested structs. Can we do the same with relationships? | |
| // | |
| // The answer is: yes, but with a structural mismatch you need to understand. | |
| // | |
| // When neo4rs deserializes a BoltRelation into a user struct via | |
| // `BoltRelationDeserializer::deserialize_struct`, it calls the same | |
| // `deserialize_outer_struct` that nodes use. This function: | |
| // | |
| // 1. Extracts the relationship's property map (e.g. {startTime: 0.0, endTime: 2.5}) | |
| // 2. Looks at which struct fields the caller declared | |
| // 3. For each declared field NOT in the property map, creates an | |
| // AdditionalData::Element entry pointing back at the BoltRelation itself | |
| // | |
| // So if your struct declares a field called `startTime`, that's found in the | |
| // property map and deserialized normally. But if your struct declares a field | |
| // called `start_node_id` — that's NOT a property, it's structural metadata | |
| // on the relationship. The AdditionalData::Element deserializer will try to | |
| // resolve it by matching against ElementDataKey variants (Id, StartNodeId, | |
| // EndNodeId, Type, Labels, Properties, etc.) | |
| // | |
| // Here's the catch: what has to match is the field's *type*, not its name. | |
| // The field must be typed as one of neo4rs's serde newtype structs (like | |
| // `StartNodeId`); a plain snake_case field such as `start_node_id: u64` is | |
| // not resolved. The Element deserializer dispatches on | |
| // `deserialize_newtype_struct(name, ...)` where `name` comes from the | |
| // serde-derived code — and serde uses the newtype struct's Rust name as | |
| // that `name`. So you need the neo4rs newtype wrappers. | |
| // | |
| // Compare with nodes: | |
| // - BoltNode has: id, labels, properties (3 structural fields) | |
| // - BoltRelation: id, start_node_id, end_node_id, typ, properties (5 structural fields) | |
| // | |
| // Relationships have 2 extra structural fields and a type. When you use | |
| // `relation.to::<T>()`, the property map is flattened into T's fields, but | |
| // the structural metadata (id, start/end node IDs, type) requires special | |
| // neo4rs newtype wrappers to extract. | |
| // | |
| // This means a "natural" Rust struct with plain fields can't absorb both | |
| // property data AND relationship metadata in one shot — you need neo4rs's | |
| // Id, StartNodeId, EndNodeId, and Type newtypes. | |
| // neo4rs re-exports these from its public API | |
| use neo4rs::{Id, StartNodeId, EndNodeId, Relation}; | |
/// A struct that deserializes directly from a `:CONTAINS` relationship,
/// extracting ONLY properties (not structural metadata).
#[derive(Deserialize, Debug)]
struct ContainsEdgePropsOnly {
    /// The relationship's `startTime` property (camelCase in the graph,
    /// hence the rename).
    #[serde(rename = "startTime")]
    start_time: f64,
    /// The relationship's `endTime` property.
    #[serde(rename = "endTime")]
    end_time: f64,
}
/// A struct that tries to extract structural metadata alongside properties
/// using neo4rs's newtype wrappers.
///
/// The key insight: the struct field names don't matter here — what matters
/// is the *type*. When serde encounters a field typed `Id`, it calls
/// `deserialize_newtype_struct("Id", ...)`, and the ElementDataDeserializer
/// matches the string "Id" to ElementDataKey::Id to extract the structural
/// id. The field could be called `id`, `rel_id`, or `potato` — serde cares
/// about the type name, not the field name.
#[derive(Deserialize, Debug)]
struct ContainsEdgeWithMeta {
    /// The relationship's `startTime` property.
    #[serde(rename = "startTime")]
    start_time: f64,
    /// The relationship's `endTime` property.
    #[serde(rename = "endTime")]
    end_time: f64,
    // These use neo4rs newtypes. The serde codegen emits:
    //   deserialize_newtype_struct("Id", visitor)
    // which the ElementDataDeserializer intercepts and resolves to
    // the relationship's structural id field.
    /// Structural id of the relationship (not a property).
    id: Id,
    /// Structural id of the relationship's start node.
    start_node: StartNodeId,
    /// Structural id of the relationship's end node.
    end_node: EndNodeId,
}
/// What happens if you try plain fields instead of newtypes?
///
/// Counter-example to `ContainsEdgeWithMeta`: the `id` field is a plain
/// `u64`, so it is treated as an ordinary (missing) property rather than
/// as the relationship's structural id.
#[derive(Deserialize, Debug)]
struct ContainsEdgeNaive {
    /// The relationship's `startTime` property.
    #[serde(rename = "startTime")]
    start_time: f64,
    /// The relationship's `endTime` property.
    #[serde(rename = "endTime")]
    end_time: f64,
    // This will NOT extract the relationship's internal ID — there's no
    // "id" key in the property map, and the AdditionalData::Element
    // deserializer won't resolve a plain u64 field to the structural ID.
    // It goes through deserialize_any → PropertyMissingButRequired.
    //
    // With #[serde(default)], it silently becomes 0.
    // Without #[serde(default)], it blows up.
    #[serde(default)]
    id: u64,
}
/// Full row that nests both nodes AND a relationship.
///
/// Field names must match the query's column names (`s`, `p`, `c`,
/// `is_interviewer`).
///
/// NOTE(review): `ContainsEdge` is not among the edge structs declared next
/// to this one (`ContainsEdgePropsOnly`, `ContainsEdgeWithMeta`,
/// `ContainsEdgeNaive`) — confirm it is defined elsewhere in the file, or
/// pick one of those types.
#[derive(Deserialize, Debug)]
struct FullNestedRow {
    /// Column `s`: the whole `Statement` node.
    s: StatementNode,
    /// Column `p`: the whole `Person` node.
    p: PersonNode,
    /// Column `c`: the `:CONTAINS` relationship.
    c: ContainsEdge,
    /// Column `is_interviewer`: a boolean computed in the query.
    is_interviewer: bool,
}
| async fn approach_g_relationship_nesting(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH G: Relationship Deserialization (the edge problem)"); | |
| println!("{}", "=".repeat(72)); | |
| // --- G.1: Relation::get (manual property extraction) --- | |
| println!("\n G.1: Relation::get (manual, like the production code does)"); | |
| { | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, p, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let sn: StatementNode = s.to()?; | |
| let c: Relation = row.get("c")?; | |
| let start: f64 = c.get("startTime")?; | |
| let end: f64 = c.get("endTime")?; | |
| println!( | |
| " {} @ {:.1}-{:.1}s (rel id={}, {} -> {})", | |
| sn.uid, start, end, | |
| c.id(), c.start_node_id(), c.end_node_id() | |
| ); | |
| } | |
| } | |
| // --- G.2: relation.to::<ContainsEdgePropsOnly>() — just properties --- | |
| println!("\n G.2: relation.to::<ContainsEdgePropsOnly>() — properties only, no metadata"); | |
| { | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let sn: StatementNode = s.to()?; | |
| let c: Relation = row.get("c")?; | |
| let ce: ContainsEdgePropsOnly = c.to()?; | |
| println!( | |
| " {} @ {:.1}-{:.1}s (works fine — no structural fields requested)", | |
| sn.uid, ce.start_time, ce.end_time, | |
| ); | |
| } | |
| } | |
| // --- G.3: relation.to::<ContainsEdgeWithMeta>() with neo4rs newtypes --- | |
| println!("\n G.3: relation.to::<ContainsEdgeWithMeta>() with neo4rs Id/StartNodeId newtypes"); | |
| { | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let s: Node = row.get("s")?; | |
| let sn: StatementNode = s.to()?; | |
| let c: Relation = row.get("c")?; | |
| match c.to::<ContainsEdgeWithMeta>() { | |
| Ok(ce) => println!( | |
| " {} @ {:.1}-{:.1}s id={:?}, start_node={:?}, end_node={:?}", | |
| sn.uid, ce.start_time, ce.end_time, | |
| ce.id, ce.start_node, ce.end_node, | |
| ), | |
| Err(e) => { | |
| println!(" {} => ERROR: {}", sn.uid, e); | |
| println!(" (neo4rs newtypes must be the type, not wrapped in Option)"); | |
| } | |
| } | |
| } | |
| } | |
| // --- G.4: relation.to::<ContainsEdgeNaive>() — what goes wrong --- | |
| println!("\n G.4: relation.to::<ContainsEdgeNaive>() — plain u64 id field"); | |
| { | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s, c | |
| ORDER BY c.startTime LIMIT 1", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let c: Relation = row.get("c")?; | |
| match c.to::<ContainsEdgeNaive>() { | |
| Ok(ce) => println!( | |
| " @ {:.1}-{:.1}s id={} (should be {}, got 0 from Default!)", | |
| ce.start_time, ce.end_time, ce.id, c.id() | |
| ), | |
| Err(e) => println!(" ERROR: {}", e), | |
| } | |
| } | |
| } | |
| // --- G.4: row.to::<FullNestedRow>() — the whole enchilada --- | |
| println!("\n G.4: row.to::<FullNestedRow>() — nodes + relationship + scalar in one shot"); | |
| { | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| MATCH (i:Interview)-[:HAS_TRANSCRIPT]->(t) | |
| OPTIONAL MATCH (i)-[:INTERVIEWED_BY]->(interviewer:Person) | |
| WHERE interviewer = p | |
| RETURN s, p, c, | |
| interviewer IS NOT NULL AS is_interviewer | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let full: FullNestedRow = row.to()?; | |
| println!( | |
| " {} [{}{}] @ {:.1}-{:.1}s => words: {}", | |
| full.s.uid, | |
| full.p.name, | |
| if full.is_interviewer { " (interviewer)" } else { "" }, | |
| full.c.start_time, | |
| full.c.end_time, | |
| full.s.words.as_ref().map_or("None", |_| "Some(...)"), | |
| ); | |
| } | |
| } | |
| // Summary: | |
| // | |
| // Relationships DO nest into row.to() structs — the BoltRelation | |
| // deserializer works exactly like BoltNodeDeserializer for properties. | |
| // The wrinkle is structural metadata: | |
| // | |
| // Node structural: id, labels → via Id, Labels newtypes | |
| // Relationship structural: id, start_node_id, end_node_id, typ | |
| // → via Id, StartNodeId, EndNodeId, Type newtypes | |
| // | |
| // If you don't need the structural metadata (just properties), relationships | |
| // and nodes deserialize identically. But if you want `c.id()` or | |
| // `c.start_node_id()`, you need the neo4rs newtypes — a plain `id: u64` | |
| // field won't find it in the property map and will either error or default | |
| // to 0. | |
| // | |
| // The deeper issue: this is a consequence of Bolt's wire format. Bolt nodes | |
| // and relationships are not "just maps." They're tagged structures with | |
| // separate slots for id, labels/type, and properties. The properties are a | |
| // map, but the id and labels/type sit outside it. neo4rs's Deserializer | |
| // papers over this with the AdditionalData/ElementData machinery — it | |
| // merges the structural fields into the property map during deserialization, | |
| // but only if you ask for them with the right newtype names. | |
| // | |
| // This is the fundamental tension: Rust structs are flat bags of named | |
| // fields, but Bolt entities have two *layers* of named data (structural | |
| // metadata + properties). The Deserializer bridges them, but the bridge | |
| // has rules. | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // Approach H: Cypher Relationship Map Projection — Merging Properties with | |
| // Structural Data | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| // | |
// If Approach F showed that node map projection (`s { .uid, .text }`) converts
// a BoltNode into a BoltMap with null-normalized keys, can we do the same
// for relationships? Not quite — property selectors like `c { .startTime }`
// only pick out properties, so they can't carry the relationship's structural
// ids on their own. But we CAN use map construction:
//
//   { start: c.startTime, end: c.endTime, id: id(c) }
//
// This gives us a BoltMap that merges property data WITH structural metadata,
// all as plain key-value pairs. No newtypes needed on the Rust side.
| #[derive(Deserialize, Debug)] | |
| struct ContainsFlat { | |
| start_time: f64, | |
| end_time: f64, | |
| rel_id: i64, | |
| start_node: i64, | |
| end_node: i64, | |
| } | |
| async fn approach_h_relationship_map_construction(graph: &Graph) -> Result<()> { | |
| println!("\n{}", "=".repeat(72)); | |
| println!("APPROACH H: Cypher Map Construction for Relationships"); | |
| println!("{}", "=".repeat(72)); | |
| let mut stream = graph | |
| .execute(query( | |
| "MATCH (t:Transcript)-[c:CONTAINS]->(s:Statement)<-[:SAYS]-(p:Person) | |
| RETURN s { .uid, .text, .words } AS s, | |
| p { .name } AS p, | |
| { start_time: c.startTime, | |
| end_time: c.endTime, | |
| rel_id: id(c), | |
| start_node: id(startNode(c)), | |
| end_node: id(endNode(c)) } AS c | |
| ORDER BY c.startTime", | |
| )) | |
| .await?; | |
| while let Some(row) = stream.next().await? { | |
| let sn: StatementNode = row.get("s")?; | |
| let pn: PersonNode = row.get("p")?; | |
| let ce: ContainsFlat = row.get("c")?; | |
| println!( | |
| " {} [{}] @ {:.1}-{:.1}s rel_id={}, {} -> {} => words: {}", | |
| sn.uid, | |
| pn.name, | |
| ce.start_time, | |
| ce.end_time, | |
| ce.rel_id, | |
| ce.start_node, | |
| ce.end_node, | |
| sn.words.as_ref().map_or("None", |_| "Some(...)"), | |
| ); | |
| } | |
| // This fully normalizes the deserialization problem: | |
| // | |
| // + Everything is a BoltMap on the wire. No BoltNode, no BoltRelation. | |
| // + Struct field names match map keys. Plain types, no newtypes. | |
| // + null-normalization for absent properties on the node side. | |
| // + Structural metadata (ids) promoted to map keys on the relationship side. | |
| // | |
| // - The Cypher query is now doing data modeling work. It's constructing | |
| // the exact shape the Rust struct needs, which makes the query fragile | |
| // and verbose. | |
| // - You lose the ability to use neo4rs's graph-aware APIs (c.typ(), | |
| // c.start_node_id(), etc.) because there IS no Relation object. | |
| // - This is basically writing a SQL SELECT at that point. If you're going | |
| // to reshape everything in the query, why use a graph database? | |
| // (That's a bit harsh — the graph is still doing the pattern matching | |
| // and traversal. But the result shape is relational.) | |
| Ok(()) | |
| } | |
| // ═══════════════════════════════════════════════════════════════════════════════ | |
| #[tokio::main] | |
| async fn main() -> Result<()> { | |
| let uri = | |
| std::env::var("NEO4J_URI").unwrap_or_else(|_| "bolt://localhost:7687".to_string()); | |
| let user = | |
| std::env::var("NEO4J_USERNAME").unwrap_or_else(|_| "neo4j".to_string()); | |
| let pass = | |
| std::env::var("NEO4J_PASSWORD").unwrap_or_else(|_| "auohpauohp".to_string()); | |
| let graph = Graph::new(&uri, &user, &pass) | |
| .await | |
| .context("connecting to Neo4j")?; | |
| println!( | |
| "{}", | |
| "╔══════════════════════════════════════════════════════════════════════╗" | |
| ); | |
| println!( | |
| "{}", | |
| "║ Exploring Neo4j Node Deserialization Strategies (neo4rs 0.8) ║" | |
| ); | |
| println!( | |
| "{}", | |
| "╠══════════════════════════════════════════════════════════════════════╣" | |
| ); | |
| println!( | |
| "{}", | |
| "║ 3 statements in the DB: st-001 (has words), st-002 (NO words), ║" | |
| ); | |
| println!( | |
| "{}", | |
| "║ st-003 (has words). Approach 0 should fail on st-002. ║" | |
| ); | |
| println!( | |
| "{}", | |
| "╚══════════════════════════════════════════════════════════════════════╝" | |
| ); | |
| // Approach 0: Show the bug | |
| if let Err(e) = approach_0_the_bug(&graph).await { | |
| println!(" [Approach 0 errored out: {}]", e); | |
| } | |
| // Approach A: Cypher projection | |
| approach_a_cypher_projection(&graph).await?; | |
| // Approach A': COALESCE sentinel | |
| approach_a_prime_coalesce(&graph).await?; | |
| // Approach B: serde(default) intermediate struct | |
| approach_b_serde_default(&graph).await?; | |
| // Approach C: Row::to with flat struct | |
| approach_c_row_to(&graph).await?; | |
| // Approach D: Row::to with nested node deserialization | |
| approach_d_nested_row(&graph).await?; | |
| // Approach E: Extension trait | |
| approach_e_extension_trait(&graph).await?; | |
| // Approach F: Map projection | |
| approach_f_map_projection(&graph).await?; | |
| // Approach G: Relationship deserialization | |
| approach_g_relationship_nesting(&graph).await?; | |
| // Approach H: Cypher map construction for relationships | |
| approach_h_relationship_map_construction(&graph).await?; | |
| println!("\n{}", "=".repeat(72)); | |
| println!("SUMMARY"); | |
| println!("{}", "=".repeat(72)); | |
| println!( | |
| " 0 node.get::<Option<String>>() BROKEN HashMap miss before serde runs" | |
| ); | |
| println!( | |
| " A Cypher s.words AS words Works Cypher coerces absent -> null" | |
| ); | |
| println!( | |
| " A' COALESCE(s.words, '[]') Works sentinel substitution, no Option needed" | |
| ); | |
| println!( | |
| " B node.to::<StatementNode>() Works #[serde(default)] on struct field" | |
| ); | |
| println!( | |
| " C row.to::<FlatRow>() Works full-row flat deserialization" | |
| ); | |
| println!( | |
| " D row.to::<NestedRow>() w/ nodes Works nested node structs in one shot" | |
| ); | |
| println!( | |
| " E node.get_or_default() Works extension trait, NoSuchProperty -> Default" | |
| ); | |
| println!( | |
| " F s {{{{ .uid, .text, .words }}}} Works map projection includes null keys" | |
| ); | |
| println!( | |
| " G relation.to / row.to w/ edges Works newtypes needed for structural metadata" | |
| ); | |
| println!( | |
| " H Cypher map construction Works fully flattened, no newtypes, most SQL-like" | |
| ); | |
| Ok(()) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment