From a5a1ae526ca4ebf7ffcf3dcb2c7b335d13eb448a Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Thu, 23 Oct 2025 20:11:57 +0900 Subject: [PATCH 01/14] feat: add new api (Node::children and Range::extended_by) --- .../postgresql-cst-parser/src/tree_sitter.rs | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 24f6a59..36fac3a 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -98,6 +98,24 @@ impl std::fmt::Display for Range { } } +impl Range { + pub fn extended_by(&self, other: &Self) -> Self { + Range { + start_byte: self.start_byte.min(other.start_byte), + end_byte: self.end_byte.max(other.end_byte), + + start_position: Point { + row: self.start_position.row.min(other.start_position.row), + column: self.start_position.column.min(other.start_position.column), + }, + end_position: Point { + row: self.end_position.row.max(other.end_position.row), + column: self.end_position.column.max(other.end_position.column), + }, + } + } +} + impl<'a> Node<'a> { pub fn walk(&self) -> TreeCursor<'a> { TreeCursor { @@ -144,6 +162,20 @@ impl<'a> Node<'a> { } } + pub fn children(&self) -> Vec> { + if let Some(node) = self.node_or_token.as_node() { + node.children_with_tokens() + .map(|node| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: node, + }) + .collect() + } else { + vec![] + } + } + pub fn next_sibling(&self) -> Option> { self.node_or_token .next_sibling_or_token() From 7b9018465930c91e1539e5b23242798fb86d65f0 Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Fri, 31 Oct 2025 17:37:37 +0900 Subject: [PATCH 02/14] feat(tree-sitter module): add previous sibling apis --- .../postgresql-cst-parser/src/tree_sitter.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 36fac3a..68e03aa 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -186,6 +186,16 @@ impl<'a> Node<'a> { }) } + pub fn prev_sibling(&self) -> Option> { + self.node_or_token + .prev_sibling_or_token() + .map(|sibling| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: sibling, + }) + } + pub fn parent(&self) -> Option> { self.node_or_token.parent().map(|parent| Node { input: self.input, @@ -246,6 +256,15 @@ impl<'a> TreeCursor<'a> { } } + pub fn goto_prev_sibling(&mut self) -> bool { + if let Some(sibling) = self.node_or_token.prev_sibling_or_token() { + self.node_or_token = sibling; + true + } else { + false + } + } + pub fn is_comment(&self) -> bool { matches!( self.node_or_token.kind(), From ea6b1a50c41ec1cea043a1dd1e587372eebb9b97 Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Tue, 4 Nov 2025 18:38:55 +0900 Subject: [PATCH 03/14] feat: add Node::last_token, Node::descendants --- .../postgresql-cst-parser/src/tree_sitter.rs | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 68e03aa..0255316 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -207,6 +207,59 @@ impl<'a> Node<'a> { pub fn is_comment(&self) -> bool { matches!(self.kind(), SyntaxKind::C_COMMENT | SyntaxKind::SQL_COMMENT) } + + /// Return the rightmost token in the subtree of this node + /// this is not tree-sitter's API + pub fn last_token(&self) -> Option> { + match &self.node_or_token { + NodeOrToken::Node(node) => node.last_token().map(|token| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: NodeOrToken::Token(token), + }), + NodeOrToken::Token(token) => Some(Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: NodeOrToken::Token(token), + }), + } + } + + /// Returns an iterator over all descendant nodes (not including tokens) + /// this is not tree-sitter's API + pub fn descendants(&self) -> impl Iterator> { + struct Descendants<'a> { + input: &'a str, + range_map: Rc>, + iter: Box + 'a>, + } + + impl<'a> Iterator for Descendants<'a> { + type Item = Node<'a>; + + fn next(&mut self) -> Option { + self.iter.next().map(|node| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: NodeOrToken::Node(node), + }) + } + } + + if let Some(node) = self.node_or_token.as_node() { + Descendants { + input: self.input, + range_map: Rc::clone(&self.range_map), + iter: Box::new(node.descendants()), + } + } else { + Descendants { + input: self.input, + range_map: Rc::clone(&self.range_map), + iter: Box::new(std::iter::empty()), + } + } + } } impl<'a> From> for TreeCursor<'a> { @@ -513,4 +566,36 @@ from assert_eq!(stmt_count, 2); } + + #[test] + fn test_last_token_returns_rightmost_token() { + let src = "SELECT u.*, (v).id, name;"; + let tree = parse(src).unwrap(); + let root = tree.root_node(); + + let target_list = root + .descendants() + .find(|node| node.kind() == SyntaxKind::target_list) + .expect("should find target_list"); + + // last token of the target_list is returned + let last_token = target_list.last_token().expect("should have last token"); + assert_eq!(last_token.text(), "name"); + + let target_els = target_list + .children() + .into_iter() + .filter(|node| node.kind() == SyntaxKind::target_el) + .collect::>(); + + let mut last_tokens = target_els + .iter() + .map(|node| node.last_token().expect("should have last token")); + + // last token of each target_el is returned + assert_eq!(last_tokens.next().unwrap().text(), "*"); + assert_eq!(last_tokens.next().unwrap().text(), "id"); + assert_eq!(last_tokens.next().unwrap().text(), "name"); + assert!(last_tokens.next().is_none()); + } } From 2cd1edc2e71b359bb0358234d103e425edd2b41d Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Tue, 4 Nov 2025 19:02:11 +0900 Subject: [PATCH 04/14] change: rename last_token to last_node --- .../postgresql-cst-parser/src/tree_sitter.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 0255316..e6eaaab 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -210,7 +210,7 @@ impl<'a> Node<'a> { /// Return the rightmost token in the subtree of this node /// this is not tree-sitter's API - pub fn last_token(&self) -> Option> { + pub fn last_node(&self) -> Option> { match &self.node_or_token { NodeOrToken::Node(node) => node.last_token().map(|token| Node { input: self.input, @@ -568,7 +568,7 @@ from } #[test] - fn test_last_token_returns_rightmost_token() { + fn test_last_node_returns_rightmost_node() { let src = "SELECT u.*, (v).id, name;"; let tree = parse(src).unwrap(); let root = tree.root_node(); @@ -578,9 +578,9 @@ from .find(|node| node.kind() == SyntaxKind::target_list) .expect("should find target_list"); - // last token of the target_list is returned - let last_token = target_list.last_token().expect("should have last token"); - assert_eq!(last_token.text(), "name"); + // last node of the target_list is returned + let last_node = target_list.last_node().expect("should have last node"); + assert_eq!(last_node.text(), "name"); let target_els = target_list .children() @@ -588,14 +588,14 @@ from .filter(|node| node.kind() == SyntaxKind::target_el) .collect::>(); - let mut last_tokens = target_els + let mut last_nodes = target_els .iter() - .map(|node| node.last_token().expect("should have last token")); + .map(|node| node.last_node().expect("should have last node")); - // last token of each target_el is returned - assert_eq!(last_tokens.next().unwrap().text(), "*"); - assert_eq!(last_tokens.next().unwrap().text(), "id"); - assert_eq!(last_tokens.next().unwrap().text(), "name"); - assert!(last_tokens.next().is_none()); + // last node of each target_el is returned + assert_eq!(last_nodes.next().unwrap().text(), "*"); + assert_eq!(last_nodes.next().unwrap().text(), "id"); + assert_eq!(last_nodes.next().unwrap().text(), "name"); + assert!(last_nodes.next().is_none()); } } From d235260e31af923d526059802cbdb27fc891e767 Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Thu, 13 Nov 2025 15:24:37 +0900 Subject: [PATCH 05/14] fix Node::descendants and add Node::next_token --- .../postgresql-cst-parser/src/tree_sitter.rs | 70 +++++++++++++++---- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index e6eaaab..02c9098 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -225,37 +225,60 @@ impl<'a> Node<'a> { } } - /// Returns an iterator over all descendant nodes (not including tokens) + /// Returns the next token in the tree. + /// This is not necessarily a direct sibling of this node/token, + /// but will always be further right in the tree. /// this is not tree-sitter's API - pub fn descendants(&self) -> impl Iterator> { + pub fn next_token(&self) -> Option> { + match &self.node_or_token { + NodeOrToken::Token(token) => token.next_token().map(|next_token| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: NodeOrToken::Token(next_token), + }), + NodeOrToken::Node(node) => { + // For a node, find its last token and then get the next token + node.last_token() + .and_then(|last_token| last_token.next_token()) + .map(|next_token| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: NodeOrToken::Token(next_token), + }) + } + } + } + + /// Returns an iterator over all descendant nodes (including tokens) + /// this is not tree-sitter's API + pub fn descendants(&self) -> impl Iterator> + '_ { struct Descendants<'a> { - input: &'a str, - range_map: Rc>, - iter: Box + 'a>, + iter: Box> + 'a>, } impl<'a> Iterator for Descendants<'a> { type Item = Node<'a>; fn next(&mut self) -> Option { - self.iter.next().map(|node| Node { - input: self.input, - range_map: Rc::clone(&self.range_map), - node_or_token: NodeOrToken::Node(node), - }) + self.iter.next() } } if let Some(node) = self.node_or_token.as_node() { + let input = self.input; + let range_map = Rc::clone(&self.range_map); Descendants { - input: self.input, - range_map: Rc::clone(&self.range_map), - iter: Box::new(node.descendants()), + iter: Box::new( + node.descendants_with_tokens() + .map(move |node_or_token| Node { + input, + range_map: Rc::clone(&range_map), + node_or_token, + }), + ), } } else { Descendants { - input: self.input, - range_map: Rc::clone(&self.range_map), iter: Box::new(std::iter::empty()), } } @@ -598,4 +621,21 @@ from assert_eq!(last_nodes.next().unwrap().text(), "name"); assert!(last_nodes.next().is_none()); } + + #[test] + fn test_next_token() { + let src = "SELECT tbl.name as n from TBL;"; + let tree = parse(src).unwrap(); + let root = tree.root_node(); + + let name = root + .descendants() + .find(|node| node.kind() == SyntaxKind::NAME_P) + .expect("should find NAME_P"); + + // Even if not a direct sibling or not belonging to the same subtree, the next_token can retrieve the next token. + let next_token = name.next_token().expect("should have next token"); + assert_eq!(next_token.text(), "as"); + assert_eq!(next_token.kind(), SyntaxKind::AS); + } } From 7a37385e198c94b412813bb596ea1c36db0c6748 Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Thu, 13 Nov 2025 16:25:49 +0900 Subject: [PATCH 06/14] add Range::is_adjacent --- crates/postgresql-cst-parser/src/tree_sitter.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 02c9098..b4168ca 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -114,6 +114,10 @@ impl Range { }, } } + + pub fn is_adjacent(&self, other: &Self) -> bool { + self.end_byte == other.start_byte || self.start_byte == other.end_byte + } } impl<'a> Node<'a> { From 5dba7fbaac3b57e71e61b39bb8524e2eb97f2e5c Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Fri, 14 Nov 2025 16:26:17 +0900 Subject: [PATCH 07/14] feat: add Node::first_child, Node::last_child --- .../postgresql-cst-parser/src/tree_sitter.rs | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index b4168ca..ece9647 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -180,6 +180,34 @@ impl<'a> Node<'a> { } } + /// Returns the first child element of this node. + /// this is not tree-sitter's API + pub fn first_child(&self) -> Option> { + if let Some(node) = self.node_or_token.as_node() { + node.first_child_or_token().map(|child| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: child, + }) + } else { + None + } + } + + /// Returns the last child element of this node. + /// this is not tree-sitter's API + pub fn last_child(&self) -> Option> { + if let Some(node) = self.node_or_token.as_node() { + node.last_child_or_token().map(|child| Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: child, + }) + } else { + None + } + } + pub fn next_sibling(&self) -> Option> { self.node_or_token .next_sibling_or_token() @@ -212,7 +240,7 @@ impl<'a> Node<'a> { matches!(self.kind(), SyntaxKind::C_COMMENT | SyntaxKind::SQL_COMMENT) } - /// Return the rightmost token in the subtree of this node + /// Returns the rightmost token in the subtree of this node. /// this is not tree-sitter's API pub fn last_node(&self) -> Option> { match &self.node_or_token { From 06c5455fe0559229e1ae5c6cf539c0f4530af59f Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Tue, 2 Dec 2025 15:21:49 +0900 Subject: [PATCH 08/14] feat: impelment PartialEq and Eq for tree_sitter::Node --- crates/postgresql-cst-parser/src/tree_sitter.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index ece9647..3037154 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -63,6 +63,14 @@ pub struct Node<'a> { pub node_or_token: NodeOrToken<'a>, } +impl<'a> PartialEq for Node<'a> { + fn eq(&self, other: &Self) -> bool { + self.node_or_token == other.node_or_token + } +} + +impl<'a> Eq for Node<'a> {} + #[derive(Debug, Clone)] pub struct TreeCursor<'a> { pub input: &'a str, From b288b7bfee0eed5be9c65dff17379acaf8961f6c Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Mon, 25 May 2026 17:41:40 +0900 Subject: [PATCH 09/14] trigger github actions From 0fe01661acaf62938c271aef4fb5552c8f76d81d Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Mon, 25 May 2026 18:17:04 +0900 Subject: [PATCH 10/14] feat: rename last_node to last_token --- .../postgresql-cst-parser/src/tree_sitter.rs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 3037154..5df02bf 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -250,7 +250,7 @@ impl<'a> Node<'a> { /// Returns the rightmost token in the subtree of this node. /// this is not tree-sitter's API - pub fn last_node(&self) -> Option> { + pub fn last_token(&self) -> Option> { match &self.node_or_token { NodeOrToken::Node(node) => node.last_token().map(|token| Node { input: self.input, @@ -631,7 +631,7 @@ from } #[test] - fn test_last_node_returns_rightmost_node() { + fn test_last_token_returns_rightmost_token() { let src = "SELECT u.*, (v).id, name;"; let tree = parse(src).unwrap(); let root = tree.root_node(); @@ -641,9 +641,9 @@ from .find(|node| node.kind() == SyntaxKind::target_list) .expect("should find target_list"); - // last node of the target_list is returned - let last_node = target_list.last_node().expect("should have last node"); - assert_eq!(last_node.text(), "name"); + // last token of the target_list is returned + let last_token = target_list.last_token().expect("should have last token"); + assert_eq!(last_token.text(), "name"); let target_els = target_list .children() @@ -651,15 +651,15 @@ from .filter(|node| node.kind() == SyntaxKind::target_el) .collect::>(); - let mut last_nodes = target_els + let mut last_tokens = target_els .iter() - .map(|node| node.last_node().expect("should have last node")); + .map(|node| node.last_token().expect("should have last token")); - // last node of each target_el is returned - assert_eq!(last_nodes.next().unwrap().text(), "*"); - assert_eq!(last_nodes.next().unwrap().text(), "id"); - assert_eq!(last_nodes.next().unwrap().text(), "name"); - assert!(last_nodes.next().is_none()); + // last token of each target_el is returned + assert_eq!(last_tokens.next().unwrap().text(), "*"); + assert_eq!(last_tokens.next().unwrap().text(), "id"); + assert_eq!(last_tokens.next().unwrap().text(), "name"); + assert!(last_tokens.next().is_none()); } #[test] From 1651d1445946d5dcea79cc85132d05212410fa0a Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Mon, 25 May 2026 18:43:05 +0900 Subject: [PATCH 11/14] feat: include token self in descendants --- crates/postgresql-cst-parser/src/tree_sitter.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 5df02bf..a67f3e1 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -289,7 +289,8 @@ impl<'a> Node<'a> { } } - /// Returns an iterator over all descendant nodes (including tokens) + /// Returns an iterator over all nodes in the subtree starting at this node, + /// including this node and tokens. /// this is not tree-sitter's API pub fn descendants(&self) -> impl Iterator> + '_ { struct Descendants<'a> { @@ -319,7 +320,11 @@ impl<'a> Node<'a> { } } else { Descendants { - iter: Box::new(std::iter::empty()), + iter: Box::new(std::iter::once(Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: self.node_or_token, + })), } } } From 787a18420708de65f6d3f8ac2c4c94f04c6b0d8c Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Mon, 25 May 2026 18:43:27 +0900 Subject: [PATCH 12/14] style: format rust sources --- .../src/parser_generator/lalr.rs | 3 +- .../src/parser_generator/lexer/generated.rs | 36 +++++++++++++------ .../src/lexer/lexer_ported.rs | 2 +- .../postgresql-cst-parser/src/lexer/util.rs | 2 +- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/crates/parser-generator/src/parser_generator/lalr.rs b/crates/parser-generator/src/parser_generator/lalr.rs index 9eb0d88..001edc9 100644 --- a/crates/parser-generator/src/parser_generator/lalr.rs +++ b/crates/parser-generator/src/parser_generator/lalr.rs @@ -250,7 +250,8 @@ impl Lalr { .map(|c| id_mapper.to_component_id(&c)) .or_else(|| { rule.components - .iter().rfind(|c| matches!(c, Component::Terminal(_))) + .iter() + .rfind(|c| matches!(c, Component::Terminal(_))) .map(|c| id_mapper.to_component_id(c)) }) .and_then(|component_id| assoc[component_id.0 as usize].clone()); diff --git a/crates/parser-generator/src/parser_generator/lexer/generated.rs b/crates/parser-generator/src/parser_generator/lexer/generated.rs index 466c3d0..6539e26 100644 --- a/crates/parser-generator/src/parser_generator/lexer/generated.rs +++ b/crates/parser-generator/src/parser_generator/lexer/generated.rs @@ -5,11 +5,11 @@ use std::collections::HashMap; use super::{ + Lexer, NAMEDATALEN, ParserError, TokenKind, Yylval, lexer_ported::{ get_char_by_byte_pos, is_highbit_set, is_utf16_surrogate_first, is_utf16_surrogate_second, surrogate_pair_to_codepoint, }, - Lexer, ParserError, TokenKind, Yylval, NAMEDATALEN, }; macro_rules! ereport { @@ -4027,11 +4027,18 @@ impl Lexer { self.set_yylloc(); if !STANDARD_CONFORMING_STRINGS { - ereport!(self, ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("unsafe use of string constant with Unicode escapes"), - errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."), - self.lexer_errposition())); + ereport!( + self, + ERROR, + ( + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsafe use of string constant with Unicode escapes"), + errdetail( + "String constants with Unicode escapes cannot be used when standard_conforming_strings is off." + ), + self.lexer_errposition() + ) + ); } self.begin(State::xus); self.literal.clear(); @@ -5370,11 +5377,18 @@ impl Lexer { self.set_yylloc(); if !STANDARD_CONFORMING_STRINGS { - ereport!(self, ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("unsafe use of string constant with Unicode escapes"), - errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off."), - self.lexer_errposition())); + ereport!( + self, + ERROR, + ( + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsafe use of string constant with Unicode escapes"), + errdetail( + "String constants with Unicode escapes cannot be used when standard_conforming_strings is off." + ), + self.lexer_errposition() + ) + ); } self.begin(State::xus); self.literal.clear(); diff --git a/crates/postgresql-cst-parser/src/lexer/lexer_ported.rs b/crates/postgresql-cst-parser/src/lexer/lexer_ported.rs index 0d1ed6b..3344d81 100644 --- a/crates/postgresql-cst-parser/src/lexer/lexer_ported.rs +++ b/crates/postgresql-cst-parser/src/lexer/lexer_ported.rs @@ -1,5 +1,5 @@ /// Ported sources from PostgreSQL -use super::{Lexer, Token, TokenKind, Yylval, parser_error::ParserError}; +use super::{parser_error::ParserError, Lexer, Token, TokenKind, Yylval}; pub fn is_highbit_set(c: char) -> u8 { (c as u8) & 0x80 diff --git a/crates/postgresql-cst-parser/src/lexer/util.rs b/crates/postgresql-cst-parser/src/lexer/util.rs index 0c28eed..f76a6db 100644 --- a/crates/postgresql-cst-parser/src/lexer/util.rs +++ b/crates/postgresql-cst-parser/src/lexer/util.rs @@ -1,8 +1,8 @@ #![allow(dead_code)] use super::{ + generated::{get_keyword_map, State}, Lexer, ScanReport, Yylval, - generated::{State, get_keyword_map}, }; impl Lexer { From 3e073149a14dfb937e5f97d61ed4ba9b82e0da9e Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Tue, 26 May 2026 14:10:21 +0900 Subject: [PATCH 13/14] refactor: simplify descendants iterator --- .../postgresql-cst-parser/src/tree_sitter.rs | 50 ++++++------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index a67f3e1..0ec5705 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -293,40 +293,22 @@ impl<'a> Node<'a> { /// including this node and tokens. /// this is not tree-sitter's API pub fn descendants(&self) -> impl Iterator> + '_ { - struct Descendants<'a> { - iter: Box> + 'a>, - } - - impl<'a> Iterator for Descendants<'a> { - type Item = Node<'a>; - - fn next(&mut self) -> Option { - self.iter.next() - } - } - - if let Some(node) = self.node_or_token.as_node() { - let input = self.input; - let range_map = Rc::clone(&self.range_map); - Descendants { - iter: Box::new( - node.descendants_with_tokens() - .map(move |node_or_token| Node { - input, - range_map: Rc::clone(&range_map), - node_or_token, - }), - ), - } - } else { - Descendants { - iter: Box::new(std::iter::once(Node { - input: self.input, - range_map: Rc::clone(&self.range_map), - node_or_token: self.node_or_token, - })), - } - } + let input = self.input; + let range_map = Rc::clone(&self.range_map); + + let iter: Box> + '_> = match self.node_or_token.as_node() { + Some(node) => Box::new( + node.descendants_with_tokens() + .map(move |node_or_token| Node { + input, + range_map: Rc::clone(&range_map), + node_or_token, + }), + ), + None => Box::new(std::iter::once(self.clone())), + }; + + iter } } From 97765f3d92874ebc608e67cbeee5e2c1389149c2 Mon Sep 17 00:00:00 2001 From: Taishi Naka Date: Tue, 26 May 2026 14:35:08 +0900 Subject: [PATCH 14/14] fix: preserve positions in range extension --- .../postgresql-cst-parser/src/tree_sitter.rs | 84 ++++++++++++++++--- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 0ec5705..4ec2a35 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -108,18 +108,22 @@ impl std::fmt::Display for Range { impl Range { pub fn extended_by(&self, other: &Self) -> Self { + let (start_byte, start_position) = if self.start_byte <= other.start_byte { + (self.start_byte, self.start_position.clone()) + } else { + (other.start_byte, other.start_position.clone()) + }; + let (end_byte, end_position) = if self.end_byte >= other.end_byte { + (self.end_byte, self.end_position.clone()) + } else { + (other.end_byte, other.end_position.clone()) + }; + Range { - start_byte: self.start_byte.min(other.start_byte), - end_byte: self.end_byte.max(other.end_byte), - - start_position: Point { - row: self.start_position.row.min(other.start_position.row), - column: self.start_position.column.min(other.start_position.column), - }, - end_position: Point { - row: self.end_position.row.max(other.end_position.row), - column: self.end_position.column.max(other.end_position.column), - }, + start_byte, + end_byte, + start_position, + end_position, } } @@ -547,6 +551,64 @@ from assert!(tokens.next().is_none()); } + #[test] + fn range_extended_by_keeps_original_positions_across_lines() { + let tree = parse("SELECT a,\n b").unwrap(); + let root = tree.root_node(); + let mut tokens = root + .descendants() + .filter(|node| node.kind() == SyntaxKind::IDENT) + .map(|node| (node.text(), node.range())); + + let (_, a_range) = tokens.next().expect("should find a token"); + let (_, b_range) = tokens.next().expect("should find b token"); + let extended = a_range.extended_by(&b_range); + let expected = super::Range { + start_byte: a_range.start_byte, + end_byte: b_range.end_byte, + start_position: a_range.start_position.clone(), + end_position: b_range.end_position.clone(), + }; + + assert_eq!(extended.start_byte, expected.start_byte); + assert_eq!(extended.end_byte, expected.end_byte); + assert_eq!( + extended.start_position.to_string(), + expected.start_position.to_string() + ); + assert_eq!( + extended.end_position.to_string(), + expected.end_position.to_string() + ); + } + + #[test] + fn range_extended_by_is_order_independent() { + let tree = parse("SELECT a,\n b").unwrap(); + let root = tree.root_node(); + let mut tokens = root + .descendants() + .filter(|node| node.kind() == SyntaxKind::IDENT) + .map(|node| node.range()); + + let a_range = tokens.next().expect("should find a token"); + let b_range = tokens.next().expect("should find b token"); + + let forward = a_range.extended_by(&b_range); + let backward = b_range.extended_by(&a_range); + + assert_eq!(forward.start_byte, backward.start_byte); + assert_eq!(forward.end_byte, backward.end_byte); + assert_eq!( + forward.start_position.to_string(), + backward.start_position.to_string() + ); + assert_eq!( + forward.end_position.to_string(), + backward.end_position.to_string() + ); + } + #[test] fn test_tree_basics() { let src = "SELECT id FROM users;";