comments.rs - source

sqlparser/ast/

comments.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4//
5// http://www.apache.org/licenses/LICENSE-2.0
6//
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13//! Provides a representation of source code comments in parsed SQL code.
14//!
15//! See [Comments::find] for an example.
16
17#[cfg(not(feature = "std"))]
18use alloc::{string::String, vec::Vec};
19
20use core::{
21    ops::{Bound, Deref, RangeBounds},
22    slice,
23};
24
25use crate::tokenizer::{Location, Span};
26
/// An opaque container for the comments found in parsed SQL source code.
///
/// Internally this wraps a vector of [`CommentWithSpan`] values.
#[derive(Default, Debug, Clone)]
pub struct Comments(Vec<CommentWithSpan>);
30
31impl Comments {
32    /// Accepts `comment` if its the first or is located strictly after the
33    /// last accepted comment.  In other words, this method will skip the
34    /// comment if its comming out of order (as encountered in the parsed
35    /// source code.)
36    pub(crate) fn offer(&mut self, comment: CommentWithSpan) {
37        if self
38            .0
39            .last()
40            .map(|last| last.span < comment.span)
41            .unwrap_or(true)
42        {
43            self.0.push(comment);
44        }
45    }
46
47    /// Finds comments starting within the given location range. The order of
48    /// iterator reflects the order of the comments as encountered in the parsed
49    /// source code.
50    ///
51    /// # Example
52    /// ```rust
53    /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location};
54    ///
55    /// let sql = r#"/*
56    ///  header comment ...
57    ///  ... spanning multiple lines
58    /// */
59    ///
60    ///  -- first statement
61    ///  SELECT 'hello' /* world */ FROM DUAL;
62    ///
63    ///  -- second statement
64    ///  SELECT 123 FROM DUAL;
65    ///
66    ///  -- trailing comment
67    /// "#;
68    ///
69    /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap();
70    ///
71    /// // all comments appearing before line seven, i.e. before the first statement itself
72    /// assert_eq!(
73    ///    &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
74    ///    &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
75    ///
76    /// // all comments appearing within the first statement
77    /// assert_eq!(
78    ///    &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::<Vec<_>>(),
79    ///    &[" world "]);
80    ///
81    /// // all comments appearing within or after the first statement
82    /// assert_eq!(
83    ///    &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
84    ///    &[" world ", " second statement\n", " trailing comment\n"]);
85    /// ```
86    ///
87    /// The [Spanned](crate::ast::Spanned) trait allows you to access location
88    /// information for certain AST nodes.
89    pub fn find<R: RangeBounds<Location>>(&self, range: R) -> Iter<'_> {
90        let (start, end) = (
91            self.start_index(range.start_bound()),
92            self.end_index(range.end_bound()),
93        );
94        debug_assert!((0..=self.0.len()).contains(&start));
95        debug_assert!((0..=self.0.len()).contains(&end));
96        // in case the user specified a reverse range
97        Iter(if start <= end {
98            self.0[start..end].iter()
99        } else {
100            self.0[0..0].iter()
101        })
102    }
103
104    /// Find the index of the first comment starting "before" the given location.
105    ///
106    /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`.
107    fn start_index(&self, location: Bound<&Location>) -> usize {
108        match location {
109            Bound::Included(location) => {
110                match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
111                    Ok(i) => i,
112                    Err(i) => i,
113                }
114            }
115            Bound::Excluded(location) => {
116                match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
117                    Ok(i) => i + 1,
118                    Err(i) => i,
119                }
120            }
121            Bound::Unbounded => 0,
122        }
123    }
124
125    /// Find the index of the first comment starting "after" the given location.
126    ///
127    /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`.
128    fn end_index(&self, location: Bound<&Location>) -> usize {
129        match location {
130            Bound::Included(location) => {
131                match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
132                    Ok(i) => i + 1,
133                    Err(i) => i,
134                }
135            }
136            Bound::Excluded(location) => {
137                match self.0.binary_search_by(|c| c.span.start.cmp(location)) {
138                    Ok(i) => i,
139                    Err(i) => i,
140                }
141            }
142            Bound::Unbounded => self.0.len(),
143        }
144    }
145}
146
147impl From<Comments> for Vec<CommentWithSpan> {
148    fn from(comments: Comments) -> Self {
149        comments.0
150    }
151}
152
/// A source code comment together with the span it occupies.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CommentWithSpan {
    /// The source code comment itself
    pub comment: Comment,
    /// The span of the comment including its markers
    pub span: Span,
}
161
162impl Deref for CommentWithSpan {
163    type Target = Comment;
164
165    fn deref(&self) -> &Self::Target {
166        &self.comment
167    }
168}
169
/// A unified type of the different source code comment formats.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Comment {
    /// A single-line comment, typically introduced with a prefix and spanning
    /// until end-of-line or end-of-file in the source code.
    ///
    /// Note: `content` will include the terminating new-line character, if any.
    SingleLine {
        /// The content of the comment (including trailing newline, if any).
        content: String,
        /// The prefix introducing the comment (e.g. `--`, `#`).
        prefix: String,
    },

    /// A multi-line comment, typically enclosed in `/* .. */` markers. The
    /// string represents the content excluding the markers.
    MultiLine(String),
}
192
193impl Comment {
194    /// Retrieves the content of the comment as string slice.
195    pub fn as_str(&self) -> &str {
196        match self {
197            Comment::SingleLine { content, prefix: _ } => content.as_str(),
198            Comment::MultiLine(content) => content.as_str(),
199        }
200    }
201}
202
203impl Deref for Comment {
204    type Target = str;
205
206    fn deref(&self) -> &Self::Target {
207        self.as_str()
208    }
209}
210
211/// An opaque iterator implementation over comments served by [Comments::find].
212pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>);
213
214impl<'a> Iterator for Iter<'a> {
215    type Item = &'a CommentWithSpan;
216
217    fn next(&mut self) -> Option<Self::Item> {
218        self.0.next()
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises [`Comments::find`] with end-only, start-only, two-sided,
    /// excluded-start and unbounded ranges over a small fixed set of comments.
    #[test]
    fn test_find() {
        let comments = {
            // ```
            // -- abc
            //   /* hello */--, world
            // /* def
            //  ghi
            //  jkl
            // */
            // ```
            let mut c = Comments(Vec::new());
            c.offer(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: " abc".into(),
                    prefix: "--".into(),
                },
                span: Span::new((1, 1).into(), (1, 7).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::MultiLine(" hello ".into()),
                span: Span::new((2, 3).into(), (2, 14).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::SingleLine {
                    content: ", world".into(),
                    prefix: "--".into(),
                },
                span: Span::new((2, 14).into(), (2, 21).into()),
            });
            c.offer(CommentWithSpan {
                comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()),
                span: Span::new((3, 3).into(), (7, 1).into()),
            });
            c
        };

        // Collects the content of all comments found within `range`.
        fn find<R: RangeBounds<Location>>(comments: &Comments, range: R) -> Vec<&str> {
            comments.find(range).map(|c| c.as_str()).collect::<Vec<_>>()
        }

        // ~ end-points only --------------------------------------------------
        assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new());
        assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]);
        assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]);
        assert_eq!(
            find(&comments, ..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, ..=Location::new(2, 14)),
            vec![" abc", " hello ", ", world"]
        );
        assert_eq!(
            find(&comments, ..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ start-points only ------------------------------------------------
        assert_eq!(
            find(&comments, Location::new(1000, 1000)..),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(2, 14)..),
            vec![", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(2, 15)..),
            vec![" def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(0, 0)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );

        // ~ excluded start-points (not expressible with range syntax) --------
        assert_eq!(
            find(
                &comments,
                (Bound::Excluded(Location::new(2, 3)), Bound::Unbounded)
            ),
            vec![", world", " def\n ghi\n jkl\n"]
        );
        assert_eq!(
            find(
                &comments,
                (Bound::Excluded(Location::new(2, 4)), Bound::Unbounded)
            ),
            vec![", world", " def\n ghi\n jkl\n"]
        );

        // ~ ranges -----------------------------------------------------------
        assert_eq!(
            find(&comments, Location::new(2, 1)..Location::new(1, 1)),
            Vec::<&str>::new()
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 3)),
            vec![" abc"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 3)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 10)),
            vec![" abc", " hello "]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..=Location::new(2, 14)),
            vec![" abc", " hello ", ", world"]
        );
        assert_eq!(
            find(&comments, Location::new(1, 1)..Location::new(2, 15)),
            vec![" abc", " hello ", ", world"]
        );

        // ~ find everything --------------------------------------------------
        assert_eq!(
            find(&comments, ..),
            vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"]
        );
    }
}