1
use crate::commands::*;
2

            
3
/// Zero-copy tokenizer for textual protocol requests.
///
/// Splits the input into a leading command name followed by whitespace- or
/// quote-delimited arguments. Yielded tokens are slices borrowing from the
/// original input string.
#[derive(Clone, Debug)]
pub struct RequestTokenizer<'a> {
    /// Full request text being tokenized; every token is a sub-slice of this.
    pub(super) inner: &'a str,
    /// Byte offset of the next unread position in `inner`.
    pub(super) cursor: usize,
}
8

            
9
impl<'a> RequestTokenizer<'a> {
10
    #[inline]
11
7
    pub fn new(input: &'a str) -> Self {
12
7
        Self {
13
7
            inner: input,
14
7
            cursor: 0,
15
7
        }
16
7
    }
17
}
18

            
19
#[inline]
20
7
fn parse_command_name(input: &str) -> Option<&str> {
21
    // TODO: Create static trie of command names on startup  for more efficient matching
22
7
    let mut sorted_commands = COMMAND_NAMES.to_vec();
23
11298
    sorted_commands.sort_by_key(|b| std::cmp::Reverse(b.len()));
24
7
    sorted_commands
25
7
        .into_iter()
26
754
        .find(|&command| input.starts_with(command))
27
7
}
28

            
29
impl<'a> Iterator for RequestTokenizer<'a> {
    type Item = &'a str;

    // Split on strings and whitespace
    /// Yields the next token: a command name on the first call (cursor at 0),
    /// then whitespace-separated arguments. Arguments wrapped in `"` are
    /// returned without their surrounding quotes; an unterminated quote
    /// yields the rest of the input. Returns `None` when the input is
    /// exhausted or the leading command is not recognized.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        #[inline]
        fn is_whitespace(byte: u8) -> bool {
            byte.is_ascii_whitespace()
        }

        let bytes = self.inner.as_bytes();
        let len = bytes.len();

        // Only the very first call (before any byte was consumed) parses a
        // command name; captured before the whitespace skip advances cursor.
        let should_parse_command = self.cursor == 0;

        // Skip leading whitespace
        while self.cursor < len && is_whitespace(bytes[self.cursor]) {
            self.cursor += 1;
        }

        if should_parse_command {
            if let Some(command) = parse_command_name(&self.inner[self.cursor..]) {
                self.cursor += command.len();
                return Some(command);
            } else {
                // Unknown command: iteration ends immediately, yielding no tokens.
                return None;
            }
        }

        if self.cursor >= len {
            return None;
        }

        let start = self.cursor;

        if bytes[self.cursor] == b'"' {
            // Quoted string
            self.cursor += 1; // Skip opening quote
            while self.cursor < len {
                // An escaped quote (`\"`) stays inside the token — the
                // backslash is NOT stripped from the returned slice.
                if bytes[self.cursor] == b'\\'
                    && self.cursor + 1 < len
                    && bytes[self.cursor + 1] == b'"'
                {
                    self.cursor += 2; // Skip escaped quote
                    continue;
                }
                if bytes[self.cursor] == b'"' {
                    let end = self.cursor;
                    self.cursor += 1; // Skip closing quote
                    // Slice excludes both quotes; bounds are safe because
                    // `"` is ASCII, so start+1 and end are char boundaries.
                    return Some(&self.inner[start + 1..end]);
                }
                self.cursor += 1;
            }
            // If we reach here, there was no closing quote
            Some(&self.inner[start + 1..])
        } else {
            // Unquoted string
            while self.cursor < len && !is_whitespace(bytes[self.cursor]) {
                self.cursor += 1;
            }
            Some(&self.inner[start..self.cursor])
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // At most one token per remaining input byte; lower bound unknown
        // (an unrecognized command can yield zero tokens).
        (0, Some(self.inner.len()))
    }
}
99

            
100
#[cfg(test)]
mod tests {
    use super::RequestTokenizer;

    /// Runs the tokenizer to exhaustion and collects every yielded token.
    fn tokens(input: &str) -> Vec<&str> {
        RequestTokenizer::new(input).collect()
    }

    #[test]
    fn test_request_tokenizer() {
        let input = r#"add arg1 "arg two" arg3 "arg four with spaces""#;
        assert_eq!(
            tokens(input),
            ["add", "arg1", "arg two", "arg3", "arg four with spaces"]
        );
    }

    #[test]
    fn test_request_tokenizer_invalid_command() {
        // An unrecognized command terminates iteration before any token.
        let input = r#"invalid_command arg1 "arg two" arg3 "arg four with spaces""#;
        assert!(tokens(input).is_empty());
    }

    #[test]
    fn test_request_tokenizer_unclosed_quote() {
        // A missing closing quote consumes the remainder of the input.
        let input = r#"add arg1 "arg two arg3"#;
        assert_eq!(tokens(input), ["add", "arg1", "arg two arg3"]);
    }

    #[test]
    fn test_request_tokenizer_only_whitespace() {
        let input = r#"     "#;
        assert!(tokens(input).is_empty());
    }

    #[test]
    fn test_request_tokenizer_escape_quotes() {
        // The backslash of an escaped quote is preserved in the token.
        let input = r#"add "arg with a \" quote" arg2"#;
        assert_eq!(tokens(input), ["add", r#"arg with a \" quote"#, "arg2"]);
    }

    #[test]
    fn test_request_tokenizer_multitoken_command() {
        // Longest command match wins over its prefix.
        let input = r#"protocol all arg"#;
        assert_eq!(tokens(input), ["protocol all", "arg"]);

        let input = r#"protocol arg"#;
        assert_eq!(tokens(input), ["protocol", "arg"]);
    }

    // TODO: Implement ignoring spacing within command names
    #[test]
    #[ignore = "Currently, arbitrary spacing within commands names is not supported"]
    fn test_request_tokenizer_ignore_spacing_in_command() {
        let input = r#"   protocol    all     arg   "#;
        assert_eq!(tokens(input), ["protocol all", "arg"]);

        let input = r#"   protocol    arg   "#;
        assert_eq!(tokens(input), ["protocol", "arg"]);
    }
}