1
//! Tokenizer for MPD requests, implemented by [`RequestTokenizer`].
2

            
3
use crate::commands::*;
4

            
5
/// Zero-copy tokenizer over a single MPD request line.
///
/// Yields the command name first, then each whitespace-separated or
/// double-quoted argument, borrowing slices directly from the input.
#[derive(Clone, Debug)]
pub struct RequestTokenizer<'a> {
    // The full request string being tokenized.
    pub(super) inner: &'a str,
    // Byte offset of the next unread position in `inner`.
    pub(super) cursor: usize,
}
10

            
11
impl<'a> RequestTokenizer<'a> {
12
    #[inline]
13
7
    pub fn new(input: &'a str) -> Self {
14
7
        Self {
15
7
            inner: input,
16
7
            cursor: 0,
17
7
        }
18
7
    }
19
}
20

            
21
#[inline]
22
7
fn parse_command_name(input: &str) -> Option<&str> {
23
    // TODO: Create static trie of command names on startup  for more efficient matching
24
7
    let mut sorted_commands = COMMAND_NAMES.to_vec();
25
11298
    sorted_commands.sort_by_key(|b| std::cmp::Reverse(b.len()));
26
7
    sorted_commands
27
7
        .into_iter()
28
754
        .find(|&command| input.starts_with(command))
29
7
}
30

            
31
impl<'a> Iterator for RequestTokenizer<'a> {
32
    type Item = &'a str;
33

            
34
    // Split on strings and whitespace
35
    #[inline]
36
22
    fn next(&mut self) -> Option<&'a str> {
37
        #[inline]
38
56
        fn is_whitespace(byte: u8) -> bool {
39
56
            byte.is_ascii_whitespace()
40
56
        }
41

            
42
22
        let bytes = self.inner.as_bytes();
43
22
        let len = bytes.len();
44

            
45
22
        let should_parse_command = self.cursor == 0;
46

            
47
        // Skip leading whitespace
48
37
        while self.cursor < len && is_whitespace(bytes[self.cursor]) {
49
15
            self.cursor += 1;
50
15
        }
51

            
52
22
        if should_parse_command {
53
7
            if let Some(command) = parse_command_name(&self.inner[self.cursor..]) {
54
5
                self.cursor += command.len();
55
5
                return Some(command);
56
            } else {
57
2
                return None;
58
            }
59
15
        }
60

            
61
15
        if self.cursor >= len {
62
5
            return None;
63
10
        }
64

            
65
10
        let start = self.cursor;
66

            
67
10
        if bytes[self.cursor] == b'"' {
68
            // Quoted string
69
4
            self.cursor += 1; // Skip opening quote
70
61
            while self.cursor < len {
71
60
                if bytes[self.cursor] == b'\\'
72
1
                    && self.cursor + 1 < len
73
1
                    && bytes[self.cursor + 1] == b'"'
74
                {
75
1
                    self.cursor += 2; // Skip escaped quote
76
1
                    continue;
77
59
                }
78
59
                if bytes[self.cursor] == b'"' {
79
3
                    let end = self.cursor;
80
3
                    self.cursor += 1; // Skip closing quote
81
3
                    return Some(&self.inner[start + 1..end]);
82
56
                }
83
56
                self.cursor += 1;
84
            }
85
            // If we reach here, there was no closing quote
86
1
            Some(&self.inner[start + 1..])
87
        } else {
88
            // Unquoted string
89
28
            while self.cursor < len && !is_whitespace(bytes[self.cursor]) {
90
22
                self.cursor += 1;
91
22
            }
92
6
            Some(&self.inner[start..self.cursor])
93
        }
94
22
    }
95

            
96
    #[inline]
97
    fn size_hint(&self) -> (usize, Option<usize>) {
98
        (0, Some(self.inner.len()))
99
    }
100
}
101

            
102
#[cfg(test)]
mod tests {
    use super::RequestTokenizer;

    /// Collects every token produced for `input`.
    fn tokens(input: &str) -> Vec<&str> {
        RequestTokenizer::new(input).collect()
    }

    #[test]
    fn test_request_tokenizer() {
        let input = r#"add arg1 "arg two" arg3 "arg four with spaces""#;
        assert_eq!(
            tokens(input),
            ["add", "arg1", "arg two", "arg3", "arg four with spaces"]
        );
    }

    #[test]
    fn test_request_tokenizer_invalid_command() {
        let input = r#"invalid_command arg1 "arg two" arg3 "arg four with spaces""#;
        assert!(tokens(input).is_empty());
    }

    #[test]
    fn test_request_tokenizer_unclosed_quote() {
        let input = r#"add arg1 "arg two arg3"#;
        assert_eq!(tokens(input), ["add", "arg1", "arg two arg3"]);
    }

    #[test]
    fn test_request_tokenizer_only_whitespace() {
        let input = r#"     "#;
        assert!(tokens(input).is_empty());
    }

    #[test]
    fn test_request_tokenizer_escape_quotes() {
        let input = r#"add "arg with a \" quote" arg2"#;
        assert_eq!(tokens(input), ["add", r#"arg with a \" quote"#, "arg2"]);
    }

    #[test]
    fn test_request_tokenizer_multitoken_command() {
        // Longest command name must win over its shorter prefix.
        assert_eq!(tokens(r#"protocol all arg"#), ["protocol all", "arg"]);
        assert_eq!(tokens(r#"protocol arg"#), ["protocol", "arg"]);
    }

    // TODO: Implement ignoring spacing within command names
    #[test]
    #[ignore = "Currently, arbitrary spacing within commands names is not supported"]
    fn test_request_tokenizer_ignore_spacing_in_command() {
        assert_eq!(
            tokens(r#"   protocol    all     arg   "#),
            ["protocol all", "arg"]
        );
        assert_eq!(tokens(r#"   protocol    arg   "#), ["protocol", "arg"]);
    }
}