UPnPsdk 0.1
Universal Plug and Play +, Software Development Kit
 
Loading...
Searching...
No Matches
url.cpp
Go to the documentation of this file.
1// Copyright (C) 2022+ GPL 3 and higher by Ingo Höft, <Ingo@Hoeft-online.de>
2// Redistribution only with this Copyright remark. Last modified: 2024-08-18
8// TODO: Provide url_is_special() as flag
9
10#include "UPnPsdk/url.hpp"
11
12#include <map>
13#include <sstream>
14#include <iomanip>
15#include <iostream>
17
18namespace {
19
// Special schemes and the default port that belongs to each of them. The
// "file" scheme is listed with 0. The port state compares a parsed port
// number with the entry found here.
const std::map<const std::string, const uint16_t> special_scheme{
    {"file", static_cast<uint16_t>(0)}, // plain 0, not the pointer macro NULL
    {"ftp", static_cast<uint16_t>(21)},
    {"http", static_cast<uint16_t>(80)},
    {"https", static_cast<uint16_t>(443)},
    {"ws", static_cast<uint16_t>(80)},
    {"wss", static_cast<uint16_t>(443)}};
27
// Returns true if a_str is exactly one of the scheme names "ftp", "file",
// "http", "https", "ws" or "wss". The comparison is case sensitive.
bool url_is_special(std::string_view a_str) {
    constexpr std::string_view special_names[]{"ftp",   "file", "http",
                                               "https", "ws",   "wss"};
    for (const std::string_view name : special_names) {
        if (a_str == name)
            return true;
    }
    return false;
}
32
// Returns true if the given byte has to be percent-encoded when it is part of
// the userinfo (username or password) of a URL.
bool is_in_userinfo_percent_encode_set(const unsigned char a_chr) {
    // C0 controls (0x00 to 0x1F) and every byte above '~' (0x7E).
    if (a_chr <= 0x1F || a_chr > 0x7E)
        return true;

    switch (a_chr) {
    // query percent-encode set
    case ' ':
    case '"':
    case '#':
    case '<':
    case '>':
    // path percent-encode set
    case '?':
    case '`':
    case '{':
    case '}':
    // userinfo percent-encode set
    case '/':
    case ':':
    case ';':
    case '=':
    case '@':
    case '[':
    case '\\':
    case ']':
    case '^':
    case '|':
        return true;
    default:
        return false;
    }
}
47
48std::string UTF8_percent_encode(const unsigned char a_chr) {
49 // Simplified function 'UTF-8 percent-encode' from the URL standard may be
50 // adjusted if needed.
51 if (is_in_userinfo_percent_encode_set(a_chr)) {
52 std::ostringstream escaped;
53 escaped.fill('0');
54 escaped << std::uppercase << std::hex;
55 escaped << '%' << std::setw(2) << int(a_chr);
56 return escaped.str();
57 } else
58 return std::string(sizeof(a_chr), (char)a_chr);
59}
60
// Returns a copy of a_str that is safe to print: every byte from 0x00 to 0x1F
// and every byte above 0x7E is replaced by "%XX" with two uppercase hex
// digits. All other bytes are copied unchanged.
std::string esc_url(std::string_view a_str) {
    constexpr char hex_digits[]{"0123456789ABCDEF"};

    std::string printable;
    printable.reserve(a_str.size());

    for (const char chr : a_str) {
        const auto byte = static_cast<unsigned char>(chr);
        if (byte <= 0x1F || byte > 0x7E) {
            printable += '%';
            printable += hex_digits[byte >> 4];
            printable += hex_digits[byte & 0x0F];
        } else {
            printable += chr;
        }
    }
    return printable;
}
74
75} // namespace
76
77namespace UPnPsdk {
78
79
80// Url class methods
81// =================
82
#if false
// Disabled proof of concept: redirect std::clog to /dev/null while a Url
// object exists. <fstream> is needed when this gets enabled.
//
// The saved stream buffer and the sink are kept at file scope. The destructor
// must be able to reach the saved buffer, and the sink must stay alive as long
// as std::clog writes to its buffer. With a constructor-local sink std::clog
// would be left with a dangling stream buffer.
// NOTE(review): this simple form only supports one live Url object at a time.
namespace {
std::streambuf* clog_old{nullptr}; // stream buffer std::clog had before
std::ofstream clog_new;            // sink for the redirected output
} // namespace

Url::Url() {
    // save clog stream buffer and redirect clog
    clog_new.open("/dev/null");
    clog_old = std::clog.rdbuf(clog_new.rdbuf());
}

Url::~Url() {
    // restore clog stream buffer before the sink is closed
    if (clog_old != nullptr) {
        std::clog.rdbuf(clog_old);
        clog_old = nullptr;
    }
    clog_new.close();
}
#endif
98
99
100Url::operator std::string() const { return m_ser_url; }
101
102void Url::clear() {
103 m_given_url = "";
104 this->clear_private();
105}
106
107void Url::clear_private() {
108 // Clears all properties except m_given_url that may already be set to a new
109 // value.
110 m_input.reserve(m_given_url.size());
111 m_input = "";
112 m_buffer.reserve(m_input.size() + 20);
113 m_buffer = "";
114 m_ser_url = "";
115 m_ser_base_url = "";
116 m_scheme = "";
117 m_authority = "";
118 m_username = "";
119 m_password = "";
120 m_host = "";
121 m_port = "";
122 m_port_num = (uint16_t)NULL;
123 m_path = "";
124 m_query = "";
125 m_fragment = "";
126 m_atSignSeen = false;
127 m_insideBrackets = false;
128 m_passwordTokenSeen = false;
129}
130
131std::string Url::scheme() const { return m_scheme; }
132
133std::string Url::authority() const { return m_authority; }
134
135std::string Url::username() const { return m_username; }
136
137std::string Url::password() const { return m_password; }
138
139std::string Url::host() const { return m_host; }
140
141std::string Url::port() const { return m_port; }
142
143uint16_t Url::port_num() const { return m_port_num; }
144
145std::string Url::path() const { return m_path; }
146
147std::string Url::query() const { return m_query; }
148
149std::string Url::fragment() const { return m_fragment; }
150
151
152void Url::operator=(std::string_view a_given_url) {
153
154 m_given_url = a_given_url;
155 this->clear_private();
156
157 // To understand the parser below please refer to the "URL Living Standard"
158 // as noted at the top. I use the same terms so you should be able to see
159 // the relations better that way.
160
161 // Remove control character and space and copy to input. Because we copy
162 // char by char I use a predefined length on input to avoid additional
163 // memory allocation for characters.
164 this->clean_and_copy_url_to_input();
165
166 m_state = STATE_SCHEME_START;
167 m_pointer = m_input.begin();
168
169 // On the URL standard there is a State Machine used. It parses the inpupt
170 // string with a pointer to the string so it should finish at the end of
171 // it. The loop of the State Machine finishes regular if state is set to
172 // STATE_NO_STATE within the Machine. We guard it to always finish
173 // independent from the Machines logic to be on the safe side. Because the
174 // m_pointer is decreased sometimes in the State Machine and maybe several
175 // code points are percent encoded (will increase m_input) we double guard.
176 // But we need at least two loops to regular finish an empty m_input.
177 size_t guard = m_input.size() * 2 + 2;
178#ifdef DEBUG_URL
179 std::clog << "DEBUG: guard = " << guard << std::endl;
180#endif
181
182 // Because there are no external events we can use this
183 // simple Finite State Machine (fsm):
184 for (; guard > 0; m_pointer++, guard--) {
185 if (m_state == STATE_NO_STATE)
186 break;
187
188 switch (m_state) {
189 case STATE_SCHEME_START:
190 this->fsm_scheme_start();
191 break;
192 case STATE_SCHEME:
193 this->fsm_scheme();
194 break;
195 case STATE_NO_SCHEME:
196 this->fsm_no_scheme();
197 break;
198 case STATE_PATH_OR_AUTHORITY:
199 this->fsm_path_or_authority();
200 break;
201 case STATE_SPECIAL_AUTHORITY_SLASHES:
202 this->fsm_special_authority_slashes();
203 break;
204 case STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES:
205 this->fsm_special_authority_ignore_slashes();
206 break;
207 case STATE_AUTHORITY:
208 this->fsm_authority();
209 break;
210 case STATE_HOST:
211 this->fsm_host();
212 break;
213 case STATE_PORT:
214 this->fsm_port();
215 break;
216 case STATE_FILE:
217 this->fsm_file();
218 break;
219 case STATE_SPECIAL_RELATIVE_OR_AUTHORITY:
220 this->fsm_special_relative_or_authority();
221 break;
222 case STATE_PATH_START:
223 this->fsm_path_start();
224 break;
225 case STATE_PATH:
226 this->fsm_path();
227 break;
228 case STATE_OPAQUE_PATH:
229 this->fsm_opaque_path();
230 break;
231 default:
232 // Undefined state, stop the State Machine irregular. This is a
233 // program bug.
234 guard = 0;
235 break;
236 }
237 }
238
239 if (guard <= 0) {
240 throw std::out_of_range(
241 std::string((std::string)__FILE__ + ":" + std::to_string(__LINE__) +
242 ", Parsing URL " + __func__ +
243 ". State Machine doesn't finish regular."));
244 }
245}
246
247
248void Url::clean_and_copy_url_to_input() {
249#ifdef DEBUG_URL
250 std::clog << "DEBUG: Being on 'clean_and_copy_url_to_input'.\n";
251#endif
252
253 // To remove any leading C0 control or space, point to first valid char.
254 // control chars are \x00 to \x1F, space = \x20, DEL (backspace) = \x7F.
255 // Due to the URL standard backspace is ignored here.
256 auto it_leading = m_given_url.begin();
257 auto str_end = m_given_url.end();
258 while (it_leading < str_end && (unsigned char)*it_leading <= ' ') {
259 if (it_leading >= str_end - 1)
260 break;
261 it_leading++;
262 }
263 // std::clog << " DEBUG: *it_leading = '" << *it_leading << "'\n";
264
265 // To remove any trailing C0 control or space, point to last valid char.
266 auto it_trailing = m_given_url.end() - 1;
267 auto str_begin = m_given_url.begin();
268 while (it_trailing >= str_begin && (unsigned char)*it_trailing <= ' ') {
269 if (it_trailing <= str_begin)
270 break;
271 else
272 it_trailing--;
273 }
274 // std::clog << " DEBUG: *it_trailing = '" << *it_trailing << "'\n";
275
276 // Copy given URL to input lowercase and remove all ASCII tab or newline.
277 int invalid_chars{};
278 if ((unsigned char)*it_leading > ' ') {
279
280 // it_leading points to the first valid character,
281 // it_trailing points to the last valid character.
282 while (it_leading <= it_trailing) {
283
284 unsigned char c = (unsigned char)*it_leading;
285 if (c == '\x0D' || c == '\x0A' || c == '\x09')
286 invalid_chars++;
287 else
288 m_input.push_back((char)std::tolower(c));
289 it_leading++;
290 }
291 }
292 // std::clog << " DEBUG: m_input = '" << m_input << "'\n";
293
294 if (invalid_chars)
295 std::clog << "Warning: Removed " << invalid_chars
296 << " ASCII tab or newline character. Using \"" << m_input
297 << "\" now." << std::endl;
298}
299
300
301void Url::fsm_scheme_start() {
302#ifdef DEBUG_URL
303 std::clog << "DEBUG: Being on 'scheme_start_state' with \""
304 << std::string_view(m_pointer, m_input.end()) << "\"\n";
305#endif
306
307 // Check if first character is an lower ASCII alpha.
308 // We should have already converted all chars to lower.
309 if (std::islower((unsigned char)*m_pointer)) { // needs type cast here
310
311 // Exception: if the operation would result in size() > max_size(),
312 // throws std::length_error.
313 m_buffer.push_back((char)std::tolower(*m_pointer));
314
315 m_state = STATE_SCHEME;
316
317 } else {
318
319 m_state = STATE_NO_SCHEME;
320 m_pointer--;
321 }
322}
323
324
325void Url::fsm_scheme() {
326#ifdef DEBUG_URL
327 std::clog << "DEBUG: Being on 'scheme state' with \""
328 << std::string_view(m_pointer, m_input.end()) << "\"\n";
329#endif
330
331 const unsigned char c =
332 m_pointer < m_input.end() ? (unsigned char)*m_pointer : '\0';
333
334 // Check if character is an ASCII lower alphanumeric or U+002B (+), U+002D
335 // (-), or U+002E (.).
336 if (islower(c) || // type cast is needed here
337 isdigit(c) || c == '+' || c == '-' || c == '.') //
338 {
339 // Exception: if the operation would result in size() > max_size(),
340 // throws std::length_error.
341 m_buffer.push_back((char)c);
342
343 } else if (c == ':') {
344
345 m_scheme = m_buffer;
346 m_buffer = "";
347
348 if (m_scheme == "file") {
349 if (m_pointer + 2 >= m_input.end() || *(m_pointer + 1) != '/' ||
350 *(m_pointer + 2) != '/')
351 std::clog << "Warning: 'file' scheme misses \"//\", ignoring."
352 << std::endl;
353 m_state = STATE_FILE;
354
355 } else if (url_is_special(m_scheme) && m_ser_base_url != "") {
356 m_state = STATE_SPECIAL_RELATIVE_OR_AUTHORITY;
357
358 } else if (url_is_special(m_scheme)) {
359 m_state = STATE_SPECIAL_AUTHORITY_SLASHES;
360
361 } else if (m_pointer + 1 < m_input.end() && *(m_pointer + 1) == '/') {
362 m_state = STATE_PATH_OR_AUTHORITY;
363 m_pointer++;
364
365 } else {
366 m_path = "";
367 m_state = STATE_OPAQUE_PATH;
368 }
369
370 } else {
371
372 m_buffer = "";
373 m_state = STATE_NO_SCHEME;
374 }
375}
376
377
378void Url::fsm_no_scheme() {
379#ifdef DEBUG_URL
380 std::clog << "DEBUG: Being on 'no_scheme_state' with input \"" << m_input
381 << "\"\n";
382#endif
383 std::clog << "Error: no valid scheme found." << std::endl;
384 throw std::invalid_argument("Invalid URL: '" + esc_url(m_input) + "'");
385
386 m_state = STATE_NO_STATE;
387}
388
389
390void Url::fsm_special_relative_or_authority() {
391#ifdef DEBUG_URL
392 std::clog << "DEBUG: Being on 'special_relative_or_authority_state' with \""
393 << std::string_view(m_pointer, m_input.end()) << "\"\n";
394#endif
395
396 m_state = STATE_NO_STATE;
397}
398
399
400void Url::fsm_path_or_authority() {
401#ifdef DEBUG_URL
402 std::clog << "DEBUG: Being on 'path_or_authority_state' with \""
403 << std::string_view(m_pointer, m_input.end()) << "\"\n";
404#endif
405
406 if (*m_pointer == '/') {
407 m_state = STATE_AUTHORITY;
408 } else {
409 m_state = STATE_PATH;
410 m_pointer--;
411 }
412}
413
414
415void Url::fsm_special_authority_slashes() {
416#ifdef DEBUG_URL
417 std::clog << "DEBUG: Being on 'special_authority_slashes_state' with \""
418 << std::string_view(m_pointer, m_input.end()) << "\"\n";
419#endif
420
421 if (m_pointer + 1 < m_input.end() && *m_pointer == '/' &&
422 *(m_pointer + 1) == '/') {
423 m_state = STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES;
424 m_pointer++;
425 } else {
426 std::clog << "Warning: no \"//\" before authority: ignoring. Found \""
427 << std::string(m_pointer, m_input.end()) << "\"" << std::endl;
428 m_state = STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES;
429 m_pointer--;
430 }
431}
432
433
434void Url::fsm_special_authority_ignore_slashes() {
435#ifdef DEBUG_URL
436 std::clog
437 << "DEBUG: Being on 'special_authority_ignore_slashes_state' with \""
438 << std::string_view(m_pointer, m_input.end()) << "\"\n";
439#endif
440
441 if (*m_pointer != '/' && *m_pointer != '\\') {
442 m_state = STATE_AUTHORITY;
443 m_pointer--;
444 } else {
445 std::clog << "Warning: '/' or '\\' not expected on authority: "
446 "ignoring. Found \""
447 << std::string(m_pointer, m_input.end()) << "\"" << std::endl;
448 }
449}
450
451
452void Url::fsm_authority() {
453#ifdef DEBUG_URL
454 std::clog << "DEBUG: Being on 'authority_state' with \""
455 << std::string_view(m_pointer, m_input.end()) << "\"\n";
456#endif
457
458 const unsigned char c =
459 m_pointer < m_input.end() ? (unsigned char)*m_pointer : '\0';
460
461 if (c == '@') {
462
463 std::clog << "Status: '@' found for userinfo." << std::endl;
464 if (m_atSignSeen)
465 m_buffer.append("%40");
466 else
467 m_atSignSeen = true;
468
469 for (auto& cp : m_buffer) {
470 if (cp == ':' && !m_passwordTokenSeen) {
471 m_passwordTokenSeen = true;
472 continue;
473 }
474 std::string encodedCodePoints =
475 UTF8_percent_encode((unsigned char)cp);
476 if (m_passwordTokenSeen)
477 m_password += encodedCodePoints;
478 else
479 m_username += encodedCodePoints;
480 }
481 m_buffer = "";
482
483 } else if (m_pointer >= m_input.end() || c == '/' || c == '?' || c == '#' ||
484 (url_is_special(m_scheme) && c == '\\')) {
485
486 if (m_atSignSeen && m_buffer == "") {
487 std::clog << "Error: no valid authority." << std::endl;
488 throw std::invalid_argument("Invalid authority: '" + m_input + "'");
489 } else {
490 m_pointer = m_pointer - (long int)m_buffer.length() - 1;
491 m_buffer = "";
492 m_state = STATE_HOST;
493 }
494
495 } else {
496 m_buffer.push_back((char)c);
497 }
498}
499
500
501void Url::fsm_host() {
502#ifdef DEBUG_URL
503 std::clog << "DEBUG: Being on 'host_state' with \""
504 << std::string_view(m_pointer, m_input.end()) << "\", "
505 << "username = \"" << m_username << "\", password = \""
506 << m_password << "\"\n";
507#endif
508
509 const unsigned char c =
510 m_pointer < m_input.end() ? (unsigned char)*m_pointer : '\0';
511
512 if (c == ':' && !m_insideBrackets) {
513
514 if (m_buffer.empty()) {
515 std::clog << "Error: no valid hostname found." << std::endl;
516 throw std::invalid_argument("Invalid hostname: '" +
517 esc_url(m_input) + "'");
518 }
519 // On failure host_parser throws an error that is catched call stack
520 // upwards. It doesn't modify m_host then.
521 // m_host = host_parser(m_buffer, /* isNotSpecial */ true);
522 m_host = "dummy1.host.state";
523 m_buffer.clear();
524 m_state = STATE_PORT;
525
526 } else if (c == '\0' || c == '/' || c == '?' || c == '#' ||
527 (url_is_special(m_scheme) && c == '\\')) {
528
529 m_pointer--;
530
531 if (url_is_special(m_scheme) && m_buffer.empty()) {
532 std::clog << "Error: no valid host found." << std::endl;
533 throw std::invalid_argument("Invalid hostname: '" +
534 esc_url(m_input) + "'");
535 }
536 // On failure host_parser throws an error that is catched call stack
537 // upwards. It doesn't modify m_host then.
538 // m_host = host_parser(m_buffer, /* isNotSpecial */ true);
539 m_host = "dummy2.host.state";
540 m_buffer.clear();
541 m_state = STATE_PATH_START;
542
543 } else {
544 if (c == '[')
545 m_insideBrackets = true;
546 else if (c == ']')
547 m_insideBrackets = false;
548
549 m_buffer.push_back((char)c);
550 }
551}
552
553
554void Url::fsm_port() {
555#ifdef DEBUG_URL
556 std::clog << "DEBUG: Being on 'port_state' with \""
557 << std::string_view(m_pointer, m_input.end()) << "\", "
558 << "host = \"" << m_host << "\"\n";
559#endif
560
561 const unsigned char c =
562 m_pointer < m_input.end() ? (unsigned char)*m_pointer : '\0';
563
564 if (std::isdigit(c)) {
565 m_buffer.push_back((char)c);
566
567 } else if (c == '\0' || c == '/' || c == '?' || c == '#' ||
568 (url_is_special(m_scheme) && c == '\\')) {
569
570 if (!m_buffer.empty()) {
571 // uint16_t limits port number to max 65535
572 uint16_t port{};
573 long unsigned int port_tmp{};
574
575 // If port is greater than 2^16 − 1 (65535), validation error,
576 // return failure.
577 try {
578 port_tmp = std::stoul(m_buffer);
579 // } catch(std::invalid_argument& e) {} // not catched here
580 } catch (std::out_of_range& e) {
581 std::clog << "Error: " << e.what()
582 << ". Port number out of range." << std::endl;
583 throw;
584 }
585 if (port_tmp > 65535) {
586 throw std::out_of_range(std::string(
587 (std::string)__FILE__ + ":" + std::to_string(__LINE__) +
588 ", Parsing port " + __func__ + ". Error: Port number " +
589 std::to_string(port_tmp) + " is out of range."));
590 }
591 port = (uint16_t)port_tmp; // type cast is checked
592
593 // Set url’s port to null, if port is url’s scheme’s default port;
594 // otherwise to port.
595 auto it = special_scheme.find(m_scheme);
596 if (it != special_scheme.end() && it->second == port) {
597 m_port_num = (uint16_t)NULL;
598 m_port.clear();
599 } else {
600 m_port_num = port;
601 m_port = m_buffer;
602 }
603 m_buffer.clear();
604 }
605
606 m_state = STATE_PATH_START;
607 m_pointer--;
608
609 } else {
610 std::clog << "Error: no valid port found." << std::endl;
611 throw std::invalid_argument("Invalid port: '" + esc_url(m_input) + "'");
612 }
613}
614
615
616void Url::fsm_file() {
617#ifdef DEBUG_URL
618 std::clog << "DEBUG: Being on 'file_state' with \""
619 << std::string_view(m_pointer, m_input.end()) << "\"\n";
620#endif
621 m_state = STATE_NO_STATE;
622}
623
624
625void Url::fsm_path_start() {
626#ifdef DEBUG_URL
627 std::clog << "DEBUG: Being on 'path_start_state' with \""
628 << std::string_view(m_pointer, m_input.end()) << "\"\n";
629#endif
630 m_state = STATE_NO_STATE;
631}
632
633
634void Url::fsm_path() {
635#ifdef DEBUG_URL
636 std::clog << "DEBUG: Being on 'path_state' with \""
637 << std::string_view(m_pointer, m_input.end()) << "\"\n";
638#endif
639 m_state = STATE_NO_STATE;
640}
641
642
643void Url::fsm_opaque_path() {
644#ifdef DEBUG_URL
645 std::clog << "DEBUG: Being on 'opaque_path_state' with \""
646 << std::string_view(m_pointer, m_input.end()) << "\"\n";
647#endif
648 m_state = STATE_NO_STATE;
649}
650
651} // namespace UPnPsdk
Reengineered Object Oriented UPnP+ program code.
Declaration of the 'class Url'. Not usable, work in progress.