UPnPsdk 0.1
Universal Plug and Play +, Software Development Kit
 
Loading...
Searching...
No Matches
uri.cpp
Go to the documentation of this file.
1// Copyright (C) 2025+ GPL 3 and higher by Ingo Höft, <Ingo@Hoeft-online.de>
2// Redistribution only with this Copyright remark. Last modified: 2026-03-14
9#include <UPnPsdk/uri.hpp>
10
11#include <UPnPsdk/synclog.hpp>
12#include <UPnPsdk/sockaddr.hpp>
13#include <UPnPsdk/messages.hpp>
14#include <UPnPsdk/addrinfo.hpp>
15
17#include <regex>
19
20
21namespace UPnPsdk {
22
23namespace {
24
25// Free functions
26// ==============
39bool is_ipv4_addr(const std::string& ip
40) {
41 TRACE("Executing is_ipv4_addr()")
42 std::regex ipv4_pattern(
43 R"(^((25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])$)");
44 return std::regex_match(ip, ipv4_pattern);
45}
46
47
57bool is_dns_name(const std::string& label
58) {
59 // Regular expression to validate DNS label.
60 // Negative Lookbehind: (?<!...) is not supported by C++ STL. I workaround
61 // it. std::regex
62 // pattern("^((?!-)[A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$");
63 TRACE("Executing is_dns_name()")
64 std::regex pattern("^((?!-)[A-Za-z0-9-]{1,63}\\.)+[A-Za-z]{2,6}$");
65 return (std::regex_match(label, pattern) && !label.contains("-.") &&
66 !label.contains("--") && !label.ends_with('-')) ||
67 label == "localhost";
68}
69
70} // anonymous namespace
71
72
73void remove_dot_segments(std::string& a_path) {
74 // The letters (A., B., ...) are the steps as given by the algorithm in
75 // RFC3986_5.2.4.
76 TRACE("Executing UPnPsdk::remove_dot_segments(\"" + a_path + "\")")
77
78 std::string_view path_sv{a_path};
79 std::string output;
80
81 while (!path_sv.empty()) {
82 // A.
83 if (path_sv.substr(0, 3) == "../") {
84 path_sv.remove_prefix(3);
85 } else if (path_sv.substr(0, 2) == "./") {
86 path_sv.remove_prefix(2);
87 }
88 // B.
89 else if (path_sv.substr(0, 3) == "/./") {
90 path_sv.remove_prefix(2);
91 } else if (path_sv == "/.") {
92 path_sv.remove_suffix(1);
93 }
94 // C.
95 else if (path_sv.substr(0, 4) == "/../") {
96 path_sv.remove_prefix(3);
97 if (!output.empty()) {
98 size_t pos;
99 output.erase((pos = output.find_last_of('/')) ==
100 std::string::npos
101 ? 0
102 : pos); // Remove last segment
103 }
104 } else if (path_sv == "/..") {
105 path_sv.remove_suffix(2);
106 if (!output.empty()) {
107 size_t pos;
108 output.erase((pos = output.find_last_of('/')) ==
109 std::string::npos
110 ? 0
111 : pos); // Remove last segment
112 }
113 }
114 // D.
115 else if (path_sv == "." || path_sv == "..") {
116 path_sv = "";
117 }
118 // E.
119 else {
120 size_t start = path_sv.front() == '/' ? 1 : 0;
121 size_t end = path_sv.find_first_of('/', start);
122 if (end == std::string_view::npos)
123 end = path_sv.size();
124 output += path_sv.substr(0, end);
125 path_sv.remove_prefix(end);
126 }
127 }
128
129 a_path = output;
130}
131
132
133void decode_esc_chars(std::string& a_encoded) {
134 TRACE("Executing decode_esc_chars()")
135
136 std::string decoded;
137 int value;
138
139 auto it{a_encoded.begin()};
140 auto it_end{a_encoded.end()};
141 // clang-format off
142 // First check begin and end of the URI for invalid encoded character. With
143 // '%' we must always have three characters.
144 if (!a_encoded.empty() &&
145 ( /*begin*/ (a_encoded.size() < 3 && *it == '%') ||
146 /*end*/ (a_encoded.size() >= 3 && (*(it_end - 2) == '%' || *(it_end - 1) == '%'))))
147 goto exception;
148 // clang-format on
149
150 for (; it < it_end; it++) {
151 if (static_cast<unsigned char>(*it) == '%') {
152 // Check two hex digit characters after '%'.
153 if (!std::isxdigit(static_cast<unsigned char>(*(it + 1))) ||
154 !std::isxdigit(static_cast<unsigned char>(*(it + 2))))
155 goto exception;
156
157 value =
158 stoi(a_encoded.substr(
159 static_cast<size_t>(it - a_encoded.begin()) + 1, 2),
160 nullptr, 16);
161 decoded.push_back(static_cast<char>(value));
162 it += 2; // Skip the next two characters
163 } else {
164 decoded.push_back(*it);
165 }
166 }
167
168 a_encoded = decoded;
169 return;
170
171exception:
172 throw std::invalid_argument(
173 UPnPsdk_LOGEXCEPT(
174 "MSG1159") "URI with invalid percent encoding, failed URI=\"" +
175 a_encoded + "\".\n");
176}
177
178
179// CComponent
180// ==========
182 TRACE2(this, " Executing CComponent::state()")
183 return m_state;
184}
185
186const std::string& CComponent::str() const {
187 TRACE2(this, " Executing CComponent::str()")
188 return m_component;
189}
190
191
192// get_scheme free helper function
193// ==========---------------------
194namespace {
203std::string_view get_scheme(std::string_view a_uri_sv
204) {
205 // A scheme, if any, must begin with a letter, must have alphanum
206 // characters, or '-', or '+', or '.', and ends with ':'.
207 TRACE("Executing get_scheme(a_uri_sv)")
208
209 size_t pos;
210 if ((pos = a_uri_sv.find_first_of(':')) == std::string_view::npos)
211 // No separator found means the scheme is undefined. The URI-reference
212 // may be a relative reference (RFC3986_4.1).
213 return ""; // Valid after return with size 0 (but dangling pointer).
214
215 if (pos == 0)
216 // ':' at the first position means the scheme is empty. This is invalid
217 // but will be checked later. I cannot simply return ':' as string_view
218 // because the string ":" is only valid within this function.
219 return std::string_view(a_uri_sv.data(), 1);
220
221 // Strip the view to have only the scheme with ':'. If pos > size(), the
222 // behavior is undefined. But that is guarded above.
223 a_uri_sv.remove_suffix(a_uri_sv.size() - pos - 1);
224
225 // Check if the scheme has valid character (RFC3986_3.1.).
226 if (!std::isalpha(static_cast<unsigned char>(a_uri_sv.front()))) {
227 // First character is not alpha. This is not a scheme.
228 return ""; // Valid after return with size 0 (but dangling pointer).
229 } else {
230 unsigned char ch;
231 for (auto it{a_uri_sv.begin()}; it < a_uri_sv.end() - 1; it++) {
232 ch = static_cast<unsigned char>(*it);
233 if (!(std::isalnum(ch) || (ch == '-') || (ch == '+') ||
234 (ch == '.'))) {
235 // Invalid character for scheme. This is not a scheme.
236 return ""; // Valid after return with size 0 (but dangling ptr).
237 }
238 }
239 }
240
241 return a_uri_sv;
242}
243} // anonymous namespace
244
245
246// class CScheme
247// =============
248CScheme::CScheme(std::string_view a_uri_sv) {
249 TRACE2(this, " Construct CScheme(a_uri_sv)")
250
251 std::string_view scheme_sv = get_scheme(a_uri_sv);
252
253 if (scheme_sv.empty())
254 // Undefined scheme.
255 return;
256 if (scheme_sv == ":") {
257 // Empty scheme.
259 return;
260 }
261
262 // Normalize scheme to lower case character (RFC3986_6.2.2.1).
263 scheme_sv.remove_suffix(1); // Remove trailing ':'
264 m_component = scheme_sv;
265 for (auto it{m_component.begin()}; it < m_component.end(); it++)
266 *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
267
269}
270
271
272// get_authority free helper function
273// =============---------------------
274namespace {
291std::string_view
292get_authority(std::string_view a_uriref_sv
293) {
294 // The authority component is preceded by a double slash ("//") and is
295 // terminated by the next slash ('/'), question mark ('?'), or number sign
296 // ('#') character, or by the end of the URI (RFC3986 3.2.).
297 TRACE("Executing get_authority(a_uriref_sv)")
298
299 auto& npos = std::string_view::npos;
300 size_t pos;
301 if ((pos = a_uriref_sv.find("//")) != npos) {
302 // Extract the authority component with its separator "//".
303 a_uriref_sv.remove_prefix(pos);
304 // Find end of the authority and remove the rest to the end.
305 if ((pos = a_uriref_sv.find_first_of("/?#", 2)) != npos)
306 a_uriref_sv.remove_suffix(a_uriref_sv.size() - pos);
307
308 // Here we have the authority string-view with separator "//" and end
309 // separator (one of '/', '?', '#', ''). Having authority only "//"
310 // means authority is empty.
311 return a_uriref_sv;
312 }
313 // No "//" separator found. An empty authority means it is undefined.
314 return ""; // Valid after return with size 0 (but dangling pointer).
315}
316} // anonymous namespace
317
318
319namespace {
320// get_userinfo free helper function
321// ============---------------------
331std::string_view
332get_userinfo(std::string_view a_uriref_sv
333) {
334 // The user information, if present, starts with "//" and is followed by a
335 // commercial at-sign ("@") that delimits it from the host (RFC3986
336 // 3.2.1.). The userinfo may be undefined (no "@" found within the
337 // authority). I also take an empty userinfo into account (first character
338 // of the authority is "@" or ":"). Applications should not render as clear
339 // text any password data after the first colon (:) found within a userinfo
340 // subcomponent unless the data after the colon is the empty string
341 // (indicating no password).
342 TRACE("Executing get_userinfo(a_uriref_sv)")
343
344 std::string_view authority_sv = get_authority(a_uriref_sv);
345
346 if (authority_sv.empty())
347 // Undefined authority.
348 return ""; // Valid after return with size 0 (but dangling pointer).
349
350 authority_sv.remove_prefix(2); // remove authority separator.
351
352 auto& npos = std::string_view::npos;
353 size_t pos;
354 // Check if there is a separator.
355 if ((pos = authority_sv.find_first_of('@')) == npos)
356 // No userinfo sub-component available.
357 return ""; // Valid after return with size 0 (but dangling pointer).
358
359 if (pos == 0 || authority_sv[0] == ':') {
360 // Separator '@' or ':' (for password) at first position means userinfo
361 // is empty. Return string_view to that first character from input
362 // string_view.
363 return std::string_view(authority_sv.data(), 1);
364 }
365
366 // Extract userinfo with trailing separator.
367 authority_sv.remove_suffix(authority_sv.size() - pos - 1);
368
369 // Check special case with clear text password.
370 if ((pos = authority_sv.find_first_of(':')) != npos) {
371 // Here we have found a username with clear text password appended.
372 // Strip deprecated clear text password without ':' (RFC3986 3.3.1.).
373 authority_sv.remove_suffix(authority_sv.size() - pos - 1);
374 }
375
376 authority_sv.remove_suffix(1); // Remove trailing separator.
377 return authority_sv;
378}
379} // anonymous namespace
380
381
382// class CUserinfo
383// ===============
384CUserinfo::CUserinfo(std::string_view a_uriref_sv) {
385 TRACE2(this, " Construct CUserinfo(a_uriref_sv)")
386
387 std::string_view userinfo_sv = get_userinfo(a_uriref_sv);
388
389 if (userinfo_sv.empty())
390 // Undefined userinfo.
391 return;
392 if (userinfo_sv == "@" || userinfo_sv == ":") {
393 // Empty userinfo.
395 return;
396 }
397 m_component = userinfo_sv;
399}
400
401
402namespace {
403// get_host free helper function
404// ========---------------------
418std::string_view
419get_host(std::string_view a_uriref_sv
420) {
421 // The Host information starts with "//" when a userinfo is removed. It
422 // ends with ':' if a port is available, or with the end of the authority,
423 // that is '/', or '?', or '#', or end of uri.
424 TRACE("Executing get_host(a_uriref_sv)")
425
426 std::string_view authority_sv = get_authority(a_uriref_sv);
427
428 if (authority_sv.empty())
429 // Undefined authority.
430 return ""; // Valid after return with size 0 (but dangling pointer).
431
432 // Point to a valid extern '/' character that can later be returned as end
433 // saparator.
434 std::string_view end_separator = authority_sv.substr(0, 1);
435 authority_sv.remove_prefix(2); // remove authority separator.
436
437 auto& npos = std::string_view::npos;
438 size_t pos;
439 // Strip userinfo from authority string if present.
440 if ((pos = authority_sv.find_first_of('@')) != npos)
441 authority_sv.remove_prefix(pos + 1);
442 // Strip port from authority string if present. I have to look for last
443 // occurrance of the port separator. If there is a ']' instead of ':' at
444 // last then it is an IPv6 address with colons for the host without port.
445 if ((pos = authority_sv.find_last_of("]:")) != npos &&
446 authority_sv[pos] == ':')
447 authority_sv.remove_suffix(authority_sv.size() - pos);
448
449 // Here we have the extracted host string.
450 if (authority_sv.empty()) {
451 // An authority has always a non empty host component for "http" scheme,
452 // and may be empty for "file" scheme. That is checked later. Here I
453 // return an empty host.
454 return end_separator;
455 }
456
457 // Check if the host_name is valid.
458 // Check IPv6 address.
459 if (authority_sv.front() == '[') {
460 SSockaddr saObj;
461 try {
462 saObj = authority_sv;
463 } catch (const std::exception&) {
464 goto exception;
465 }
466 }
467 // Check IPv4 address and DNS name. No DNS name resolution is performed.
468 // The syntax rule for host is ambiguous because it does not completely
469 // distinguish between an IPv4 address and a reg-name. In order to
470 // disambiguate the syntax, we apply the "first-match-wins" algorithm:
471 // If host matches the rule for IPv4 address, then it should be
472 // considered an IPv4 address literal and not a reg-name.
473 // (RFC3986_3.2.2.).
474 else {
475 std::string host_str{authority_sv};
476 if (!is_ipv4_addr(host_str) && !is_dns_name(host_str)) {
477 goto exception;
478 }
479 }
480
481 return authority_sv;
482
483exception:
484 throw std::invalid_argument(
485 UPnPsdk_LOGEXCEPT(
486 "MSG1160") "invalid host address or host name on URI=\"" +
487 std::string(a_uriref_sv) + "\".");
488}
489} // anonymous namespace
490
491
492// class CHost
493// ===========
494CHost::CHost(std::string_view a_uriref_sv) {
495 TRACE2(this, " Construct CHost(a_uriref_sv)")
496
497 std::string_view host_sv = get_host(a_uriref_sv);
498
499 if (host_sv.empty())
500 // Undefined host.
501 return;
502 if (host_sv.size() == 1 &&
503 host_sv.find_first_of(":/?#") != std::string_view::npos) {
504 // Empty userinfo.
506 return;
507 }
508 // Normalize host to lower case character (RFC3986_3.2.2.).
509 m_component = host_sv;
510 for (auto it{m_component.begin()}; it < m_component.end(); it++)
511 *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
512
514}
515
516
517namespace {
518// get_port free helper function
519// ========---------------------
532std::string_view
533get_port(std::string_view a_uriref_sv
534) {
535 // The Port information starts with ':' and ends with the end of the
536 // authority, that is '/', or '?', or '#', or end of uri. Because there may
537 // be also colons as separator for a password, or within an IPv6 address, I
538 // must look at the last occurrence of a colon within the authority
539 // component that is the separator for the port.
540 TRACE("Executing get_port(a_uriref_sv)")
541
542 std::string_view authority_sv = get_authority(a_uriref_sv);
543
544 if (authority_sv.empty())
545 // Undefined authority means also no port.
546 return ""; // Valid after return with size 0 (but dangling pointer).
547
548 authority_sv.remove_prefix(2); // remove authority separator "//".
549
550 auto& npos = std::string_view::npos;
551 size_t pos;
552 // Extract port from authority string if present:
553 // first strip userinfo from authority string if present.
554 if ((pos = authority_sv.find_first_of('@')) != npos)
555 authority_sv.remove_prefix(pos + 1);
556 // I have to look for last occurrance of the port separator. If we find ']'
557 // before ':' at last then it is an IPv6 address with colons for the host
558 // without port.
559 if ((pos = authority_sv.find_last_of("]:")) == npos ||
560 authority_sv[pos] != ':')
561 // No port found. It is undefined.
562 return ""; // Valid after return with size 0 (but dangling pointer).
563
564 // There is at least a ':'. Remove host if any.
565 authority_sv.remove_prefix(pos);
566
567 // Here we have the port string with preceeding separator ':', but it may be
568 // empty.
569 if (authority_sv == ":") {
570 return authority_sv;
571 }
572
573 authority_sv.remove_prefix(1); // Remove preceeding ':'.
574
575 // Check if the port string is valid.
576 if (to_port(authority_sv) != 0)
577 throw std::invalid_argument(
578 UPnPsdk_LOGEXCEPT("MSG1164") "Invalid port number. Failed URI=\"" +
579 std::string(a_uriref_sv) + "\".\n");
580
581 return authority_sv;
582}
583} // anonymous namespace
584
585
586// class CPort
587// ===========
588CPort::CPort(std::string_view a_uriref_sv) {
589 TRACE2(this, " Construct CPort(a_uriref_sv)")
590
591 std::string_view port_sv = get_port(a_uriref_sv);
592
593 if (port_sv.empty())
594 // Undefined host.
595 return;
596 if (port_sv == ":") {
597 // Empty port.
599 return;
600 }
601
602 // Default ports are normalized to undefined ports. Pattern optimized for
603 // minimal calling get_scheme().
604 if (port_sv == "80") {
605 if (get_scheme(a_uriref_sv) == "http:") {
607 return;
608 }
609 } else if (port_sv == "443") {
610 if (get_scheme(a_uriref_sv) == "https:") {
612 return;
613 }
614 }
615
616 m_component = port_sv;
618}
619
620
621// CAuthority
622// ==========
623CAuthority::CAuthority(std::string_view a_uri_sv)
624 : userinfo(a_uri_sv), host(a_uri_sv), port(a_uri_sv) {
625 TRACE2(this, " Construct CAuthority(a_uri_sv)");
626}
627
629 TRACE2(this, " Executing CAuthority::state()")
630 if (this->host.state() == STATE::avail ||
631 this->userinfo.state() == STATE::avail ||
632 this->port.state() == STATE::avail)
633 return STATE::avail;
634
635 if (this->host.state() == STATE::undef &&
636 this->userinfo.state() == STATE::undef &&
637 this->port.state() == STATE::undef)
638 return STATE::undef;
639
640 return STATE::empty;
641}
642
643std::string CAuthority::str() const {
644 TRACE2(this, " Executing CAuthority::str()")
645 return //
646 (this->userinfo.state() == STATE::avail ? this->userinfo.str() : "") +
647 (this->userinfo.state() == STATE::avail ? "@" : "") +
648 (this->host.state() == STATE::avail ? this->host.str() : "") +
649 (this->port.state() == STATE::avail ? ":" : "") +
650 (this->port.state() == STATE::avail ? this->port.str() : "");
651}
652
653
654namespace {
655// get_path free helper function
656// ========---------------------
668std::string_view
669get_path(std::string_view a_uriref_sv
670) {
671 // A path is always defined for a URI, though the defined path may be empty
672 // (zero length) (RFC3986_3.3.). To get the path I strip all other
673 // components from the URI reference.
674 // If a URI contains an authority component, then the path component must
675 // either be empty or begin with a slash ("/") character. The path is
676 // terminated by the first question mark ("?") or number sign ("#")
677 // character, or by the end of the URI (RFC3986_3.3.). Schemes "http", and
678 // "https" must always have an authority component (RFC7230_2.7.1.).
679 // In addition, a URI reference may be a relative-path reference, in which
680 // case the first path segment cannot contain a colon (":") character
681 // (RFC3986_3.3.).
682 TRACE("Executing get_path(a_uriref_sv)")
683
684 auto& npos = std::string_view::npos;
685 size_t pos;
686
687 // Remove possible query and/or fragment. Scheme, authority and path does
688 // not contain '?' or '#'.
689 if ((pos = a_uriref_sv.find_first_of("?#")) != npos)
690 a_uriref_sv.remove_suffix(a_uriref_sv.size() - pos);
691
692 // Remove possible scheme.
693 if ((pos = get_scheme(a_uriref_sv).size()) != 0)
694 a_uriref_sv.remove_prefix(pos);
695
696 // Remove possible authority.
697 if ((pos = get_authority(a_uriref_sv).size()) != 0) {
698 a_uriref_sv.remove_prefix(pos);
699 }
700
701 // a_uriref_sv can also be empty that means an empty path. The path cannot
702 // be undefined by definition (RFC3986 3.3.).
703 return a_uriref_sv;
704}
705} // anonymous namespace
706
707
708// class CPath
709// ===========
710CPath::CPath(std::string_view a_uriref_sv) {
711 TRACE2(this, " Construct CPath(a_uriref_sv)")
712
713 std::string_view path_sv = get_path(a_uriref_sv);
714
715 // The path cannot be undefined by definition (RFC3986 3.3.).
716 if (path_sv.empty()) {
717 // Empty path.
719 return;
720 }
721
722 m_component = path_sv;
724}
725
726
728 // Normalize by removing dot segments in place.
730}
731
732
733namespace {
734// get_query free helper function
735// =========---------------------
746std::string_view
747get_query(std::string_view a_uriref_sv
748) {
749 // The query component is indicated by the first question mark ("?")
750 // character and terminated by a number sign ("#") character or by the end
751 // of the URI. The characters slash ("/") and question mark ("?") may
752 // represent data within the query component (RFC3986_3.4.) and within the
753 // fragment identifier (RFC3986_3.5.).
754 TRACE("Executing get_query(a_uriref_sv)")
755
756 // Find begin of the query component.
757 auto& npos = std::string_view::npos;
758 size_t pos;
759 if ((pos = a_uriref_sv.find_first_of("?#")) == npos ||
760 a_uriref_sv[pos] == '#')
761 // No query component found. Leave it undefined.
762 return ""; // Valid after return with size 0 (but dangling pointer).
763
764 // There is a '?'. Strip all before the query component.
765 a_uriref_sv.remove_prefix(pos);
766 // Strip all behind the query component.
767 if ((pos = a_uriref_sv.find_first_of('#')) != npos)
768 a_uriref_sv.remove_suffix(a_uriref_sv.size() - pos);
769
770 if (a_uriref_sv == "?")
771 // Only '?' means the query is empty.
772 return a_uriref_sv;
773
774 // Return query without separator.
775 a_uriref_sv.remove_prefix(1);
776 return a_uriref_sv;
777}
778} // anonymous namespace
779
780
781// class CQuery
782// ============
783CQuery::CQuery(std::string_view a_uriref_sv) {
784 TRACE2(this, " Construct CQuery(a_uriref_sv)")
785
786 std::string_view query_sv = get_query(a_uriref_sv);
787
788 if (query_sv.empty())
789 // Undefined query.
790 return;
791 if (query_sv == "?") {
792 // Empty query.
794 return;
795 }
796
797 m_component = query_sv;
799}
800
801
802namespace {
803// get_fragment free helper function
804// ============---------------------
815std::string_view
816get_fragment(std::string_view a_uriref_sv
817) {
818 // A fragment identifier component is indicated by the presence of a number
819 // sign ("#") character and terminated by the end of the URI (RFC3986_3.5.).
820 TRACE("Executing get_fragment(a_uriref_sv)")
821
822 // Find begin of the fragment component.
823 auto& npos = std::string_view::npos;
824 size_t pos;
825 if ((pos = a_uriref_sv.find_first_of('#')) == npos)
826 // No fragment component found. Leave it undefined.
827 return ""; // Valid after return with size 0 (but dangling pointer).
828
829 // Strip all before the fragment component.
830 a_uriref_sv.remove_prefix(pos);
831
832 if (a_uriref_sv == "#")
833 // Only '#' means the fragment is empty.
834 return a_uriref_sv;
835
836 // Return fragment without separator.
837 a_uriref_sv.remove_prefix(1);
838 return a_uriref_sv;
839}
840} // anonymous namespace
841
842
843// Class CFragment
844// ===============
845CFragment::CFragment(std::string_view a_uriref_sv) {
846 TRACE2(this, " Construct CFragment(a_uriref_sv)")
847
848 std::string_view fragment_sv = get_fragment(a_uriref_sv);
849
850 if (fragment_sv.empty())
851 // Undefined fragment.
852 return;
853 if (fragment_sv == "#") {
854 // Empty fragment.
856 return;
857 }
858
859 m_component = fragment_sv;
861}
862
863
864// Class CPrepUriStr
865// =================
866CPrepUriStr::CPrepUriStr(std::string& a_uriref_str) {
867 TRACE2(this, " Construct CPrepUriStr(a_uriref_str)")
868
869 auto it{a_uriref_str.begin()};
870 auto it_end{a_uriref_str.end()};
871 // clang-format off
872 // First check begin and end of the URI for invalid encoded character.
873 if (!a_uriref_str.empty() &&
874 ( /*begin*/ (a_uriref_str.size() < 3 && *it == '%') ||
875 /*end*/ (a_uriref_str.size() >= 3 && (*(it_end - 2) == '%' || *(it_end - 1) == '%'))))
876 goto exception;
877 // clang-format on
878
879 // Then parse the URI to set upper case hex digits.
880 for (; it < it_end; it++) {
881 if (static_cast<unsigned char>(*it) == '%') {
882 it++;
883 if (!std::isxdigit(static_cast<unsigned char>(*it)))
884 goto exception;
885 *it = static_cast<char>(
886 std::toupper(static_cast<unsigned char>(*it)));
887 it++;
888 if (!std::isxdigit(static_cast<unsigned char>(*it)))
889 goto exception;
890 *it = static_cast<char>(
891 std::toupper(static_cast<unsigned char>(*it)));
892 }
893 }
894 return;
895
896exception:
897 throw std::invalid_argument(
898 UPnPsdk_LOGEXCEPT(
899 "MSG1165") "URI with invalid percent encoding, failed URI=\"" +
900 a_uriref_str + "\".\n");
901}
902
903
904// Class CUriRef
905// =============
906CUriRef::CUriRef(std::string a_uriref_str)
907 : prepare_uri_str(a_uriref_str), scheme(a_uriref_str),
908 authority(a_uriref_str), path(a_uriref_str), query(a_uriref_str),
909 fragment(a_uriref_str) {
910 TRACE2(this, " Construct CUriRef(a_uriref_str)");
911
912 if (this->scheme.state() == STATE::undef)
913 // It's a relative refefence. The constructor checks only a base URI.
914 return;
915
916 if (this->scheme.str() == "http" || this->scheme.str() == "https") {
917 if (this->authority.host.state() != STATE::avail)
918 throw std::invalid_argument(
919 UPnPsdk_LOGEXCEPT("MSG1168") "Scheme \"" + this->scheme.str() +
920 "\" must have a host. Invalid URI=\"" + a_uriref_str + "\"\n");
921
922 } else if (this->scheme.str() == "file") {
923 // Due to RFC8089 2. Syntax:
924 // 0. file relative URI reference, not a file URI
925 // 1. file: invalid
926 // 1. file:// invalid
927 // 2. file:///
928 // 3. file:///path/to/file
929 // 4. file://localhost/
930 // 5. file://localhost/path/to/file
931 // 6. file://a.aa/
932 // 7. file://a.aa/path/to/file
933 // 8. file:/
934 // 9. file:/path/to/file
935 if (this->authority.userinfo.state() == STATE::undef &&
936 this->authority.port.state() == STATE::undef &&
937 this->path.state() != STATE::empty &&
938 this->query.state() == STATE::undef &&
939 this->fragment.state() == STATE::undef) {
940
941 return;
942 }
943 throw std::invalid_argument(
944 UPnPsdk_LOGEXCEPT("MSG1169") "Invalid URI=\"" +
945 std::string(a_uriref_str) + "\"\n");
946 }
947}
948
949// CUriRef Getter
951 TRACE2(this, " Executing CUriRf::state()")
952 using STATE = STATE;
953 if (this->scheme.state() == STATE::avail ||
954 this->authority.state() == STATE::avail ||
955 this->path.state() == STATE::avail ||
956 this->query.state() == STATE::avail ||
957 this->fragment.state() == STATE::avail)
958 return STATE::avail;
959
960 if (this->scheme.state() == STATE::undef &&
961 this->authority.state() == STATE::undef &&
962 this->path.state() == STATE::undef &&
963 this->query.state() == STATE::undef &&
964 this->fragment.state() == STATE::undef)
965 return STATE::undef;
966
967 return STATE::empty;
968}
969
970std::string CUriRef::str() const {
971 TRACE2(this, " Executing CUriRef::str()")
972
973 // This rule always appends a '/' to an authority, even the path is empty.
974 // For example, the URI "http://example.com/" is the normal form for the
975 // "http" scheme, as specified by RFC3986 6.2.3.
976 return (scheme.state() == STATE::avail ? scheme.str() : "") +
977 (scheme.state() == STATE::undef ? "" : ":") +
978 (authority.state() == STATE::undef ? "" : "//") +
979 (authority.state() == STATE::avail ? authority.str() : "") +
980 (authority.state() == STATE::undef
981 ? ""
982 : (path.state() != STATE::empty && path.str().front() == '/'
983 ? ""
984 : "/")) +
985 (path.state() == STATE::avail ? path.str() : "") +
986 (query.state() == STATE::undef ? "" : "?") +
987 (query.state() == STATE::avail ? query.str() : "") +
988 (fragment.state() == STATE::undef ? "" : "#") +
989 (fragment.state() == STATE::avail ? fragment.str() : "");
990}
991
992
993// CUri
994// ==========
995namespace {
1001void merge_paths(CPath& a_path,
1002 const CUriRef& a_base,
1003 const CUriRef& a_rel
1004) {
1005 // For an overview of the following algorithm have a look to the description
1006 // at RFC3986_5.2.3.
1007 std::string path_str;
1008 if (a_base.authority.state() != CComponent::STATE::undef &&
1009 a_base.path.state() == CComponent::STATE::empty) {
1010 path_str = '/' + a_rel.path.str();
1011 } else {
1012 size_t pos;
1013 if ((pos = a_base.path.str().find_last_of('/')) == std::string::npos)
1014 path_str.clear();
1015 else
1016 path_str = a_base.path.str().substr(0, pos);
1017 path_str += '/' + a_rel.path.str();
1018 }
1019 a_path = CPath(path_str);
1020}
1021} // anonymous namespace
1022
1023
1024CUri::CUri(std::string a_uriref_str) : base(a_uriref_str), target("") {
1025 TRACE2(this, " Construct CUri(a_uriref_str)")
1026
1028 // It's a relative resource refefence. The constructor accepts only a
1029 // base URI.
1030 throw std::invalid_argument(
1031 UPnPsdk_LOGEXCEPT(
1032 "MSG1170") "Only base URI accepted. Invalid URI=\"" +
1033 a_uriref_str + "\"\n");
1034}
1035
1036
1037void CUri::operator=(std::string a_relref_str) {
1038 TRACE2(this, " Executing CUri::operator=(a_relref_str)")
1039 // Get a working object with splitted components of the URI input string.
1040 CUriRef relObj(a_relref_str);
1041
1042 if (relObj.scheme.state() != CComponent::STATE::undef)
1043 // It's not a relative refefence. Only that is accepted.
1044 throw std::invalid_argument(
1045 UPnPsdk_LOGEXCEPT(
1046 "MSG1171") "Only relative reference accepted. Invalid URI=\"" +
1047 a_relref_str + "\"\n");
1048
1049 // Transform References
1050 // --------------------
1051 // For an overview of the following algorithm have a look to the pseudo code
1052 // at RFC3986_5.2.2.
1053 auto& baseObj = this->base;
1054 auto& targetObj = this->target;
1055
1056 if (relObj.scheme.state() != CComponent::STATE::undef) {
1057 targetObj.scheme = relObj.scheme;
1058 targetObj.authority = relObj.authority;
1059 targetObj.path = relObj.path;
1060 targetObj.path.remove_dot_segments();
1061 targetObj.query = relObj.query;
1062 } else {
1063 if (relObj.authority.state() != CComponent::STATE::undef) {
1064 targetObj.authority = relObj.authority;
1065 targetObj.path = relObj.path;
1066 targetObj.path.remove_dot_segments();
1067 targetObj.query = relObj.query;
1068 } else {
1069 if (relObj.path.state() == CComponent::STATE::empty) {
1070 targetObj.path = baseObj.path;
1071 targetObj.path.remove_dot_segments();
1072 if (relObj.query.state() != CComponent::STATE::undef)
1073 targetObj.query = relObj.query;
1074 else
1075 targetObj.query = baseObj.query;
1076 } else {
1077 if (relObj.path.str().front() == '/') {
1078 targetObj.path = relObj.path;
1079 targetObj.path.remove_dot_segments();
1080 } else {
1081 merge_paths(targetObj.path, baseObj, relObj);
1082 targetObj.path.remove_dot_segments();
1083 }
1084 targetObj.query = relObj.query;
1085 }
1086 targetObj.authority = baseObj.authority;
1087 }
1088 targetObj.scheme = baseObj.scheme;
1089 }
1090 targetObj.fragment = relObj.fragment;
1091}
1092
1093
1095 if (this->target.state() != CComponent::STATE::avail)
1096 return this->base.state();
1097 else
1098 return this->target.state();
1099}
1100
1101std::string CUri::str() const {
1102 if (this->target.state() != CComponent::STATE::avail)
1103 return this->base.str();
1104 else
1105 return this->target.str();
1106}
1107
1108} // namespace UPnPsdk
1109
1110
1111int parse_uri(const char* in, size_t max, uri_type* out) {
1112 // The CUriRef class coppies its input URI reference string to the stack to
1113 // be more thread safe. So it cannot provide pointer to the external source.
1114 // For that I use the "get component" free functions.
1115 using STATE = UPnPsdk::CComponent::STATE;
1116 std::string_view uriref_sv = std::string_view(in, max);
1117 // std::cerr << "DEBUG: parse_uri: uriref_sv=\"" << uriref_sv << "\"\n";
1118
1119 try {
1120 UPnPsdk::CUriRef uriObj{std::string(uriref_sv)};
1121
1122#if 0 // Helpful for developers for deep analysis.
1123 std::cerr << "DEBUG: scheme=\"" << uriObj.scheme.str() << "\"\n";
1124 std::cerr << "DEBUG: authority=\"" << uriObj.authority.str() << "\"\n";
1125 std::cerr << "DEBUG: path=\"" << uriObj.path.str() << "\"\n";
1126 std::cerr << "DEBUG: query=\"" << uriObj.query.str() << "\"\n";
1127 std::cerr << "DEBUG: fragment=\"" << uriObj.fragment.str() << "\"\n";
1128#endif
1129 ::memset(out, 0, sizeof(*out));
1130
1131 // 'out->type' and 'out->path_type'
1132 if (uriObj.scheme.state() == STATE::avail) {
1133 out->type = uriType::Absolute;
1135 } else {
1136 out->type = uriType::Relative;
1138 }
1139 // Correct 'out->path_type' if absolute path is available.
1140 if (uriObj.path.state() == STATE::avail &&
1141 uriObj.path.str().front() == '/')
1143
1144 // out->scheme
1145 if (uriObj.scheme.state() == STATE::avail) {
1146 std::string_view scheme_sv = UPnPsdk::get_scheme(uriref_sv);
1147 scheme_sv.remove_suffix(1); // Remove trailing ':'
1148 out->scheme.buff = scheme_sv.data();
1149 out->scheme.size = scheme_sv.size();
1150 }
1151
1152 // out->hostport
1153 if (uriObj.authority.host.state() == STATE::avail) {
1154 // Get pointer to host component on the input URI string and store
1155 // it to 'out'.
1156 std::string_view host_sv = UPnPsdk::get_host(uriref_sv);
1157 out->hostport.text.buff = host_sv.data();
1158
1159 // Calculate size of the host+port string and store it to 'out'. If
1160 // there is no port available we must only store the size of the
1161 // host string, otherwise we need the sum of host and port sizes
1162 // plus 1 for the host:port separator ':'.
1163 if (uriObj.authority.port.state() == STATE::avail)
1164 out->hostport.text.size = uriObj.authority.host.str().size() +
1165 1 +
1166 uriObj.authority.port.str().size();
1167 else
1168 out->hostport.text.size = uriObj.authority.host.str().size();
1169
1170 // Calculate the port to be used to get the remote host network
1171 // address. If it is available then that is used; if not, then the
1172 // defaults for "https" and "http" are used, or the unspecified one
1173 // (port=0).
1174 std::string port_str;
1175 if (uriObj.authority.port.state() == STATE::avail) {
1176 port_str = uriObj.authority.port.str();
1177 } else {
1178 if (uriObj.scheme.state() == STATE::avail) {
1179 if (uriObj.scheme.str() == "https")
1180 port_str = "443";
1181 else if (uriObj.scheme.str() == "http")
1182 port_str = "80";
1183 }
1184 }
1185 // Get the network address. If necessary with DNS lookup.
1186 UPnPsdk::CAddrinfo ai(uriObj.authority.host.str(), port_str);
1187 if (!ai.get_first())
1188 throw std::invalid_argument(
1189 UPnPsdk_LOGEXCEPT(
1190 "MSG1155") "Host not found. Failed URI=\"" +
1191 std::string(uriref_sv) + "\"\n");
1192 // Store the address to 'out'.
1193 out->hostport.IPaddress =
1194 *reinterpret_cast<sockaddr_storage*>(ai->ai_addr);
1195 }
1196
1197 // out->pathquery
1198 if (uriObj.path.state() == STATE::avail) {
1199 std::string_view path_sv = UPnPsdk::get_path(uriref_sv);
1200 out->pathquery.buff = path_sv.data();
1201 if (uriObj.query.state() == STATE::avail) {
1202 std::string_view query_sv = UPnPsdk::get_query(uriref_sv);
1203 out->pathquery.size = path_sv.size() + 1 + query_sv.size();
1204 } else {
1205 out->pathquery.size = path_sv.size();
1206 }
1207 }
1208
1209 // out->fragment
1210 if (uriObj.fragment.state() == STATE::avail) {
1211 std::string_view fragment_sv = UPnPsdk::get_fragment(uriref_sv);
1212 out->fragment.buff = fragment_sv.data();
1213 out->fragment.size = fragment_sv.size();
1214 }
1215
1216 return HTTP_SUCCESS;
1217
1218 } catch (const std::invalid_argument& ex) {
1219 UPnPsdk_LOGCATCH("MSG1046") "Catched next line...\n"
1220 << ex.what() << '\n';
1221 return UPNP_E_INVALID_URL;
1222 }
1223}
Manage Uniform Resource Identifier (URI) as specified with RFC 3986.
token fragment
Member variable.
Definition uri.hpp:71
token scheme
Member variable.
Definition uri.hpp:68
@ Relative
The URI is relative, meaning it has no 'scheme' component.
@ Absolute
The URI is absolute, meaning it has a 'scheme' component.
hostport_type hostport
Member variable.
Definition uri.hpp:72
size_t size
Size of the buffer.
Definition uri.hpp:50
@ ABS_PATH
The 'path' component begins with a '/'.
@ REL_PATH
The 'path' component doesn't begin with a '/'.
pathType path_type
Member variable.
Definition uri.hpp:69
const char * buff
Buffer.
Definition uri.hpp:49
token pathquery
Member variable.
Definition uri.hpp:70
sockaddr_storage IPaddress
Network socket address.
Definition uri.hpp:58
constexpr int HTTP_SUCCESS
Yet another success code.
Definition uri.hpp:21
uriType type
Member variable.
Definition uri.hpp:67
token text
Pointing to the full host:port string representation.
Definition uri.hpp:57
Represents a URI used in parse_uri and elsewhere.
Definition uri.hpp:64
Declaration of the Addrinfo class.
Get information from the operating system about an internet address.
Definition addrinfo.hpp:55
bool get_first()
Get the first entry of an address info from the operating system.
Definition addrinfo.cpp:156
std::string str() const
Get the string of the authority component.
Definition uri.cpp:643
CAuthority(std::string_view a_uri_sv)
Initialize the authority component.
Definition uri.cpp:623
CUserinfo userinfo
Definition uri.hpp:276
CComponent::STATE state() const
Get state of the authority.
Definition uri.cpp:628
std::string m_component
Name of the component.
Definition uri.hpp:174
STATE state() const
Get state of the component.
Definition uri.cpp:181
STATE
Defines the possible states of a URI component.
Definition uri.hpp:152
@ avail
Component string is available, meaning it has valid content.
@ empty
Component is defined but empty, component string is empty.
@ undef
Component is undefined; the component string is empty.
STATE m_state
Current state of the component.
Definition uri.hpp:169
const std::string & str() const
Get the string of the component.
Definition uri.cpp:186
CFragment(std::string_view a_uri_sv)
Initialize the fragment component.
Definition uri.cpp:845
CHost(std::string_view a_uri_sv)
Initialize the host subcomponent.
Definition uri.cpp:494
Path component of a URI reference.
Definition uri.hpp:306
CPath(std::string_view a_uri_sv)
Initialize the path component.
Definition uri.cpp:710
void remove_dot_segments()
UPnPsdk::remove_dot_segments() from path component.
Definition uri.cpp:727
CPort(std::string_view a_uri_sv)
Initialize the port subcomponent.
Definition uri.cpp:588
CQuery(std::string_view a_uri_sv)
Initialize the query component.
Definition uri.cpp:783
CScheme(std::string_view a_uri_sv)
Initialize the scheme component.
Definition uri.cpp:248
This is a URI reference.
Definition uri.hpp:375
CAuthority authority
Definition uri.hpp:390
CFragment fragment
Definition uri.hpp:393
CScheme scheme
Definition uri.hpp:389
CComponent::STATE state() const
Get state of the URI reference.
Definition uri.cpp:950
std::string str() const
Get URI reference string.
Definition uri.cpp:970
CQuery query
Definition uri.hpp:392
CUriRef(std::string a_uriref_str)
Initialize the URI reference.
Definition uri.cpp:906
CUriRef target
Resulting URI of merged relative reference to the base URI.
Definition uri.hpp:448
void operator=(std::string a_relref_str)
Set a relative resource reference.
Definition uri.cpp:1037
CUriRef base
Base URI.
Definition uri.hpp:446
CUri(std::string a_uriref_str)
Initialize with the base URI.
Definition uri.cpp:1024
CComponent::STATE state() const
Get state of the URI.
Definition uri.cpp:1094
std::string str() const
Get the resulting URI string merged with the relative reference.
Definition uri.cpp:1101
CUserinfo(std::string_view a_uri_sv)
Initialize the userinfo subcomponent.
Definition uri.cpp:384
int to_port(std::string_view a_port_str, in_port_t *const a_port_num=nullptr) noexcept
Free function to check if a string represents a valid port number.
Definition sockaddr.cpp:89
int parse_uri(const char *in, size_t max, uri_type *out)
Parses a URI as defined in RFC 3986 (Uniform Resource Identifier).
Definition uri.cpp:1111
UPnPsdk_VIS void remove_dot_segments(std::string &a_path)
Remove dot segments from a path.
Definition uri.cpp:73
std::string_view get_authority(std::string_view a_uriref_sv)
Separates the authority component from a URI reference.
Definition uri.cpp:292
CPrepUriStr(std::string &a_uriref_str)
Initialize the helper class.
Definition uri.cpp:866
std::string_view get_path(std::string_view a_uriref_sv)
Separates the path component from a URI reference.
Definition uri.cpp:669
std::string_view get_scheme(std::string_view a_uri_sv)
Separates the scheme component from a URI.
Definition uri.cpp:203
std::string_view get_port(std::string_view a_uriref_sv)
Separates the authority port subcomponent from a URI reference.
Definition uri.cpp:533
bool is_ipv4_addr(const std::string &ip)
Check if string is a valid IPv4 address.
Definition uri.cpp:39
std::string_view get_query(std::string_view a_uriref_sv)
Separates the query component from a URI reference.
Definition uri.cpp:747
std::string_view get_fragment(std::string_view a_uriref_sv)
Separates the fragment component from a URI reference.
Definition uri.cpp:816
std::string_view get_userinfo(std::string_view a_uriref_sv)
Separates the authority userinfo subcomponent from a URI reference.
Definition uri.cpp:332
std::string_view get_host(std::string_view a_uriref_sv)
Separates the authority host subcomponent from a URI reference.
Definition uri.cpp:419
void merge_paths(CPath &a_path, const CUriRef &a_base, const CUriRef &a_rel)
Merge a relative reference to a base URI.
Definition uri.cpp:1001
bool is_dns_name(const std::string &label)
Check if a string conforms to a DNS name.
Definition uri.cpp:57
Definition of the UPNP_E_* error messages.
#define UPNP_E_INVALID_URL
A URL passed into the function is invalid.
Definition messages.hpp:83
Reengineered Object Oriented UPnP+ program code.
UPnPsdk_VIS void decode_esc_chars(std::string &a_encoded)
Replaces HTTP percent-encoded characters with their character representation.
Definition uri.cpp:133
Declaration of the Sockaddr class and some free helper functions.
Trivial ::sockaddr structures enhanced with methods.
Definition sockaddr.hpp:133
Define macro for synced logging to the console for detailed info and debug.