diff options
Diffstat (limited to 'zen/http.cpp')
-rw-r--r-- | zen/http.cpp | 165 |
1 files changed, 130 insertions, 35 deletions
diff --git a/zen/http.cpp b/zen/http.cpp index 4f2c5205..8cd99d7a 100644 --- a/zen/http.cpp +++ b/zen/http.cpp @@ -18,15 +18,19 @@ class HttpInputStream::Impl { public: Impl(const Zstring& url, - const std::vector<std::pair<std::string, std::string>>* postParams, //issue POST if bound, GET otherwise + const std::string* postBuf /*issue POST if bound, GET otherwise*/, + const Zstring& contentType, //required for POST bool disableGetCache /*not relevant for POST (= never cached)*/, const Zstring& userAgent, const Zstring* caCertFilePath /*optional: enable certificate validation*/, - const IOCallback& notifyUnbufferedIO) : //throw SysError + const IOCallback& notifyUnbufferedIO) : //throw SysError, X notifyUnbufferedIO_(notifyUnbufferedIO) { ZEN_ON_SCOPE_FAIL(cleanup(); /*destructor call would lead to member double clean-up!!!*/); + //may be sending large POST: call back first + if (notifyUnbufferedIO_) notifyUnbufferedIO_(0); //throw X + const Zstring urlFmt = afterFirst(url, Zstr("://"), IF_MISSING_RETURN_NONE); const Zstring server = beforeFirst(urlFmt, Zstr('/'), IF_MISSING_RETURN_ALL); const Zstring page = Zstr('/') + afterFirst(urlFmt, Zstr('/'), IF_MISSING_RETURN_NONE); @@ -40,6 +44,13 @@ public: throw SysError(L"URL uses unexpected protocol."); }(); + assert(postBuf || contentType.empty()); + + std::map<std::string, std::string, LessAsciiNoCase> headers; + + if (postBuf && !contentType.empty()) + headers["Content-Type"] = utfTo<std::string>(contentType); + if (useTls) //HTTP default port: 443, see %WINDIR%\system32\drivers\etc\services { socket_ = std::make_unique<Socket>(server, Zstr("https")); //throw SysError @@ -49,27 +60,23 @@ public: socket_ = std::make_unique<Socket>(server, Zstr("http")); //throw SysError //we don't support "chunked and gzip transfer encoding" => HTTP 1.0 - std::map<std::string, std::string, LessAsciiNoCase> headers; headers["Host" ] = utfTo<std::string>(server); //only required for HTTP/1.1 but a few servers expect it even for 
HTTP/1.0 headers["User-Agent"] = utfTo<std::string>(userAgent); headers["Accept" ] = "*/*"; //won't hurt? - const std::string postBuf = postParams ? xWwwFormUrlEncode(*postParams) : ""; - - if (!postParams /*HTTP GET*/ && disableGetCache) + if (!postBuf /*HTTP GET*/ && disableGetCache) headers["Pragma"] = "no-cache"; //HTTP 1.0 only! superseeded by "Cache-Control" - else //HTTP POST - { - headers["Content-Type"] = "application/x-www-form-urlencoded"; - headers["Content-Length"] = numberTo<std::string>(postBuf.size()); - } + + if (postBuf) + headers["Content-Length"] = numberTo<std::string>(postBuf->size()); //https://www.w3.org/Protocols/HTTP/1.0/spec.html#Request-Line - std::string msg = (postParams ? "POST " : "GET ") + utfTo<std::string>(page) + " HTTP/1.0\r\n"; + std::string msg = (postBuf ? "POST " : "GET ") + utfTo<std::string>(page) + " HTTP/1.0\r\n"; for (const auto& [name, value] : headers) msg += name + ": " + value + "\r\n"; msg += "\r\n"; - msg += postBuf; + if (postBuf) + msg += *postBuf; //send request for (size_t bytesToSend = msg.size(); bytesToSend > 0;) @@ -121,6 +128,9 @@ public: //try to get "Content-Length" header if available if (const std::string* value = getHeader("Content-Length")) contentRemaining_ = stringTo<int64_t>(*value) - (bufPosEnd_ - bufPos_); + + //let's not get too finicky: at least report the logical amount of bytes sent/received (excluding HTTP headers) + if (notifyUnbufferedIO_) notifyUnbufferedIO_(postBuf ? 
postBuf->size() : 0); //throw X } ~Impl() { cleanup(); } @@ -225,16 +235,17 @@ std::string HttpInputStream::readAll() { return bufferedLoad<std::string>(*pimpl namespace { std::unique_ptr<HttpInputStream::Impl> sendHttpRequestImpl(const Zstring& url, - const std::vector<std::pair<std::string, std::string>>* postParams /*issue POST if bound, GET otherwise*/, + const std::string* postBuf /*issue POST if bound, GET otherwise*/, + const Zstring& contentType, //required for POST const Zstring& userAgent, const Zstring* caCertFilePath /*optional: enable certificate validation*/, - const IOCallback& notifyUnbufferedIO) //throw SysError + const IOCallback& notifyUnbufferedIO) //throw SysError, X { Zstring urlRed = url; //"A user agent should not automatically redirect a request more than five times, since such redirections usually indicate an infinite loop." for (int redirects = 0; redirects < 6; ++redirects) { - auto response = std::make_unique<HttpInputStream::Impl>(urlRed, postParams, false /*disableGetCache*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError + auto response = std::make_unique<HttpInputStream::Impl>(urlRed, postBuf, contentType, false /*disableGetCache*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X //https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#3xx_Redirection const int httpStatusCode = response->getStatusCode(); @@ -258,48 +269,48 @@ std::unique_ptr<HttpInputStream::Impl> sendHttpRequestImpl(const Zstring& url, } -//encode into "application/x-www-form-urlencoded" +//encode for "application/x-www-form-urlencoded" std::string urlencode(const std::string& str) { - std::string out; - for (const char c : str) //follow PHP spec: https://github.com/php/php-src/blob/master/ext/standard/url.c#L500 + std::string output; + for (const char c : str) //follow PHP spec: https://github.com/php/php-src/blob/e99d5d39239c611e1e7304e79e88545c4e71a073/ext/standard/url.c#L455 if (c == ' ') - out += '+'; + output += '+'; 
else if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '-' || c == '.' || c == '_') //note: "~" is encoded by PHP! - out += c; + output += c; else { const auto [high, low] = hexify(c); - out += '%'; - out += high; - out += low; + output += '%'; + output += high; + output += low; } - return out; + return output; } std::string urldecode(const std::string& str) { - std::string out; + std::string output; for (size_t i = 0; i < str.size(); ++i) { const char c = str[i]; if (c == '+') - out += ' '; + output += ' '; else if (c == '%' && str.size() - i >= 3 && isHexDigit(str[i + 1]) && isHexDigit(str[i + 2])) { - out += unhexify(str[i + 1], str[i + 2]); + output += unhexify(str[i + 1], str[i + 2]); i += 2; } else - out += c; + output += c; } - return out; + return output; } } @@ -327,16 +338,24 @@ std::vector<std::pair<std::string, std::string>> zen::xWwwFormUrlDecode(const st } +HttpInputStream zen::sendHttpGet(const Zstring& url, const Zstring& userAgent, const Zstring* caCertFilePath, const IOCallback& notifyUnbufferedIO) //throw SysError, X +{ + return sendHttpRequestImpl(url, nullptr /*postBuf*/, Zstr("") /*contentType*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X, X +} + + HttpInputStream zen::sendHttpPost(const Zstring& url, const std::vector<std::pair<std::string, std::string>>& postParams, - const Zstring& userAgent, const Zstring* caCertFilePath, const IOCallback& notifyUnbufferedIO) //throw SysError + const Zstring& userAgent, const Zstring* caCertFilePath, const IOCallback& notifyUnbufferedIO) //throw SysError, X { - return sendHttpRequestImpl(url, &postParams, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError + return sendHttpPost(url, xWwwFormUrlEncode(postParams), Zstr("application/x-www-form-urlencoded"), userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X } -HttpInputStream zen::sendHttpGet(const Zstring& url, const Zstring& userAgent, const Zstring* caCertFilePath, 
const IOCallback& notifyUnbufferedIO) //throw SysError + +HttpInputStream zen::sendHttpPost(const Zstring& url, const std::string& postBuf, const Zstring& contentType, + const Zstring& userAgent, const Zstring* caCertFilePath, const IOCallback& notifyUnbufferedIO) //throw SysError, X { - return sendHttpRequestImpl(url, nullptr /*postParams*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError + return sendHttpRequestImpl(url, &postBuf, contentType, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X } @@ -346,6 +365,7 @@ bool zen::internetIsAlive() //noexcept { auto response = std::make_unique<HttpInputStream::Impl>(Zstr("http://www.google.com/"), nullptr /*postParams*/, + Zstr("") /*contentType*/, true /*disableGetCache*/, Zstr("FreeFileSync"), nullptr /*caCertFilePath*/, @@ -439,4 +459,79 @@ std::wstring zen::formatHttpStatusCode(int sc) return trimCpy(replaceCpy<std::wstring>(L"HTTP status %x.", L"%x", numberTo<std::wstring>(sc))); else return trimCpy(replaceCpy<std::wstring>(L"HTTP status %x: ", L"%x", numberTo<std::wstring>(sc)) + statusText); -}
\ No newline at end of file
+}
+
+//heuristic syntax check of an e-mail address "local@domain": quoted/escaped local parts and [IP-literal] domains are accepted without further parsing
+bool zen::isValidEmail(const Zstring& email)
+{
+    //https://en.wikipedia.org/wiki/Email_address#Syntax
+    //https://tools.ietf.org/html/rfc3696 => note errata! https://www.rfc-editor.org/errata_search.php?rfc=3696
+    //https://tools.ietf.org/html/rfc5321
+    std::string local  = utfTo<std::string>(beforeLast(email, Zstr('@'), IF_MISSING_RETURN_NONE)); //split at the *last* '@': the local part may contain '@' itself (see examples below)
+    std::string domain = utfTo<std::string>( afterLast(email, Zstr('@'), IF_MISSING_RETURN_NONE));
+    //consider: "t@st"@email.com t\@st@email.com"
+
+    auto stripComments = [](std::string& part) //drop one leading "(...)" and one trailing "(...)" comment
+    {
+        if (startsWith(part, '('))
+            part = afterFirst(part, ')', IF_MISSING_RETURN_NONE);
+
+        if (endsWith(part, ')'))
+            part = beforeLast(part, '(', IF_MISSING_RETURN_NONE);
+    };
+    stripComments(local);
+    stripComments(domain);
+
+    if (local .empty() || local .size() >  63 || // 64 octets -> 63 ASCII chars: https://devblogs.microsoft.com/oldnewthing/20120412-00/?p=7873
+        domain.empty() || domain.size() > 253)   //255 octets -> 253 ASCII chars
+        return false;
+    //---------------------------------------------------------------------
+
+    const bool quoted = (startsWith(local, '"') && endsWith(local, '"')) ||
+                        contains(local, '\\'); //e.g. "t\@st@email.com"
+    if (!quoted) //I'm not going to parse and validate this!
+        for (const std::string& comp : split(local, '.', SplitType::ALLOW_EMPTY)) //ALLOW_EMPTY: keep empty components so that leading/trailing/double dots are detected
+            if (comp.empty() || !std::all_of(comp.begin(), comp.end(), [](char c)
+            {
+                const char printable[] = "!#$%&'*+-/=?^_`{|}~";
+                const char* const printableEnd = std::end(printable) - 1; //exclude the terminating '\0'! otherwise an embedded NUL char would pass as allowed symbol
+                return isAsciiAlpha(c) || isDigit(c) || makeUnsigned(c) >= 128 || std::find(std::begin(printable), printableEnd, c) != printableEnd;
+            }))
+                return false;
+    //---------------------------------------------------------------------
+
+    //e.g. jsmith@[192.168.2.1] jsmith@[IPv6:2001:db8::1]
+    const bool likelyIp = startsWith(domain, '[') && endsWith(domain, ']');
+    if (!likelyIp) //not interested in parsing IPs!
+    {
+        if (!contains(domain, '.')) //require at least two domain labels
+            return false;
+
+        for (const std::string& comp : split(domain, '.', SplitType::ALLOW_EMPTY))
+            if (comp.empty() || comp.size() > 63 || //each label: non-empty, at most 63 chars
+                !std::all_of(comp.begin(), comp.end(), [](char c) { return isAsciiAlpha(c) || isDigit(c) || makeUnsigned(c) >= 128 || c == '-'; }))
+                return false;
+    }
+
+    return true;
+}
+
+//escape the characters & " < > as HTML entities; the single quote is passed through unchanged (like PHP's htmlspecialchars() without ENT_QUOTES)
+std::string zen::htmlSpecialChars(const std::string& str)
+{
+    //mirror PHP: https://github.com/php/php-src/blob/e99d5d39239c611e1e7304e79e88545c4e71a073/ext/standard/html_tables.h#L6189
+    std::string output;
+    for (const char c : str)
+        switch (c)
+        {
+            //*INDENT-OFF*
+            case '&': output += "&amp;" ; break;
+            case '"': output += "&quot;"; break;
+            case '<': output += "&lt;"  ; break;
+            case '>': output += "&gt;"  ; break;
+            //case '\'': output += "&#039;"; break; -> not encoded by default (needs ENT_QUOTES)
+            default:   output += c; break;
+            //*INDENT-ON*
+        }
+    return output;
+}