// ***************************************************************************** // * This file is part of the FreeFileSync project. It is distributed under * // * GNU General Public License: http://www.gnu.org/licenses/gpl-3.0 * // * Copyright (C) Zenju (zenju AT freefilesync DOT org) - All Rights Reserved * // ***************************************************************************** #include "http.h" #include #include //std::thread::id #include using namespace zen; namespace { struct UrlRedirectError { UrlRedirectError(const std::wstring& url) : newUrl(url) {} std::wstring newUrl; }; } class HttpInputStream::Impl { public: Impl(const std::wstring& url, const std::wstring& userAgent, const IOCallback& notifyUnbufferedIO, //throw SysError, UrlRedirectError const std::string* postParams) : //issue POST if bound, GET otherwise notifyUnbufferedIO_(notifyUnbufferedIO) { ZEN_ON_SCOPE_FAIL( cleanup(); /*destructor call would lead to member double clean-up!!!*/ ); assert(!startsWith(makeUpperCopy(url), L"HTTPS:")); //not supported by wxHTTP! const std::wstring urlFmt = startsWith(makeUpperCopy(url), L"HTTP://") || startsWith(makeUpperCopy(url), L"HTTPS://") ? afterFirst(url, L"://", IF_MISSING_RETURN_NONE) : url; const std::wstring server = beforeFirst(urlFmt, L'/', IF_MISSING_RETURN_ALL); const std::wstring page = L'/' + afterFirst(urlFmt, L'/', IF_MISSING_RETURN_NONE); assert(std::this_thread::get_id() == mainThreadId); assert(wxApp::IsMainLoopRunning()); webAccess_.SetHeader(L"User-Agent", userAgent); webAccess_.SetTimeout(10 /*[s]*/); //default: 10 minutes: WTF are these wxWidgets people thinking??? if (!webAccess_.Connect(server)) //will *not* fail for non-reachable url here! throw SysError(L"wxHTTP::Connect"); if (postParams) if (!webAccess_.SetPostText(L"application/x-www-form-urlencoded", utfCvrtTo(*postParams))) throw SysError(L"wxHTTP::SetPostText"); httpStream_.reset(webAccess_.GetInputStream(page)); //pass ownership const int sc = webAccess_.GetResponse(); //http://en.wikipedia.org/wiki/List_of_HTTP_status_codes#3xx_Redirection if (sc / 100 == 3) //e.g. 301, 302, 303, 307... we're not too greedy since we check location, too! { const std::wstring newUrl(webAccess_.GetHeader(L"Location")); if (newUrl.empty()) throw SysError(L"Unresolvable redirect. Empty target Location."); throw UrlRedirectError(newUrl); } if (sc != 200) //HTTP_STATUS_OK throw SysError(replaceCpy(L"HTTP status code %x.", L"%x", numberTo(sc))); if (!httpStream_ || webAccess_.GetError() != wxPROTO_NOERR) throw SysError(L"wxHTTP::GetError (" + numberTo(webAccess_.GetError()) + L")"); } ~Impl() { cleanup(); } size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream! { const size_t blockSize = getBlockSize(); while (memBuf_.size() < bytesToRead) { memBuf_.resize(memBuf_.size() + blockSize); const size_t bytesRead = tryRead(&*(memBuf_.end() - blockSize), blockSize); //throw SysError; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0 memBuf_.resize(memBuf_.size() - blockSize + bytesRead); //caveat: unsigned arithmetics if (notifyUnbufferedIO_) notifyUnbufferedIO_(bytesRead); //throw X if (bytesRead == 0) //end of file bytesToRead = memBuf_.size(); } std::copy(memBuf_.begin(), memBuf_.begin() + bytesToRead, static_cast(buffer)); memBuf_.erase(memBuf_.begin(), memBuf_.begin() + bytesToRead); return bytesToRead; } size_t getBlockSize() const { return 64 * 1024; } private: size_t tryRead(void* buffer, size_t bytesToRead) //throw SysError; may return short, only 0 means EOF! { if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! throw std::logic_error("Contract violation! " + std::string(__FILE__) + ":" + numberTo(__LINE__)); assert(bytesToRead == getBlockSize()); httpStream_->Read(buffer, bytesToRead); const wxStreamError ec = httpStream_->GetLastError(); if (ec != wxSTREAM_NO_ERROR && ec != wxSTREAM_EOF) throw SysError(L"wxInputStream::GetLastError (" + numberTo(httpStream_->GetLastError()) + L")"); const size_t bytesRead = httpStream_->LastRead(); //"if there are not enough bytes in the stream right now, LastRead() value will be // less than size but greater than 0. If it is 0, it means that EOF has been reached." assert(bytesRead > 0 || ec == wxSTREAM_EOF); if (bytesRead > bytesToRead) //better safe than sorry throw SysError(L"InternetReadFile: buffer overflow."); return bytesRead; //"zero indicates end of file" } Impl (const Impl&) = delete; Impl& operator=(const Impl&) = delete; void cleanup() { } wxHTTP webAccess_; std::unique_ptr httpStream_; //must be deleted BEFORE webAccess is closed std::vector memBuf_; const IOCallback notifyUnbufferedIO_; //throw X }; HttpInputStream::HttpInputStream(std::unique_ptr&& pimpl) : pimpl_(std::move(pimpl)) {} HttpInputStream::~HttpInputStream() {} size_t HttpInputStream::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X; return "bytesToRead" bytes unless end of stream! size_t HttpInputStream::getBlockSize() const { return pimpl_->getBlockSize(); } std::string HttpInputStream::readAll() { return bufferedLoad(*pimpl_); } //throw SysError, X; namespace { std::unique_ptr sendHttpRequestImpl(const std::wstring& url, const std::wstring& userAgent, const IOCallback& notifyUnbufferedIO, //throw SysError const std::string* postParams) //issue POST if bound, GET otherwise { std::wstring urlRed = url; //"A user agent should not automatically redirect a request more than five times, since such redirections usually indicate an infinite loop." for (int redirects = 0; redirects < 6; ++redirects) try { return std::make_unique(urlRed, userAgent, notifyUnbufferedIO, postParams); //throw SysError, UrlRedirectError } catch (const UrlRedirectError& e) { urlRed = e.newUrl; } throw SysError(L"Too many redirects."); } //encode into "application/x-www-form-urlencoded" std::string urlencode(const std::string& str) { std::string out; for (const char c : str) //follow PHP spec: https://github.com/php/php-src/blob/master/ext/standard/url.c#L500 if (c == ' ') out += '+'; else if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '-' || c == '.' || c == '_') //note: "~" is encoded by PHP! out += c; else { const std::pair hex = hexify(c); out += '%'; out += hex.first; out += hex.second; } return out; } std::string urldecode(const std::string& str) { std::string out; for (size_t i = 0; i < str.size(); ++i) { const char c = str[i]; if (c == '+') out += ' '; else if (c == '%' && str.size() - i >= 3 && isHexDigit(str[i + 1]) && isHexDigit(str[i + 2])) { out += unhexify(str[i + 1], str[i + 2]); i += 2; } else out += c; } return out; } } std::string zen::xWwwFormUrlEncode(const std::vector>& paramPairs) { std::string output; for (const auto& pair : paramPairs) output += urlencode(pair.first) + '=' + urlencode(pair.second) + '&'; //encode both key and value: https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.1 if (!output.empty()) output.pop_back(); return output; } std::vector> zen::xWwwFormUrlDecode(const std::string& str) { std::vector> output; for (const std::string& nvPair : split(str, '&')) if (!nvPair.empty()) output.emplace_back(urldecode(beforeFirst(nvPair, '=', IF_MISSING_RETURN_ALL)), urldecode(afterFirst (nvPair, '=', IF_MISSING_RETURN_NONE))); return output; } HttpInputStream zen::sendHttpPost(const std::wstring& url, const std::wstring& userAgent, const IOCallback& notifyUnbufferedIO, const std::vector>& postParams) //throw SysError { const std::string encodedParams = xWwwFormUrlEncode(postParams); return sendHttpRequestImpl(url, userAgent, notifyUnbufferedIO, &encodedParams); //throw SysError } HttpInputStream zen::sendHttpGet(const std::wstring& url, const std::wstring& userAgent, const IOCallback& notifyUnbufferedIO) //throw SysError { return sendHttpRequestImpl(url, userAgent, notifyUnbufferedIO, nullptr); //throw SysError } bool zen::internetIsAlive() //noexcept { assert(std::this_thread::get_id() == mainThreadId); const wxString server = L"www.google.com"; const wxString page = L"/"; wxHTTP webAccess; webAccess.SetTimeout(10 /*[s]*/); //default: 10 minutes: WTF are these wxWidgets people thinking??? if (!webAccess.Connect(server)) //will *not* fail for non-reachable url here! return false; std::unique_ptr httpStream(webAccess.GetInputStream(page)); //call before checking wxHTTP::GetResponse() const int sc = webAccess.GetResponse(); //attention: http://www.google.com/ might redirect to "https" => don't follow, just return "true"!!! return sc / 100 == 2 || //e.g. 200 sc / 100 == 3; //e.g. 301, 302, 303, 307... when in doubt, consider internet alive! }