diff options
author | B. Stack <bgstack15@gmail.com> | 2022-10-11 15:17:59 +0000 |
---|---|---|
committer | B. Stack <bgstack15@gmail.com> | 2022-10-11 15:17:59 +0000 |
commit | 38c826621a39831d1bdc78aa9e45cc592db3e77f (patch) | |
tree | a49cfd729d9793681a57fa6f7409b0f0848e9ede /zen | |
parent | Merge branch 'b11.25' into 'master' (diff) | |
parent | add upstream 11.26 (diff) | |
download | FreeFileSync-38c826621a39831d1bdc78aa9e45cc592db3e77f.tar.gz FreeFileSync-38c826621a39831d1bdc78aa9e45cc592db3e77f.tar.bz2 FreeFileSync-38c826621a39831d1bdc78aa9e45cc592db3e77f.zip |
Merge branch 'b11.26' into 'master'11.26
add upstream 11.26
See merge request opensource-tracking/FreeFileSync!49
Diffstat (limited to 'zen')
36 files changed, 1092 insertions, 767 deletions
diff --git a/zen/base64.h b/zen/base64.h index 48cf2230..cbef7c33 100644 --- a/zen/base64.h +++ b/zen/base64.h @@ -28,8 +28,8 @@ OutputIterator encodeBase64(InputIterator first, InputIterator last, OutputItera template <class InputIterator, class OutputIterator> OutputIterator decodeBase64(InputIterator first, InputIterator last, OutputIterator result); //nothrow! -std::string stringEncodeBase64(const std::string& str); -std::string stringDecodeBase64(const std::string& str); +std::string stringEncodeBase64(const std::string_view& str); +std::string stringDecodeBase64(const std::string_view& str); @@ -156,7 +156,7 @@ OutputIterator decodeBase64(InputIterator first, InputIterator last, OutputItera inline -std::string stringEncodeBase64(const std::string& str) +std::string stringEncodeBase64(const std::string_view& str) { std::string out; encodeBase64(str.begin(), str.end(), std::back_inserter(out)); @@ -165,7 +165,7 @@ std::string stringEncodeBase64(const std::string& str) inline -std::string stringDecodeBase64(const std::string& str) +std::string stringDecodeBase64(const std::string_view& str) { std::string out; decodeBase64(str.begin(), str.end(), std::back_inserter(out)); diff --git a/zen/dir_watcher.cpp b/zen/dir_watcher.cpp index c48928a3..87fa3596 100644 --- a/zen/dir_watcher.cpp +++ b/zen/dir_watcher.cpp @@ -101,13 +101,13 @@ DirWatcher::~DirWatcher() std::vector<DirWatcher::Change> DirWatcher::fetchChanges(const std::function<void()>& requestUiUpdate, std::chrono::milliseconds cbInterval) //throw FileError { - std::vector<std::byte> buffer(512 * (sizeof(inotify_event) + NAME_MAX + 1)); + std::vector<std::byte> buf(512 * (sizeof(inotify_event) + NAME_MAX + 1)); ssize_t bytesRead = 0; do { //non-blocking call, see O_NONBLOCK - bytesRead = ::read(pimpl_->notifDescr, &buffer[0], buffer.size()); + bytesRead = ::read(pimpl_->notifDescr, buf.data(), buf.size()); } while (bytesRead < 0 && errno == EINTR); //"Interrupted function call; When this happens, you should try the call again." @@ -124,7 +124,7 @@ std::vector<DirWatcher::Change> DirWatcher::fetchChanges(const std::function<voi ssize_t bytePos = 0; while (bytePos < bytesRead) { - inotify_event& evt = reinterpret_cast<inotify_event&>(buffer[bytePos]); + inotify_event& evt = reinterpret_cast<inotify_event&>(buf[bytePos]); if (evt.len != 0) //exclude case: deletion of "self", already reported by parent directory watch { diff --git a/zen/file_access.cpp b/zen/file_access.cpp index 2e119e87..1da8bd1b 100644 --- a/zen/file_access.cpp +++ b/zen/file_access.cpp @@ -461,48 +461,47 @@ void zen::copyItemPermissions(const Zstring& sourcePath, const Zstring& targetPa void zen::createDirectory(const Zstring& dirPath) //throw FileError, ErrorTargetExisting { - auto getErrorMsg = [&] { return replaceCpy(_("Cannot create directory %x."), L"%x", fmtPath(dirPath)); }; - - //don't allow creating irregular folders! - const Zstring dirName = afterLast(dirPath, FILE_NAME_SEPARATOR, IfNotFoundReturn::all); + try + { + //don't allow creating irregular folders! + const Zstring dirName = afterLast(dirPath, FILE_NAME_SEPARATOR, IfNotFoundReturn::all); - //e.g. "...." https://social.technet.microsoft.com/Forums/windows/en-US/ffee2322-bb6b-4fdf-86f9-8f93cf1fa6cb/ - if (std::all_of(dirName.begin(), dirName.end(), [](Zchar c) { return c == Zstr('.'); })) - /**/throw FileError(getErrorMsg(), replaceCpy<std::wstring>(L"Invalid folder name %x.", L"%x", fmtPath(dirName))); + //e.g. "...." https://social.technet.microsoft.com/Forums/windows/en-US/ffee2322-bb6b-4fdf-86f9-8f93cf1fa6cb/ + if (std::all_of(dirName.begin(), dirName.end(), [](Zchar c) { return c == Zstr('.'); })) + /**/throw SysError(replaceCpy<std::wstring>(L"Invalid folder name %x.", L"%x", fmtPath(dirName))); #if 0 //not appreciated: https://freefilesync.org/forum/viewtopic.php?t=7509 - if (startsWith(dirName, Zstr(' ')) || //Windows can access these just fine once created! - endsWith (dirName, Zstr(' '))) // - throw FileError(getErrorMsg(), replaceCpy<std::wstring>(L"Invalid folder name. %x starts/ends with space character.", L"%x", fmtPath(dirName))); + if (startsWith(dirName, Zstr(' ')) || //Windows can access these just fine once created! + endsWith (dirName, Zstr(' '))) // + throw SysError(replaceCpy<std::wstring>(L"Invalid folder name %x starts/ends with space character.", L"%x", fmtPath(dirName))); #endif + const mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO; //0777 => consider umask! - const mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO; //0777 => consider umask! - - if (::mkdir(dirPath.c_str(), mode) != 0) - { - const int lastError = errno; //copy before directly or indirectly making other system calls! - const std::wstring errorDescr = formatSystemError("mkdir", lastError); - - if (lastError == EEXIST) - throw ErrorTargetExisting(getErrorMsg(), errorDescr); - //else if (lastError == ENOENT) - // throw ErrorTargetPathMissing(errorMsg, errorDescr); - throw FileError(getErrorMsg(), errorDescr); + if (::mkdir(dirPath.c_str(), mode) != 0) + { + const int ec = errno; //copy before directly or indirectly making other system calls! + if (ec == EEXIST) + throw ErrorTargetExisting(replaceCpy(_("Cannot create directory %x."), L"%x", fmtPath(dirPath)), formatSystemError("mkdir", ec)); + //else if (ec == ENOENT) + // throw ErrorTargetPathMissing(errorMsg, errorDescr); + THROW_LAST_SYS_ERROR("mkdir"); + } } + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot create directory %x."), L"%x", fmtPath(dirPath)), e.toString()); } } -bool zen::createDirectoryIfMissingRecursion(const Zstring& dirPath) //throw FileError +void zen::createDirectoryIfMissingRecursion(const Zstring& dirPath) //throw FileError { const std::optional<Zstring> parentPath = getParentFolderPath(dirPath); if (!parentPath) //device root - return false; + return; - try //generally we expect that path already exists (see: ffs_paths.cpp) => check first + try //generally expect folder already exists (see: ffs_paths.cpp) => check first { if (getItemType(dirPath) != ItemType::file) //throw FileError - return false; + return; } catch (FileError&) {} //not yet existing or access error? let's find out... @@ -511,14 +510,14 @@ bool zen::createDirectoryIfMissingRecursion(const Zstring& dirPath) //throw File try { createDirectory(dirPath); //throw FileError, ErrorTargetExisting - return true; + return; } catch (FileError&) { try { if (getItemType(dirPath) != ItemType::file) //throw FileError - return true; //already existing => possible, if createDirectoryIfMissingRecursion() is run in parallel + return; //already existing => possible, if createDirectoryIfMissingRecursion() is run in parallel } catch (FileError&) {} //not yet existing or access error @@ -529,6 +528,11 @@ bool zen::createDirectoryIfMissingRecursion(const Zstring& dirPath) //throw File void zen::tryCopyDirectoryAttributes(const Zstring& sourcePath, const Zstring& targetPath) //throw FileError { +//do NOT copy attributes for volume root paths which return as: FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_DIRECTORY +//https://freefilesync.org/forum/viewtopic.php?t=5550 +if (!getParentFolderPath(sourcePath)) //=> root path + return; + } @@ -549,6 +553,7 @@ void zen::copySymlink(const Zstring& sourcePath, const Zstring& targetPath) //th //allow only consistent objects to be created -> don't place before ::symlink(); targetPath may already exist! ZEN_ON_SCOPE_FAIL(try { removeSymlinkPlain(targetPath); /*throw FileError*/ } catch (FileError&) {}); + warn_static("log it!") //file times: essential for syncing a symlink: enforce this! (don't just try!) struct stat sourceInfo = {}; @@ -562,16 +567,17 @@ void zen::copySymlink(const Zstring& sourcePath, const Zstring& targetPath) //th FileCopyResult zen::copyNewFile(const Zstring& sourceFile, const Zstring& targetFile, //throw FileError, ErrorTargetExisting, (ErrorFileLocked), X const IoCallback& notifyUnbufferedIO /*throw X*/) { - int64_t totalUnbufferedIO = 0; + int64_t totalBytesNotified = 0; + IOCallbackDivider notifyIoDiv(notifyUnbufferedIO, totalBytesNotified); - FileInput fileIn(sourceFile, IOCallbackDivider(notifyUnbufferedIO, totalUnbufferedIO)); //throw FileError, (ErrorFileLocked -> Windows-only) + FileInputPlain fileIn(sourceFile); //throw FileError, (ErrorFileLocked -> Windows-only) - struct stat sourceInfo = {}; - if (::fstat(fileIn.getHandle(), &sourceInfo) != 0) - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file attributes of %x."), L"%x", fmtPath(sourceFile)), "fstat"); + const struct stat& sourceInfo = fileIn.getStatBuffered(); //throw FileError - const mode_t mode = sourceInfo.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO); //analog to "cp" which copies "mode" (considering umask) by default - //it seems we don't need S_IWUSR, not even for the setFileTime() below! (tested with source file having different user/group!) + //analog to "cp" which copies "mode" (considering umask) by default: + const mode_t mode = (sourceInfo.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)) | + S_IWUSR;//macOS: S_IWUSR apparently needed to write extended attributes (see copyfile() function) + //Linux: not needed even for the setFileTime() below! (tested with source file having different user/group!) //=> need copyItemPermissions() only for "chown" and umask-agnostic permissions const int fdTarget = ::open(targetFile.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, mode); @@ -586,27 +592,46 @@ FileCopyResult zen::copyNewFile(const Zstring& sourceFile, const Zstring& target throw FileError(errorMsg, errorDescr); } - FileOutput fileOut(fdTarget, targetFile, IOCallbackDivider(notifyUnbufferedIO, totalUnbufferedIO)); //pass ownership + FileOutputPlain fileOut(fdTarget, targetFile); //pass ownership //preallocate disk space + reduce fragmentation (perf: no real benefit) fileOut.reserveSpace(sourceInfo.st_size); //throw FileError - bufferedStreamCopy(fileIn, fileOut); //throw FileError, (ErrorFileLocked), X + unbufferedStreamCopy([&](void* buffer, size_t bytesToRead) + { + const size_t bytesRead = fileIn.tryRead(buffer, bytesToRead); //throw FileError, (ErrorFileLocked) + notifyIoDiv(bytesRead); //throw X + return bytesRead; + }, + fileIn.getBlockSize() /*throw FileError*/, + + [&](const void* buffer, size_t bytesToWrite) + { + const size_t bytesWritten = fileOut.tryWrite(buffer, bytesToWrite); //throw FileError + notifyIoDiv(bytesWritten); //throw X + return bytesWritten; + }, + fileOut.getBlockSize() /*throw FileError*/); //throw FileError, X + +#if 0 + //clean file system cache: needed at all? no user complaints at all!!! + //posix_fadvise(POSIX_FADV_DONTNEED) does nothing, unless data was already read from/written to disk: https://insights.oetiker.ch/linux/fadvise/ + // => should be "most" of the data at this point => good enough? + if (::posix_fadvise(fileIn.getHandle(), 0 /*offset*/, 0 /*len*/, POSIX_FADV_DONTNEED) != 0) //"len == 0" means "end of the file" + THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file %x."), L"%x", fmtPath(sourceFile)), "posix_fadvise(POSIX_FADV_DONTNEED)"); + if (::posix_fadvise(fileOut.getHandle(), 0 /*offset*/, 0 /*len*/, POSIX_FADV_DONTNEED) != 0) //"len == 0" means "end of the file" + THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(targetFile)), "posix_fadvise(POSIX_FADV_DONTNEED)"); +#endif - //flush intermediate buffers before fiddling with the raw file handle - fileOut.flushBuffers(); //throw FileError, X - struct stat targetInfo = {}; - if (::fstat(fileOut.getHandle(), &targetInfo) != 0) - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file attributes of %x."), L"%x", fmtPath(targetFile)), "fstat"); + const auto targetFileIdx = fileOut.getStatBuffered().st_ino; //throw FileError //close output file handle before setting file time; also good place to catch errors when closing stream! - fileOut.finalize(); //throw FileError, (X) essentially a close() since buffers were already flushed - + fileOut.close(); //throw FileError //========================================================================================================== - //take fileOut ownership => from this point on, WE are responsible for calling removeFilePlain() on failure!! + //take over fileOut ownership => from this point on, WE are responsible for calling removeFilePlain() on failure!! + // not needed *currently*! see below: ZEN_ON_SCOPE_FAIL(try { removeFilePlain(targetFile); } catch (FileError&) {}); //=========================================================================================================== - std::optional<FileError> errorModTime; try { @@ -622,13 +647,14 @@ FileCopyResult zen::copyNewFile(const Zstring& sourceFile, const Zstring& target errorModTime = FileError(e.toString()); //avoid slicing } - FileCopyResult result; - result.fileSize = sourceInfo.st_size; - result.sourceModTime = sourceInfo.st_mtim; - result.sourceFileIdx = sourceInfo.st_ino; - result.targetFileIdx = targetInfo.st_ino; - result.errorModTime = errorModTime; - return result; + return + { + .fileSize = makeUnsigned(sourceInfo.st_size), + .sourceModTime = sourceInfo.st_mtim, + .sourceFileIdx = sourceInfo.st_ino, + .targetFileIdx = targetFileIdx, + .errorModTime = errorModTime, + }; } diff --git a/zen/file_access.h b/zen/file_access.h index f6a02edc..639abf64 100644 --- a/zen/file_access.h +++ b/zen/file_access.h @@ -72,8 +72,7 @@ void copyItemPermissions(const Zstring& sourcePath, const Zstring& targetPath, P void createDirectory(const Zstring& dirPath); //throw FileError, ErrorTargetExisting //creates directories recursively if not existing -//returns false if folder already exists -bool createDirectoryIfMissingRecursion(const Zstring& dirPath); //throw FileError +void createDirectoryIfMissingRecursion(const Zstring& dirPath); //throw FileError //symlink handling: follow //expects existing source/target directories diff --git a/zen/file_error.h b/zen/file_error.h index 168ea806..93c95f90 100644 --- a/zen/file_error.h +++ b/zen/file_error.h @@ -30,6 +30,7 @@ private: DEFINE_NEW_FILE_ERROR(ErrorTargetExisting) DEFINE_NEW_FILE_ERROR(ErrorFileLocked) DEFINE_NEW_FILE_ERROR(ErrorMoveUnsupported) +DEFINE_NEW_FILE_ERROR(RecycleBinUnavailable) //CAVEAT: thread-local Win32 error code is easily overwritten => evaluate *before* making any (indirect) system calls: diff --git a/zen/file_io.cpp b/zen/file_io.cpp index 7eebd2e8..ef3cbebb 100644 --- a/zen/file_io.cpp +++ b/zen/file_io.cpp @@ -5,7 +5,6 @@ // ***************************************************************************** #include "file_io.h" - #include <sys/stat.h> #include <fcntl.h> //open #include <unistd.h> //close, read, write @@ -13,6 +12,45 @@ using namespace zen; +size_t FileBase::getBlockSize() //throw FileError +{ + if (blockSizeBuf_ == 0) + { + /* - statfs::f_bsize - "optimal transfer block size" + - stat::st_blksize - "blocksize for file system I/O. Writing in smaller chunks may cause an inefficient read-modify-rewrite." + + e.g. local disk: f_bsize 4096 st_blksize 4096 + USB memory: f_bsize 32768 st_blksize 32768 */ + const auto st_blksize = getStatBuffered().st_blksize; //throw FileError + if (st_blksize > 0) //st_blksize is signed! + blockSizeBuf_ = st_blksize; // + + blockSizeBuf_ = std::max(blockSizeBuf_, defaultBlockSize); + //ha, convergent evolution! https://github.com/coreutils/coreutils/blob/master/src/ioblksize.h#L74 + } + return blockSizeBuf_; +} + + +const struct stat& FileBase::getStatBuffered() //throw FileError +{ + if (!statBuf_) + try + { + if (hFile_ == invalidFileHandle) + throw SysError(L"Contract error: getStatBuffered() called after close()."); + + struct stat fileInfo = {}; + if (::fstat(hFile_, &fileInfo) != 0) + THROW_LAST_SYS_ERROR("fstat"); + statBuf_ = std::move(fileInfo); + } + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot read file attributes of %x."), L"%x", fmtPath(filePath_)), e.toString()); } + + return *statBuf_; +} + + FileBase::~FileBase() { if (hFile_ != invalidFileHandle) @@ -21,29 +59,37 @@ FileBase::~FileBase() close(); //throw FileError } catch (FileError&) { assert(false); } + warn_static("log it!") } void FileBase::close() //throw FileError { - if (hFile_ == invalidFileHandle) - throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), L"Contract error: close() called more than once."); - ZEN_ON_SCOPE_EXIT(hFile_ = invalidFileHandle); - - if (::close(hFile_) != 0) - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), "close"); + try + { + if (hFile_ == invalidFileHandle) + throw SysError(L"Contract error: close() called more than once."); + if (::close(hFile_) != 0) + THROW_LAST_SYS_ERROR("close"); + hFile_ = invalidFileHandle; //do NOT set on failure! => ~FileOutputPlain() still wants to (try to) delete the file! + } + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), e.toString()); } } //---------------------------------------------------------------------------------------------------- namespace { -FileBase::FileHandle openHandleForRead(const Zstring& filePath) //throw FileError, ErrorFileLocked + std::pair<FileBase::FileHandle, struct stat> +openHandleForRead(const Zstring& filePath) //throw FileError, ErrorFileLocked { - //caveat: check for file types that block during open(): character device, block device, named pipe - struct stat fileInfo = {}; - if (::stat(filePath.c_str(), &fileInfo) == 0) //follows symlinks + try { + //caveat: check for file types that block during open(): character device, block device, named pipe + struct stat fileInfo = {}; + if (::stat(filePath.c_str(), &fileInfo) != 0) //follows symlinks + THROW_LAST_SYS_ERROR("stat"); + if (!S_ISREG(fileInfo.st_mode) && !S_ISDIR(fileInfo.st_mode) && //open() will fail with "EISDIR: Is a directory" => nice !S_ISLNK(fileInfo.st_mode)) //?? shouldn't be possible after successful stat() @@ -59,103 +105,77 @@ FileBase::FileHandle openHandleForRead(const Zstring& filePath) //throw FileErro name += L", "; return name + printNumber<std::wstring>(L"0%06o", m & S_IFMT); }(); - throw FileError(replaceCpy(_("Cannot open file %x."), L"%x", fmtPath(filePath)), - _("Unsupported item type.") + L" [" + typeName + L']'); + throw SysError(_("Unsupported item type.") + L" [" + typeName + L']'); } - } - //else: let ::open() fail for errors like "not existing" - //don't use O_DIRECT: https://yarchive.net/comp/linux/o_direct.html - const int fdFile = ::open(filePath.c_str(), O_RDONLY | O_CLOEXEC); - if (fdFile == -1) //don't check "< 0" -> docu seems to allow "-2" to be a valid file handle - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot open file %x."), L"%x", fmtPath(filePath)), "open"); - return fdFile; //pass ownership + //don't use O_DIRECT: https://yarchive.net/comp/linux/o_direct.html + const int fdFile = ::open(filePath.c_str(), O_RDONLY | O_CLOEXEC); + if (fdFile == -1) //don't check "< 0" -> docu seems to allow "-2" to be a valid file handle + THROW_LAST_SYS_ERROR("open"); + return {fdFile /*pass ownership*/, fileInfo}; + } + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot open file %x."), L"%x", fmtPath(filePath)), e.toString()); } } } -FileInput::FileInput(FileHandle handle, const Zstring& filePath, const IoCallback& notifyUnbufferedIO) : - FileBase(handle, filePath), notifyUnbufferedIO_(notifyUnbufferedIO) {} +FileInputPlain::FileInputPlain(const Zstring& filePath) : + FileInputPlain(openHandleForRead(filePath), filePath) {} //throw FileError, ErrorFileLocked + + +FileInputPlain::FileInputPlain(const std::pair<FileBase::FileHandle, struct stat>& fileDetails, const Zstring& filePath) : + FileInputPlain(fileDetails.first, filePath) +{ + setStatBuffered(fileDetails.second); +} -FileInput::FileInput(const Zstring& filePath, const IoCallback& notifyUnbufferedIO) : - FileBase(openHandleForRead(filePath), filePath), //throw FileError, ErrorFileLocked - notifyUnbufferedIO_(notifyUnbufferedIO) +FileInputPlain::FileInputPlain(FileHandle handle, const Zstring& filePath) : + FileBase(handle, filePath) { //optimize read-ahead on input file: - if (::posix_fadvise(getHandle(), 0, 0, POSIX_FADV_SEQUENTIAL) != 0) + if (::posix_fadvise(getHandle(), 0 /*offset*/, 0 /*len*/, POSIX_FADV_SEQUENTIAL) != 0) //"len == 0" means "end of the file" THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file %x."), L"%x", fmtPath(filePath)), "posix_fadvise(POSIX_FADV_SEQUENTIAL)"); + /* - POSIX_FADV_SEQUENTIAL is like POSIX_FADV_NORMAL, but with twice the read-ahead buffer size + - POSIX_FADV_NOREUSE "since kernel 2.6.18 this flag is a no-op" WTF!? + - POSIX_FADV_DONTNEED may be used to clear the OS file system cache (offset and len must be page-aligned!) + => does nothing, unless data was already written to disk: https://insights.oetiker.ch/linux/fadvise/ + - POSIX_FADV_WILLNEED: issue explicit read-ahead; almost the same as readahead(), but with weaker error checking + https://unix.stackexchange.com/questions/681188/difference-between-posix-fadvise-and-readahead + + clear file system cache manually: sync; echo 3 > /proc/sys/vm/drop_caches */ + } -size_t FileInput::tryRead(void* buffer, size_t bytesToRead) //throw FileError, ErrorFileLocked; may return short, only 0 means EOF! +//may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! +size_t FileInputPlain::tryRead(void* buffer, size_t bytesToRead) //throw FileError, ErrorFileLocked { if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); - assert(bytesToRead == getBlockSize()); - - ssize_t bytesRead = 0; - do + assert(bytesToRead % getBlockSize() == 0); + try { - bytesRead = ::read(getHandle(), buffer, bytesToRead); - } - while (bytesRead < 0 && errno == EINTR); //Compare copy_reg() in copy.c: ftp://ftp.gnu.org/gnu/coreutils/coreutils-8.23.tar.xz - //EINTR is not checked on macOS' copyfile: https://opensource.apple.com/source/copyfile/copyfile-146/copyfile.c.auto.html - //read() on macOS: https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man2/read.2.html - - if (bytesRead < 0) - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot read file %x."), L"%x", fmtPath(getFilePath())), "read"); - if (static_cast<size_t>(bytesRead) > bytesToRead) //better safe than sorry - throw FileError(replaceCpy(_("Cannot read file %x."), L"%x", fmtPath(getFilePath())), formatSystemError("ReadFile", L"", L"Buffer overflow.")); - - //if ::read is interrupted (EINTR) right in the middle, it will return successfully with "bytesRead < bytesToRead" + ssize_t bytesRead = 0; + do + { + bytesRead = ::read(getHandle(), buffer, bytesToRead); + } + while (bytesRead < 0 && errno == EINTR); //Compare copy_reg() in copy.c: ftp://ftp.gnu.org/gnu/coreutils/coreutils-8.23.tar.xz + //EINTR is not checked on macOS' copyfile: https://opensource.apple.com/source/copyfile/copyfile-173.40.2/copyfile.c.auto.html + //read() on macOS: https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man2/read.2.html - return bytesRead; //"zero indicates end of file" -} + if (bytesRead < 0) + THROW_LAST_SYS_ERROR("read"); + if (makeUnsigned(bytesRead) > bytesToRead) //better safe than sorry + throw SysError(formatSystemError("ReadFile", L"", L"Buffer overflow.")); + //if ::read is interrupted (EINTR) right in the middle, it will return successfully with "bytesRead < bytesToRead" -size_t FileInput::read(void* buffer, size_t bytesToRead) //throw FileError, ErrorFileLocked, X; return "bytesToRead" bytes unless end of stream! -{ - /* - FFS 8.9-9.5 perf issues on macOS: https://freefilesync.org/forum/viewtopic.php?t=4808 - app-level buffering is essential to optimize random data sizes; e.g. "export file list": - => big perf improvement on Windows, Linux. No significant improvement on macOS in tests - impact on stream-based file copy: - => no drawback vs block-wise copy loop on Linux, HOWEVER: big perf issue on macOS! - - Possible cause of macOS perf issue unclear: - - getting rid of std::vector::resize() and std::vector::erase() "fixed" the problem - => costly zero-initializing memory? problem with inlining? QOI issue of std:vector on clang/macOS? - - replacing std::copy() with memcpy() also *seems* to have improved speed "somewhat" - */ - - const size_t blockSize = getBlockSize(); - assert(memBuf_.size() >= blockSize); - assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); - - auto it = static_cast<std::byte*>(buffer); - const auto itEnd = it + bytesToRead; - for (;;) - { - const size_t junkSize = std::min(static_cast<size_t>(itEnd - it), bufPosEnd_ - bufPos_); - std::memcpy(it, &memBuf_[0] + bufPos_ /*caveat: vector debug checks*/, junkSize); - bufPos_ += junkSize; - it += junkSize; - - if (it == itEnd) - break; - //-------------------------------------------------------------------- - const size_t bytesRead = tryRead(&memBuf_[0], blockSize); //throw FileError, ErrorFileLocked; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0 - bufPos_ = 0; - bufPosEnd_ = bytesRead; - - if (notifyUnbufferedIO_) notifyUnbufferedIO_(bytesRead); //throw X - - if (bytesRead == 0) //end of file - break; + return bytesRead; //"zero indicates end of file" } - return it - static_cast<std::byte*>(buffer); + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot read file %x."), L"%x", fmtPath(getFilePath())), e.toString()); } } //---------------------------------------------------------------------------------------------------- @@ -164,170 +184,153 @@ namespace { FileBase::FileHandle openHandleForWrite(const Zstring& filePath) //throw FileError, ErrorTargetExisting { - //checkForUnsupportedType(filePath); -> not needed, open() + O_WRONLY should fail fast - - const mode_t lockFileMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; //0666 => umask will be applied implicitly! - - //O_EXCL contains a race condition on NFS file systems: https://linux.die.net/man/2/open - const int fdFile = ::open(filePath.c_str(), //const char* pathname - O_CREAT | //int flags - /*access == FileOutput::ACC_OVERWRITE ? O_TRUNC : */ O_EXCL | O_WRONLY | O_CLOEXEC, - lockFileMode); //mode_t mode - if (fdFile == -1) + try { - const int ec = errno; //copy before making other system calls! - const std::wstring errorMsg = replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(filePath)); - const std::wstring errorDescr = formatSystemError("open", ec); + //checkForUnsupportedType(filePath); -> not needed, open() + O_WRONLY should fail fast + + const mode_t lockFileMode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; //0666 => umask will be applied implicitly! - if (ec == EEXIST) - throw ErrorTargetExisting(errorMsg, errorDescr); - //if (ec == ENOENT) throw ErrorTargetPathMissing(errorMsg, errorDescr); + //O_EXCL contains a race condition on NFS file systems: https://linux.die.net/man/2/open + const int fdFile = ::open(filePath.c_str(), //const char* pathname + O_CREAT | //int flags + /*access == FileOutput::ACC_OVERWRITE ? O_TRUNC : */ O_EXCL | O_WRONLY | O_CLOEXEC, + lockFileMode); //mode_t mode + if (fdFile == -1) + { + const int ec = errno; //copy before making other system calls! + if (ec == EEXIST) + throw ErrorTargetExisting(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(filePath)), formatSystemError("open", ec)); + //if (ec == ENOENT) throw ErrorTargetPathMissing(errorMsg, errorDescr); - throw FileError(errorMsg, errorDescr); + THROW_LAST_SYS_ERROR("open"); + } + return fdFile; //pass ownership } - return fdFile; //pass ownership + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(filePath)), e.toString()); } } } -FileOutput::FileOutput(FileHandle handle, const Zstring& filePath, const IoCallback& notifyUnbufferedIO) : - FileBase(handle, filePath), notifyUnbufferedIO_(notifyUnbufferedIO) -{ -} +FileOutputPlain::FileOutputPlain(const Zstring& filePath) : + FileOutputPlain(openHandleForWrite(filePath), filePath) {} //throw FileError, ErrorTargetExisting -FileOutput::FileOutput(const Zstring& filePath, const IoCallback& notifyUnbufferedIO) : - FileBase(openHandleForWrite(filePath), filePath), notifyUnbufferedIO_(notifyUnbufferedIO) {} //throw FileError, ErrorTargetExisting +FileOutputPlain::FileOutputPlain(FileHandle handle, const Zstring& filePath) : + FileBase(handle, filePath) +{ +} -FileOutput::~FileOutput() +FileOutputPlain::~FileOutputPlain() { if (getHandle() != invalidFileHandle) //not finalized => clean up garbage - { - //"deleting while handle is open" == FILE_FLAG_DELETE_ON_CLOSE - if (::unlink(getFilePath().c_str()) != 0) + try + { + //"deleting while handle is open" == FILE_FLAG_DELETE_ON_CLOSE + if (::unlink(getFilePath().c_str()) != 0) + THROW_LAST_SYS_ERROR("unlink"); + } + catch (const SysError&) + { assert(false); - } + warn_static("at least log on failure!") + } } -size_t FileOutput::tryWrite(const void* buffer, size_t bytesToWrite) //throw FileError; may return short! CONTRACT: bytesToWrite > 0 +void FileOutputPlain::reserveSpace(uint64_t expectedSize) //throw FileError { - if (bytesToWrite == 0) - throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); - assert(bytesToWrite <= getBlockSize()); - - ssize_t bytesWritten = 0; - do - { - bytesWritten = ::write(getHandle(), buffer, bytesToWrite); - } - while (bytesWritten < 0 && errno == EINTR); - //write() on macOS: https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man2/write.2.html + //NTFS: "If you set the file allocation info [...] the file contents will be forced into nonresident data, even if it would have fit inside the MFT." + if (expectedSize < 1024) //https://docs.microsoft.com/en-us/archive/blogs/askcore/the-four-stages-of-ntfs-file-growth + return; - if (bytesWritten <= 0) + try { - if (bytesWritten == 0) //comment in safe-read.c suggests to treat this as an error due to buggy drivers - errno = ENOSPC; + //don't use ::posix_fallocate which uses horribly inefficient fallback if FS doesn't support it (EOPNOTSUPP) and changes files size! + //FALLOC_FL_KEEP_SIZE => allocate only, file size is NOT changed! + if (::fallocate(getHandle(), //int fd + FALLOC_FL_KEEP_SIZE, //int mode + 0, //off_t offset + expectedSize) != 0) //off_t len + if (errno != EOPNOTSUPP) //possible, unlike with posix_fallocate() + THROW_LAST_SYS_ERROR("fallocate"); - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), "write"); } - if (bytesWritten > static_cast<ssize_t>(bytesToWrite)) //better safe than sorry - throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), formatSystemError("write", L"", L"Buffer overflow.")); - - //if ::write() is interrupted (EINTR) right in the middle, it will return successfully with "bytesWritten < bytesToWrite"! - return bytesWritten; + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), e.toString()); } } -void FileOutput::write(const void* buffer, size_t bytesToWrite) //throw FileError, X +//may return short! CONTRACT: bytesToWrite > 0 +size_t FileOutputPlain::tryWrite(const void* buffer, size_t bytesToWrite) //throw FileError { - const size_t blockSize = getBlockSize(); - assert(memBuf_.size() >= blockSize); - assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); - - auto it = static_cast<const std::byte*>(buffer); - const auto itEnd = it + bytesToWrite; - for (;;) + if (bytesToWrite == 0) + throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); + assert(bytesToWrite % getBlockSize() == 0 || bytesToWrite < getBlockSize()); + try { - if (memBuf_.size() - bufPos_ < blockSize) //support memBuf_.size() > blockSize to reduce memmove()s, but perf test shows: not really needed! - // || bufPos_ == bufPosEnd_) -> not needed while memBuf_.size() == blockSize + ssize_t bytesWritten = 0; + do { - std::memmove(&memBuf_[0], &memBuf_[0] + bufPos_, bufPosEnd_ - bufPos_); - bufPosEnd_ -= bufPos_; - bufPos_ = 0; + bytesWritten = ::write(getHandle(), buffer, bytesToWrite); } + while (bytesWritten < 0 && errno == EINTR); + //write() on macOS: https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man2/write.2.html - const size_t junkSize = std::min(static_cast<size_t>(itEnd - it), blockSize - (bufPosEnd_ - bufPos_)); - std::memcpy(&memBuf_[0] + bufPosEnd_ /*caveat: vector debug checks*/, it, junkSize); - bufPosEnd_ += junkSize; - it += junkSize; - - if (it == itEnd) - return; - //-------------------------------------------------------------------- - const size_t bytesWritten = tryWrite(&memBuf_[bufPos_], blockSize); //throw FileError; may return short! CONTRACT: bytesToWrite > 0 - bufPos_ += bytesWritten; - if (notifyUnbufferedIO_) notifyUnbufferedIO_(bytesWritten); //throw X! - } -} + if (bytesWritten <= 0) + { + if (bytesWritten == 0) //comment in safe-read.c suggests to treat this as an error due to buggy drivers + errno = ENOSPC; + THROW_LAST_SYS_ERROR("write"); + } + if (bytesWritten > static_cast<ssize_t>(bytesToWrite)) //better safe than sorry + throw SysError(formatSystemError("write", L"", L"Buffer overflow.")); -void FileOutput::flushBuffers() //throw FileError, X -{ - assert(bufPosEnd_ - bufPos_ <= getBlockSize()); - assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); - while (bufPos_ != bufPosEnd_) - { - const size_t bytesWritten = tryWrite(&memBuf_[bufPos_], bufPosEnd_ - bufPos_); //throw FileError; may return short! CONTRACT: bytesToWrite > 0 - bufPos_ += bytesWritten; - if (notifyUnbufferedIO_) notifyUnbufferedIO_(bytesWritten); //throw X! + //if ::write() is interrupted (EINTR) right in the middle, it will return successfully with "bytesWritten < bytesToWrite"! + return bytesWritten; } + catch (const SysError& e) { throw FileError(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), e.toString()); } } +//---------------------------------------------------------------------------------------------------- -void FileOutput::finalize() //throw FileError, X -{ - flushBuffers(); //throw FileError, X - close(); //throw FileError - //~FileBase() calls this one, too, but we want to propagate errors if any -} - - -void FileOutput::reserveSpace(uint64_t expectedSize) //throw FileError +std::string zen::getFileContent(const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/) //throw FileError, X { - //NTFS: "If you set the file allocation info [...] the file contents will be forced into nonresident data, even if it would have fit inside the MFT." - if (expectedSize < 1024) //https://www.sciencedirect.com/topics/computer-science/master-file-table - return; - - //don't use ::posix_fallocate which uses horribly inefficient fallback if FS doesn't support it (EOPNOTSUPP) and changes files size! - //FALLOC_FL_KEEP_SIZE => allocate only, file size is NOT changed! - if (::fallocate(getHandle(), //int fd - FALLOC_FL_KEEP_SIZE, //int mode - 0, //off_t offset - expectedSize) != 0) //off_t len - if (errno != EOPNOTSUPP) //possible, unlike with posix_fallocate() - THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot write file %x."), L"%x", fmtPath(getFilePath())), "fallocate"); + FileInputPlain fileIn(filePath); //throw FileError, ErrorFileLocked + return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead) + { + const size_t bytesRead = fileIn.tryRead(buffer, bytesToRead); //throw FileError; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! + if (notifyUnbufferedIO) notifyUnbufferedIO(bytesRead); //throw X! + return bytesRead; + }, + fileIn.getBlockSize()); //throw FileError, X } -std::string zen::getFileContent(const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/) //throw FileError, X +void zen::setFileContent(const Zstring& filePath, const std::string& byteStream, const IoCallback& notifyUnbufferedIO /*throw X*/) //throw FileError, X { - FileInput streamIn(filePath, notifyUnbufferedIO); //throw FileError, ErrorFileLocked - return bufferedLoad<std::string>(streamIn); //throw FileError, X -} + const Zstring tmpFilePath = getPathWithTempName(filePath); + FileOutputPlain tmpFile(tmpFilePath); //throw FileError, (ErrorTargetExisting) -void zen::setFileContent(const Zstring& filePath, const std::string& byteStream, const IoCallback& notifyUnbufferedIO /*throw X*/) //throw FileError, X -{ - TempFileOutput fileOut(filePath, notifyUnbufferedIO); //throw FileError - if (!byteStream.empty()) + tmpFile.reserveSpace(byteStream.size()); //throw FileError + + unbufferedSave(byteStream, [&](const void* buffer, size_t bytesToWrite) { - //preallocate disk space & reduce fragmentation - fileOut.reserveSpace(byteStream.size()); //throw FileError - fileOut.write(&byteStream[0], byteStream.size()); //throw FileError, X - } - fileOut.commit(); //throw FileError, X + const size_t bytesWritten = tmpFile.tryWrite(buffer, bytesToWrite); //throw FileError; may return short! CONTRACT: bytesToWrite > 0 + if (notifyUnbufferedIO) notifyUnbufferedIO(bytesWritten); //throw X! + return bytesWritten; + }, + tmpFile.getBlockSize()); //throw FileError, X + + tmpFile.close(); //throw FileError + //take over ownership: + ZEN_ON_SCOPE_FAIL( try { removeFilePlain(tmpFilePath); /*throw FileError*/ } + catch (FileError&) {}); + warn_static("log it!") + + //operation finished: move temp file transactionally + moveAndRenameItem(tmpFilePath, filePath, true /*replaceExisting*/); //throw FileError, (ErrorMoveUnsupported), (ErrorTargetExisting) } diff --git a/zen/file_io.h b/zen/file_io.h index f1e4200d..46ffa843 100644 --- a/zen/file_io.h +++ b/zen/file_io.h @@ -8,7 +8,7 @@ #define FILE_IO_H_89578342758342572345 #include "file_access.h" -//#include "serialize.h" +#include "serialize.h" #include "crc.h" #include "guid.h" @@ -17,7 +17,7 @@ namespace zen { const char LINE_BREAK[] = "\n"; //since OS X Apple uses newline, too -/* OS-buffered file IO optimized for +/* OS-buffered file I/O: - sequential read/write accesses - better error reporting - long path support @@ -30,17 +30,21 @@ public: FileHandle getHandle() { return hFile_; } - //Windows: use 64kB ?? https://docs.microsoft.com/en-us/previous-versions/windows/it-pro/windows-2000-server/cc938632%28v=technet.10%29 - //macOS, Linux: use st_blksize? - static size_t getBlockSize() { return 128 * 1024; }; - const Zstring& getFilePath() const { return filePath_; } + size_t getBlockSize(); //throw FileError + + static constexpr size_t defaultBlockSize = 256 * 1024; + + void close(); //throw FileError -> good place to catch errors when closing stream, otherwise called in ~FileBase()! + + const struct stat& getStatBuffered(); //throw FileError + protected: FileBase(FileHandle handle, const Zstring& filePath) : hFile_(handle), filePath_(filePath) {} ~FileBase(); - void close(); //throw FileError -> optional, but good place to catch errors when closing stream! + void setStatBuffered(const struct stat& fileInfo) { statBuf_ = fileInfo; } private: FileBase (const FileBase&) = delete; @@ -48,88 +52,125 @@ private: FileHandle hFile_ = invalidFileHandle; const Zstring filePath_; + size_t blockSizeBuf_ = 0; + std::optional<struct stat> statBuf_; }; //----------------------------------------------------------------------------------------------- -class FileInput : public FileBase +class FileInputPlain : public FileBase { public: - FileInput( const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/); //throw FileError, ErrorFileLocked - FileInput(FileHandle handle, const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/); //takes ownership! + FileInputPlain( const Zstring& filePath); //throw FileError, ErrorFileLocked + FileInputPlain(FileHandle handle, const Zstring& filePath); //takes ownership! - size_t read(void* buffer, size_t bytesToRead); //throw FileError, ErrorFileLocked, X; return "bytesToRead" bytes unless end of stream! + //may return short, only 0 means EOF! CONTRACT: bytesToRead > 0! + size_t tryRead(void* buffer, size_t bytesToRead); //throw FileError, ErrorFileLocked private: - size_t tryRead(void* buffer, size_t bytesToRead); //throw FileError, ErrorFileLocked; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! - - const IoCallback notifyUnbufferedIO_; //throw X - - std::vector<std::byte> memBuf_ = std::vector<std::byte>(getBlockSize()); - size_t bufPos_ = 0; - size_t bufPosEnd_= 0; + FileInputPlain(const std::pair<FileBase::FileHandle, struct stat>& fileDetails, const Zstring& filePath); }; -class FileOutput : public FileBase +class FileOutputPlain : public FileBase { public: - FileOutput( const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/); //throw FileError, ErrorTargetExisting - FileOutput(FileHandle handle, const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/); //takes ownership! - ~FileOutput(); + FileOutputPlain( const Zstring& filePath); //throw FileError, ErrorTargetExisting + FileOutputPlain(FileHandle handle, const Zstring& filePath); //takes ownership! + ~FileOutputPlain(); + //preallocate disk space & reduce fragmentation void reserveSpace(uint64_t expectedSize); //throw FileError - void write(const void* buffer, size_t bytesToWrite); //throw FileError, X - void flushBuffers(); //throw FileError, X - //caveat: does NOT flush OS or hard disk buffers like e.g. FlushFileBuffers()! + //may return short! CONTRACT: bytesToWrite > 0 + size_t tryWrite(const void* buffer, size_t bytesToWrite); //throw FileError - void finalize(); /*= flushBuffers() + close()*/ //throw FileError, X + //close() when done, or else file is considered incomplete and will be deleted! private: - size_t tryWrite(const void* buffer, size_t bytesToWrite); //throw FileError; may return short! CONTRACT: bytesToWrite > 0 - - IoCallback notifyUnbufferedIO_; //throw X - std::vector<std::byte> memBuf_ = std::vector<std::byte>(getBlockSize()); - size_t bufPos_ = 0; - size_t bufPosEnd_ = 0; }; -//----------------------------------------------------------------------------------------------- -//native stream I/O convenience functions: -class TempFileOutput +//-------------------------------------------------------------------- + +namespace impl +{ +inline +auto makeTryRead(FileInputPlain& fip, const IoCallback& notifyUnbufferedIO /*throw X*/) +{ + return [&](void* buffer, size_t bytesToRead) + { + const size_t bytesRead = fip.tryRead(buffer, bytesToRead); //throw FileError, ErrorFileLocked; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! + if (notifyUnbufferedIO) notifyUnbufferedIO(bytesRead); //throw X + return bytesRead; + }; +} + + +inline +auto makeTryWrite(FileOutputPlain& fop, const IoCallback& notifyUnbufferedIO /*throw X*/) +{ + return [&](const void* buffer, size_t bytesToWrite) + { + const size_t bytesWritten = fop.tryWrite(buffer, bytesToWrite); //throw FileError + if (notifyUnbufferedIO) notifyUnbufferedIO(bytesWritten); //throw X + return bytesWritten; + }; +} +} + +//-------------------------------------------------------------------- + +class FileInputBuffered { public: - TempFileOutput( const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/) : //throw FileError - filePath_(filePath), - tmpFile_(tmpFilePath_, notifyUnbufferedIO) {} //throw FileError, (ErrorTargetExisting) + FileInputBuffered(const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/) : //throw FileError, ErrorFileLocked + fileIn_(filePath), //throw FileError, ErrorFileLocked + notifyUnbufferedIO_(notifyUnbufferedIO) {} - void reserveSpace(uint64_t expectedSize) { tmpFile_.reserveSpace(expectedSize); } //throw FileError + //return "bytesToRead" bytes unless end of stream! + size_t read(void* buffer, size_t bytesToRead) { return streamIn_.read(buffer, bytesToRead); } //throw FileError, ErrorFileLocked, X + +private: + FileInputPlain fileIn_; + const IoCallback notifyUnbufferedIO_; //throw X - void write(const void* buffer, size_t bytesToWrite) { tmpFile_.write(buffer, bytesToWrite); } //throw FileError, X + BufferedInputStream<FunctionReturnTypeT<decltype(&impl::makeTryRead)>> + streamIn_{impl::makeTryRead(fileIn_, notifyUnbufferedIO_), fileIn_.getBlockSize()}; //throw FileError +}; - FileOutput& refTempFile() { return tmpFile_; } - void commit() //throw FileError, X - { - tmpFile_.finalize(); //throw FileError, X +class FileOutputBuffered +{ +public: + FileOutputBuffered(const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/) : //throw FileError, ErrorTargetExisting + fileOut_(filePath), //throw FileError, ErrorTargetExisting + notifyUnbufferedIO_(notifyUnbufferedIO) {} - //take ownership: - ZEN_ON_SCOPE_FAIL( try { removeFilePlain(tmpFilePath_); /*throw FileError*/ } - catch (FileError&) {}); + void write(const void* buffer, size_t bytesToWrite) { streamOut_.write(buffer, bytesToWrite); } //throw FileError, X - //operation finished: move temp file transactionally - moveAndRenameItem(tmpFilePath_, filePath_, true /*replaceExisting*/); //throw FileError, (ErrorMoveUnsupported), (ErrorTargetExisting) + void finalize() //throw FileError, X + { + streamOut_.flushBuffer(); //throw FileError, X + fileOut_.close(); //throw FileError } private: - //generate (hopefully) unique file name to avoid clashing with unrelated tmp file - const Zstring filePath_; - const Zstring shortGuid_ = printNumber<Zstring>(Zstr("%04x"), static_cast<unsigned int>(getCrc16(generateGUID()))); - const Zstring tmpFilePath_ = filePath_ + Zstr('.') + shortGuid_ + Zstr(".tmp"); - FileOutput tmpFile_; + FileOutputPlain fileOut_; + const IoCallback notifyUnbufferedIO_; //throw X + + BufferedOutputStream<FunctionReturnTypeT<decltype(&impl::makeTryWrite)>> + streamOut_{impl::makeTryWrite(fileOut_, notifyUnbufferedIO_), fileOut_.getBlockSize()}; //throw FileError }; +//----------------------------------------------------------------------------------------------- + +//stream I/O convenience functions: +inline +Zstring getPathWithTempName(const Zstring& filePath) //generate (hopefully) unique file name +{ + const Zstring shortGuid_ = printNumber<Zstring>(Zstr("%04x"), static_cast<unsigned int>(getCrc16(generateGUID()))); + return filePath + Zstr('.') + shortGuid_ + Zstr(".tmp"); +} [[nodiscard]] std::string getFileContent(const Zstring& filePath, const IoCallback& notifyUnbufferedIO /*throw X*/); //throw FileError, X diff --git a/zen/file_path.cpp b/zen/file_path.cpp index f5c207f3..912d5a37 100644 --- a/zen/file_path.cpp +++ b/zen/file_path.cpp @@ -70,7 +70,7 @@ std::optional<Zstring> zen::getParentFolderPath(const Zstring& itemPath) return appendPath(pc->rootPath, beforeLast(pc->relPath, FILE_NAME_SEPARATOR, IfNotFoundReturn::none)); } - assert(false); + assert(itemPath.empty()); return std::nullopt; } diff --git a/zen/globals.h b/zen/globals.h index dd0dfed9..9e22f56b 100644 --- a/zen/globals.h +++ b/zen/globals.h @@ -10,7 +10,6 @@ #include <atomic> #include <memory> #include "scope_guard.h" -#include "legacy_compiler.h" namespace zen @@ -188,7 +187,7 @@ void registerGlobalForDestruction(CleanUpEntry& entry) static struct { PodSpinMutex spinLock; - CleanUpEntry* head; + CleanUpEntry* head = nullptr; } cleanUpList; static_assert(std::is_trivially_destructible_v<decltype(cleanUpList)>, "we must not generate code for magic statics!"); @@ -25,7 +25,7 @@ std::string generateGUID() //creates a 16-byte GUID #endif #if __GLIBC_PREREQ(2, 25) //getentropy() requires Glibc 2.25 (ldd --version) PS: CentOS 7 is on 2.17 - if (::getentropy(&guid[0], guid.size()) != 0) //"The maximum permitted value for the length argument is 256" + if (::getentropy(guid.data(), guid.size()) != 0) //"The maximum permitted value for the length argument is 256" throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Failed to generate GUID." + "\n\n" + utfTo<std::string>(formatSystemError("getentropy", errno))); #else @@ -58,7 +58,7 @@ std::string generateGUID() //creates a 16-byte GUID const int fd_ = ::open("/dev/urandom", O_RDONLY | O_CLOEXEC); }; thread_local RandomGeneratorPosix gen; - gen.getBytes(&guid[0], guid.size()); + gen.getBytes(guid.data(), guid.size()); #endif return guid; diff --git a/zen/http.cpp b/zen/http.cpp index a26bb3a5..ee71e5b3 100644 --- a/zen/http.cpp +++ b/zen/http.cpp @@ -8,12 +8,18 @@ #include <libcurl/curl_wrap.h> //DON'T include <curl/curl.h> directly! #include "stream_buffer.h" - #include "thread.h" using namespace zen; + const int HTTP_ACCESS_TIME_OUT_SEC = 20; +const size_t HTTP_BLOCK_SIZE_DOWNLOAD = 64 * 1024; //libcurl returns blocks of only 16 kB as returned by recv() even if we request larger blocks via CURLOPT_BUFFERSIZE +//- InternetReadFile() is buffered + prefetching +//- libcurl returns blocks of only 16 kB as returned by recv() even if we request larger blocks via CURLOPT_BUFFERSIZE + const size_t HTTP_STREAM_BUFFER_SIZE = 1024 * 1024; //unit: [byte] + //stream buffer should be big enough to facilitate prefetching during alternating read/write operations => e.g. see serialize.h::unbufferedStreamCopy() + @@ -23,16 +29,14 @@ public: Impl(const Zstring& url, const std::string* postBuf, //issue POST if bound, GET otherwise const std::string& contentType, //required for POST + const IoCallback& onPostBytesSent /*throw X*/, bool disableGetCache, //not relevant for POST (= never cached) const Zstring& userAgent, - const Zstring& caCertFilePath, //optional: enable certificate validation - const IoCallback& notifyUnbufferedIO /*throw X*/) : //throw SysError, X - notifyUnbufferedIO_(notifyUnbufferedIO) + const Zstring& caCertFilePath /*optional: enable certificate validation*/) //throw SysError, X { ZEN_ON_SCOPE_FAIL(cleanup()); //destructor call would lead to member double clean-up!!! - //may be sending large POST: call back first - if (notifyUnbufferedIO_) notifyUnbufferedIO_(0); //throw X + assert(postBuf || !onPostBytesSent); const Zstring urlFmt = afterFirst(url, Zstr("://"), IfNotFoundReturn::none); const Zstring server = beforeFirst(urlFmt, Zstr('/'), IfNotFoundReturn::all); @@ -61,12 +65,14 @@ public: auto promiseHeader = std::make_shared<std::promise<std::string>>(); std::future<std::string> futHeader = promiseHeader->get_future(); - worker_ = InterruptibleThread([asyncStreamOut = this->asyncStreamIn_, promiseHeader, headers = std::move(headers), + auto postBytesSent = std::make_shared<std::atomic<int64_t>>(0); + + worker_ = InterruptibleThread([asyncStreamOut = this->asyncStreamIn_, promiseHeader, headers = std::move(headers), postBytesSent, server, useTls, caCertFilePath, userAgent = utfTo<std::string>(userAgent), postBuf = postBuf ? std::optional<std::string>(*postBuf) : std::nullopt, //[!] life-time! serverRelPath = utfTo<std::string>(page)] { - setCurrentThreadName(Zstr("HttpInputStream ") + server); + setCurrentThreadName(Zstr("Istream ") + server); bool headerReceived = false; try @@ -77,13 +83,22 @@ public: std::vector<CurlOption> extraOptions {{CURLOPT_USERAGENT, userAgent.c_str()}}; //CURLOPT_FOLLOWLOCATION already off by default :) + + + std::function<size_t(std::span<char> buf)> readRequest; if (postBuf) { - extraOptions.emplace_back(CURLOPT_POSTFIELDS, postBuf->c_str()); - extraOptions.emplace_back(CURLOPT_POSTFIELDSIZE_LARGE, postBuf->size()); //postBuf not necessarily null-terminated! + readRequest = [&, postBufStream{MemoryStreamIn(*postBuf)}](std::span<char> buf) mutable + { + const size_t bytesRead = postBufStream.read(buf.data(), buf.size()); + *postBytesSent += bytesRead; + return bytesRead; + }; + extraOptions.emplace_back(CURLOPT_POST, 1); + extraOptions.emplace_back(CURLOPT_POSTFIELDSIZE_LARGE, postBuf->size()); //avoid HTTP chunked transfer encoding? } - //carefully with these callbacks! First receive HTTP header without blocking, + //careful with these callbacks! First receive HTTP header without blocking, //and only then allow AsyncStreamBuffer::write() which can block! std::string headerBuf; @@ -109,13 +124,13 @@ public: if (!headerReceived) throw SysError(L"Received HTTP body without header."); - return asyncStreamOut->write(buf.data(), buf.size()); //throw ThreadStopRequest + asyncStreamOut->write(buf.data(), buf.size()); //throw ThreadStopRequest }; httpSession.perform(serverRelPath, curlHeaders, extraOptions, writeResponse /*throw ThreadStopRequest*/, - nullptr /*readRequest*/, + readRequest, onHeaderData /*throw SysError*/, HTTP_ACCESS_TIME_OUT_SEC); //throw SysError, ThreadStopRequest @@ -133,6 +148,19 @@ public: } }); + //------------------------------------------------------------------------------------ + if (postBuf && onPostBytesSent) + { + int64_t bytesReported = 0; + while (futHeader.wait_for(std::chrono::milliseconds(50)) == std::future_status::timeout) + { + const int64_t bytesDelta = *postBytesSent /*atomic shared access!*/- bytesReported; + bytesReported += bytesDelta; + onPostBytesSent(bytesDelta); //throw X + } + } + //------------------------------------------------------------------------------------ + const std::string headBuf = futHeader.get(); //throw SysError //parse header: https://www.w3.org/Protocols/HTTP/1.0/spec.html#Request-Line const std::string& statusBuf = beforeFirst(headBuf, "\r\n", IfNotFoundReturn::all); @@ -151,9 +179,6 @@ public: /* let's NOT consider "Content-Length" header: - may be unavailable ("Transfer-Encoding: chunked") - may refer to compressed data size ("Content-Encoding: gzip") */ - - //let's not get too finicky: at least report the logical amount of bytes sent/received (excluding HTTP headers) - if (notifyUnbufferedIO_) notifyUnbufferedIO_(postBuf ? postBuf->size() : 0); //throw X } ~Impl() { cleanup(); } @@ -166,39 +191,28 @@ public: return it != responseHeaders_.end() ? &it->second : nullptr; } - size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream! + size_t getBlockSize() const { return HTTP_BLOCK_SIZE_DOWNLOAD; } + + size_t tryRead(void* buffer, size_t bytesToRead) //throw SysError; may return short; only 0 means EOF! CONTRACT: bytesToRead > 0! { - const size_t bytesRead = asyncStreamIn_->read(buffer, bytesToRead); //throw SysError - reportBytesProcessed(); //throw X - return bytesRead; + return asyncStreamIn_->tryRead(buffer, bytesToRead); //throw SysError //no need for asyncStreamIn_->checkWriteErrors(): once end of stream is reached, asyncStreamOut->closeStream() was called => no errors occured } - size_t getBlockSize() const { return 64 * 1024; } - private: Impl (const Impl&) = delete; Impl& operator=(const Impl&) = delete; - void reportBytesProcessed() //throw X - { - const int64_t totalBytesDownloaded = asyncStreamIn_->getTotalBytesWritten(); - if (notifyUnbufferedIO_) notifyUnbufferedIO_(totalBytesDownloaded - totalBytesReported_); //throw X - totalBytesReported_ = totalBytesDownloaded; - } - void cleanup() { asyncStreamIn_->setReadError(std::make_exception_ptr(ThreadStopRequest())); + warn_static("log on error!") } - std::shared_ptr<AsyncStreamBuffer> asyncStreamIn_ = std::make_shared<AsyncStreamBuffer>(512 * 1024); + std::shared_ptr<AsyncStreamBuffer> asyncStreamIn_ = std::make_shared<AsyncStreamBuffer>(HTTP_STREAM_BUFFER_SIZE); InterruptibleThread worker_; - int64_t totalBytesReported_ = 0; int statusCode_ = 0; std::unordered_map<std::string, std::string, StringHashAsciiNoCase, StringEqualAsciiNoCase> responseHeaders_; - - const IoCallback notifyUnbufferedIO_; //throw X }; @@ -206,11 +220,20 @@ HttpInputStream::HttpInputStream(std::unique_ptr<Impl>&& pimpl) : pimpl_(std::mo HttpInputStream::~HttpInputStream() {} -size_t HttpInputStream::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X; return "bytesToRead" bytes unless end of stream! +size_t HttpInputStream::tryRead(void* buffer, size_t bytesToRead) { return pimpl_->tryRead(buffer, bytesToRead); } size_t HttpInputStream::getBlockSize() const { return pimpl_->getBlockSize(); } -std::string HttpInputStream::readAll() { return bufferedLoad<std::string>(*pimpl_); } //throw SysError, X +std::string HttpInputStream::readAll(const IoCallback& notifyUnbufferedIO /*throw X*/) //throw SysError, X +{ + return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead) + { + const size_t bytesRead = pimpl_->tryRead(buffer, bytesToRead); //throw SysError; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! + if (notifyUnbufferedIO) notifyUnbufferedIO(bytesRead); //throw X! + return bytesRead; + }, + pimpl_->getBlockSize()); //throw SysError, X +} namespace @@ -218,15 +241,16 @@ namespace std::unique_ptr<HttpInputStream::Impl> sendHttpRequestImpl(const Zstring& url, const std::string* postBuf /*issue POST if bound, GET otherwise*/, const std::string& contentType, //required for POST + const IoCallback& onPostBytesSent /*throw X*/, const Zstring& userAgent, - const Zstring& caCertFilePath /*optional: enable certificate validation*/, - const IoCallback& notifyUnbufferedIO) //throw SysError, X + const Zstring& caCertFilePath /*optional: enable certificate validation*/) //throw SysError, X { Zstring urlRed = url; //"A user agent should not automatically redirect a request more than five times, since such redirections usually indicate an infinite loop." for (int redirects = 0; redirects < 6; ++redirects) { - auto response = std::make_unique<HttpInputStream::Impl>(urlRed, postBuf, contentType, false /*disableGetCache*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X + auto response = std::make_unique<HttpInputStream::Impl>(urlRed, postBuf, contentType, onPostBytesSent, false /*disableGetCache*/, + userAgent, caCertFilePath); //throw SysError, X //https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#3xx_Redirection const int httpStatus = response->getStatusCode(); @@ -319,24 +343,30 @@ std::vector<std::pair<std::string, std::string>> zen::xWwwFormUrlDecode(const st } -HttpInputStream zen::sendHttpGet(const Zstring& url, const Zstring& userAgent, const Zstring& caCertFilePath, const IoCallback& notifyUnbufferedIO) //throw SysError, X +HttpInputStream zen::sendHttpGet(const Zstring& url, const Zstring& userAgent, const Zstring& caCertFilePath) //throw SysError { - return sendHttpRequestImpl(url, nullptr /*postBuf*/, "" /*contentType*/, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X, X + return sendHttpRequestImpl(url, nullptr /*postBuf*/, "" /*contentType*/, nullptr /*onPostBytesSent*/, userAgent, caCertFilePath); //throw SysError } HttpInputStream zen::sendHttpPost(const Zstring& url, const std::vector<std::pair<std::string, std::string>>& postParams, - const Zstring& userAgent, const Zstring& caCertFilePath, const IoCallback& notifyUnbufferedIO) //throw SysError, X + const IoCallback& notifyUnbufferedIO /*throw X*/, + const Zstring& userAgent, + const Zstring& caCertFilePath) //throw SysError, X { - return sendHttpPost(url, xWwwFormUrlEncode(postParams), "application/x-www-form-urlencoded", userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X + return sendHttpPost(url, xWwwFormUrlEncode(postParams), "application/x-www-form-urlencoded", notifyUnbufferedIO, userAgent, caCertFilePath); //throw SysError, X } -HttpInputStream zen::sendHttpPost(const Zstring& url, const std::string& postBuf, const std::string& contentType, - const Zstring& userAgent, const Zstring& caCertFilePath, const IoCallback& notifyUnbufferedIO) //throw SysError, X +HttpInputStream zen::sendHttpPost(const Zstring& url, + const std::string& postBuf, + const std::string& contentType, + const IoCallback& notifyUnbufferedIO /*throw X*/, + const Zstring& userAgent, + const Zstring& caCertFilePath) //throw SysError, X { - return sendHttpRequestImpl(url, &postBuf, contentType, userAgent, caCertFilePath, notifyUnbufferedIO); //throw SysError, X + return sendHttpRequestImpl(url, &postBuf, contentType, notifyUnbufferedIO, userAgent, caCertFilePath); //throw SysError, X } @@ -347,10 +377,10 @@ bool zen::internetIsAlive() //noexcept auto response = std::make_unique<HttpInputStream::Impl>(Zstr("https://www.google.com/"), //https more appropriate than http for testing? (different ports!) nullptr /*postParams*/, "" /*contentType*/, + nullptr /*onPostBytesSent*/, true /*disableGetCache*/, Zstr("FreeFileSync"), - Zstring() /*caCertFilePath*/, - nullptr /*notifyUnbufferedIO*/); //throw SysError + Zstring() /*caCertFilePath*/); //throw SysError const int statusCode = response->getStatusCode(); //attention: google.com might redirect to https://consent.google.com => don't follow, just return "true"!!! @@ -12,17 +12,18 @@ namespace zen { -/* - thread-safe! (Window/Linux/macOS) - - Linux/macOS: init libcurl before use! */ +/* - Linux/macOS: init libcurl before use! + - safe to use on worker thread */ class HttpInputStream { public: - //support zen/serialize.h buffered input stream concept - size_t read(void* buffer, size_t bytesToRead); //throw SysError, X; return "bytesToRead" bytes unless end of stream! - std::string readAll(); //throw SysError, X + //zen/serialize.h unbuffered input stream concept: + size_t tryRead(void* buffer, size_t bytesToRead); //throw SysError; may return short; only 0 means EOF! CONTRACT: bytesToRead > 0! size_t getBlockSize() const; + std::string readAll(const IoCallback& notifyUnbufferedIO /*throw X*/); //throw SysError, X + class Impl; HttpInputStream(std::unique_ptr<Impl>&& pimpl); HttpInputStream(HttpInputStream&&) noexcept = default; @@ -35,20 +36,17 @@ private: HttpInputStream sendHttpGet(const Zstring& url, const Zstring& userAgent, - const Zstring& caCertFilePath /*optional: enable certificate validation*/, - const IoCallback& notifyUnbufferedIO /*throw X*/); //throw SysError, X + const Zstring& caCertFilePath /*optional: enable certificate validation*/); //throw SysError HttpInputStream sendHttpPost(const Zstring& url, - const std::vector<std::pair<std::string, std::string>>& postParams, + const std::vector<std::pair<std::string, std::string>>& postParams, const IoCallback& notifyUnbufferedIO /*throw X*/, const Zstring& userAgent, - const Zstring& caCertFilePath /*optional: enable certificate validation*/, - const IoCallback& notifyUnbufferedIO /*throw X*/); //throw SysError, X + const Zstring& caCertFilePath /*optional: enable certificate validation*/); //throw SysError, X HttpInputStream sendHttpPost(const Zstring& url, - const std::string& postBuf, const std::string& contentType, + const std::string& postBuf, const std::string& contentType, const IoCallback& notifyUnbufferedIO /*throw X*/, const Zstring& userAgent, - const Zstring& caCertFilePath /*optional: enable certificate validation*/, - const IoCallback& notifyUnbufferedIO /*throw X*/); //throw SysError, X + const Zstring& caCertFilePath /*optional: enable certificate validation*/); //throw SysError, X bool internetIsAlive(); //noexcept std::wstring formatHttpError(int httpStatus); @@ -7,8 +7,6 @@ #ifndef I18_N_H_3843489325044253425456 #define I18_N_H_3843489325044253425456 -//#include <string> -//#include <cstdint> #include "globals.h" #include "string_tools.h" #include "format_unit.h" diff --git a/zen/open_ssl.cpp b/zen/open_ssl.cpp index 6dc13d3d..4494b76f 100644 --- a/zen/open_ssl.cpp +++ b/zen/open_ssl.cpp @@ -39,6 +39,8 @@ void zen::openSslInit() //excplicitly init OpenSSL on main thread: seems to initialize atomically! But it still might help to avoid issues: [[maybe_unused]] const int rv = ::OPENSSL_init_ssl(OPENSSL_INIT_SSL_DEFAULT | OPENSSL_INIT_NO_LOAD_CONFIG, nullptr); assert(rv == 1); //https://www.openssl.org/docs/man1.1.0/ssl/OPENSSL_init_ssl.html + + warn_static("probably should log") } @@ -240,7 +242,7 @@ std::string keyToStream(const EVP_PKEY* evp, RsaStreamType streamType, bool publ std::string keyStream(keyLen, '\0'); - if (::BIO_read(bio, &keyStream[0], keyLen) != keyLen) + if (::BIO_read(bio, keyStream.data(), keyLen) != keyLen) throw SysError(formatLastOpenSSLError("BIO_read")); return keyStream; } @@ -304,7 +306,7 @@ std::string keyToStream(const EVP_PKEY* evp, RsaStreamType streamType, bool publ std::string keyStream(keyLen, '\0'); - if (::BIO_read(bio, &keyStream[0], keyLen) != keyLen) + if (::BIO_read(bio, keyStream.data(), keyLen) != keyLen) throw SysError(formatLastOpenSSLError("BIO_read")); return keyStream; #endif @@ -354,9 +356,9 @@ std::string createSignature(const std::string& message, EVP_PKEY* privateKey) // std::string signature(sigLenMax, '\0'); size_t sigLen = sigLenMax; - if (::EVP_DigestSignFinal(mdctx, //EVP_MD_CTX* ctx - reinterpret_cast<unsigned char*>(&signature[0]), //unsigned char* sigret - &sigLen) != 1) //size_t* siglen + if (::EVP_DigestSignFinal(mdctx, //EVP_MD_CTX* ctx + reinterpret_cast<unsigned char*>(signature.data()), //unsigned char* sigret + &sigLen) != 1) //size_t* siglen throw SysError(formatLastOpenSSLError("EVP_DigestSignFinal")); signature.resize(sigLen); @@ -499,8 +501,8 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: const auto block2 = std::string("\0\0\0\1", 4) + passphrase; unsigned char key[2 * SHA_DIGEST_LENGTH] = {}; - ::SHA1(reinterpret_cast<const unsigned char*>(block1.c_str()), block1.size(), &key[0]); //no-fail - ::SHA1(reinterpret_cast<const unsigned char*>(block2.c_str()), block2.size(), &key[SHA_DIGEST_LENGTH]); // + ::SHA1(reinterpret_cast<const unsigned char*>(block1.c_str()), block1.size(), key); //no-fail + ::SHA1(reinterpret_cast<const unsigned char*>(block2.c_str()), block2.size(), key + SHA_DIGEST_LENGTH); // EVP_CIPHER_CTX* cipCtx = ::EVP_CIPHER_CTX_new(); if (!cipCtx) @@ -522,7 +524,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: int decLen1 = 0; if (::EVP_DecryptUpdate(cipCtx, //EVP_CIPHER_CTX* ctx - reinterpret_cast<unsigned char*>(&privateBlob[0]), //unsigned char* out + reinterpret_cast<unsigned char*>(privateBlob.data()), //unsigned char* out &decLen1, //int* outl reinterpret_cast<const unsigned char*>(privateBlobEnc.c_str()), //const unsigned char* in static_cast<int>(privateBlobEnc.size())) != 1) //int inl @@ -543,7 +545,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: macKeyBlob += passphrase; unsigned char macKey[SHA_DIGEST_LENGTH] = {}; - ::SHA1(reinterpret_cast<const unsigned char*>(macKeyBlob.c_str()), macKeyBlob.size(), &macKey[0]); //no-fail + ::SHA1(reinterpret_cast<const unsigned char*>(macKeyBlob.c_str()), macKeyBlob.size(), macKey); //no-fail auto numToBeString = [](size_t n) -> std::string { @@ -607,7 +609,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: { const std::string bytes = extractString(it, itEnd); - BIGNUM* bn = ::BN_bin2bn(reinterpret_cast<const unsigned char*>(&bytes[0]), static_cast<int>(bytes.size()), nullptr); + BIGNUM* bn = ::BN_bin2bn(reinterpret_cast<const unsigned char*>(bytes.c_str()), static_cast<int>(bytes.size()), nullptr); if (!bn) throw SysError(formatLastOpenSSLError("BN_bin2bn")); return std::unique_ptr<BIGNUM, BnFree>(bn); @@ -809,7 +811,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: if (::OSSL_PARAM_BLD_push_utf8_string(paramBld, OSSL_PKEY_PARAM_GROUP_NAME, groupName, 0) != 1) throw SysError(formatLastOpenSSLError("OSSL_PARAM_BLD_push_utf8_string(group)")); - if (::OSSL_PARAM_BLD_push_octet_string(paramBld, OSSL_PKEY_PARAM_PUB_KEY, &pointStream[0], pointStream.size()) != 1) + if (::OSSL_PARAM_BLD_push_octet_string(paramBld, OSSL_PKEY_PARAM_PUB_KEY, pointStream.data(), pointStream.size()) != 1) throw SysError(formatLastOpenSSLError("OSSL_PARAM_BLD_push_octet_string(pub)")); if (::OSSL_PARAM_BLD_push_BN(paramBld, OSSL_PKEY_PARAM_PRIV_KEY, pri.get()) != 1) @@ -861,7 +863,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: if (::EC_POINT_oct2point(ecGroup, //const EC_GROUP* group ecPoint, //EC_POINT* p - reinterpret_cast<const unsigned char*>(&pointStream[0]), //const unsigned char* buf + reinterpret_cast<const unsigned char*>(pointStream.c_str()), //const unsigned char* buf pointStream.size(), //size_t len nullptr) != 1) //BN_CTX* ctx throw SysError(formatLastOpenSSLError("EC_POINT_oct2point")); @@ -890,7 +892,7 @@ std::string zen::convertPuttyKeyToPkix(const std::string& keyStream, const std:: EVP_PKEY* evpPriv = ::EVP_PKEY_new_raw_private_key(EVP_PKEY_ED25519, //int type nullptr, //ENGINE* e - reinterpret_cast<const unsigned char*>(&priStream[0]), //const unsigned char* priv + reinterpret_cast<const unsigned char*>(priStream.c_str()), //const unsigned char* priv priStream.size()); //size_t len if (!evpPriv) throw SysError(formatLastOpenSSLError("EVP_PKEY_new_raw_private_key")); @@ -96,14 +96,12 @@ public: void showResult() { - const bool wasRunning = !watch_.isPaused(); - if (wasRunning) watch_.pause(); //don't include call to MessageBox()! - ZEN_ON_SCOPE_EXIT(if (wasRunning) watch_.resume()); - const int64_t timeMs = std::chrono::duration_cast<std::chrono::milliseconds>(watch_.elapsed()).count(); const std::string msg = numberTo<std::string>(timeMs) + " ms"; - std::clog << "Perf: duration: " << msg << '\n'; + std::clog << "Perf: duration: " << msg + '\n'; resultShown_ = true; + + watch_ = StopWatch(watch_.isPaused()); } private: diff --git a/zen/process_exec.cpp b/zen/process_exec.cpp index fb691151..a2c02eb0 100644 --- a/zen/process_exec.cpp +++ b/zen/process_exec.cpp @@ -122,7 +122,7 @@ std::pair<int /*exit code*/, std::string> processExecuteImpl(const Zstring& file argv.push_back(arg.c_str()); argv.push_back(nullptr); - /*int rv =*/::execv(argv[0], const_cast<char**>(&argv[0])); //only returns if an error occurred + /*int rv =*/::execv(argv[0], const_cast<char**>(argv.data())); //only returns if an error occurred //safe to cast away const: https://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html // "The statement about argv[] and envp[] being constants is included to make explicit to future // writers of language bindings that these objects are completely constant. Due to a limitation of @@ -206,8 +206,13 @@ std::pair<int /*exit code*/, std::string> processExecuteImpl(const Zstring& file THROW_LAST_SYS_ERROR("lseek"); guardTmpFile.dismiss(); - FileInput streamIn(fdTempFile, tempFilePath, nullptr /*notifyUnbufferedIO*/); //takes ownership! - std::string output = bufferedLoad<std::string>(streamIn); //throw FileError + FileInputPlain streamIn(fdTempFile, tempFilePath); //takes ownership! + + std::string output = unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead) + { + return streamIn.tryRead(buffer, bytesToRead); //throw FileError; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0! + }, + streamIn.getBlockSize()); //throw FileError if (!WIFEXITED(statusCode)) //signalled, crashed? throw SysError(formatSystemError("waitpid", WIFSIGNALED(statusCode) ? @@ -220,7 +225,7 @@ std::pair<int /*exit code*/, std::string> processExecuteImpl(const Zstring& file exitCode == 127) //details should have been streamed to STDERR: used by /bin/sh, e.g. failure to execute due to missing .so file throw SysError(utfTo<std::wstring>(trimCpy(output))); - return {exitCode, output}; + return {exitCode, std::move(output)}; } } diff --git a/zen/recycler.cpp b/zen/recycler.cpp index 4448fd60..26c27952 100644 --- a/zen/recycler.cpp +++ b/zen/recycler.cpp @@ -5,9 +5,7 @@ // ***************************************************************************** #include "recycler.h" -#include "file_access.h" - #include <sys/stat.h> #include <gio/gio.h> #include "scope_guard.h" @@ -17,7 +15,7 @@ using namespace zen; //*INDENT-OFF* -bool zen::recycleOrDeleteIfExists(const Zstring& itemPath) //throw FileError +void zen::moveToRecycleBin(const Zstring& itemPath) //throw FileError, RecycleBinUnavailable { GFile* file = ::g_file_new_for_path(itemPath.c_str()); //never fails according to docu ZEN_ON_SCOPE_EXIT(g_object_unref(file);) @@ -27,10 +25,6 @@ bool zen::recycleOrDeleteIfExists(const Zstring& itemPath) //throw FileError if (!::g_file_trash(file, nullptr, &error)) { - const std::optional<ItemType> type = itemStillExists(itemPath); //throw FileError - if (!type) - return false; - /* g_file_trash() can fail with different error codes/messages when trash is unavailable: Debian 8 (GLib 2.42): G_IO_ERROR_NOT_SUPPORTED: Unable to find or create trash directory CentOS 7 (GLib 2.56): G_IO_ERROR_FAILED: Unable to find or create trash directory for file.txt => localized! >:( @@ -42,7 +36,7 @@ bool zen::recycleOrDeleteIfExists(const Zstring& itemPath) //throw FileError //yes, the following is a cluster fuck, but what can you do? (error->code == G_IO_ERROR_FAILED && [&] { - for (const char* msgLoc : //translations from https://gitlab.gnome.org/GNOME/glib/-/tree/master/po + for (const char* msgLoc : //translations from https://gitlab.gnome.org/GNOME/glib/-/tree/main/po { "Unable to find or create trash directory for", "No s'ha pogut trobar o crear el directori de la paperera per", @@ -100,35 +94,12 @@ bool zen::recycleOrDeleteIfExists(const Zstring& itemPath) //throw FileError return false; }())); - if (trashUnavailable) //implement same behavior as on Windows: if recycler is not existing, delete permanently - { - if (*type == ItemType::folder) - removeDirectoryPlainRecursion(itemPath); //throw FileError - else - removeFilePlain(itemPath); //throw FileError - return true; - } + if (trashUnavailable) + throw RecycleBinUnavailable(replaceCpy(_("The recycle bin is not available for %x."), L"%x", fmtPath(itemPath)), + formatGlibError("g_file_trash", error)); throw FileError(replaceCpy(_("Unable to move %x to the recycle bin."), L"%x", fmtPath(itemPath)), formatGlibError("g_file_trash", error)); } - return true; } //*INDENT-ON* - - -/* We really need access to a similar function to check whether a directory supports trashing and emit a warning if it does not! - - The following function looks perfect, alas it is restricted to local files and to the implementation of GIO only: - - gboolean _g_local_file_has_trash_dir(const char* dirpath, dev_t dir_dev); - See: http://www.netmite.com/android/mydroid/2.0/external/bluetooth/glib/gio/glocalfileinfo.h - - Just checking for "G_FILE_ATTRIBUTE_ACCESS_CAN_TRASH" is not correct, since we find in - http://www.netmite.com/android/mydroid/2.0/external/bluetooth/glib/gio/glocalfileinfo.c - - g_file_info_set_attribute_boolean (info, G_FILE_ATTRIBUTE_ACCESS_CAN_TRASH, - writable && parent_info->has_trash_dir); - - => We're NOT interested in whether the specified folder can be trashed, but whether it supports thrashing its child elements! (Only support, not actual write access!) - This renders G_FILE_ATTRIBUTE_ACCESS_CAN_TRASH useless for this purpose. */ diff --git a/zen/recycler.h b/zen/recycler.h index deb03a1c..79771321 100644 --- a/zen/recycler.h +++ b/zen/recycler.h @@ -18,23 +18,17 @@ namespace zen |Recycle Bin Access| -------------------- - Windows - ------- - -> Recycler API (IFileOperation) always available - -> COM needs to be initialized before calling any of these functions! CoInitializeEx/CoUninitialize + Windows: -> Recycler API (IFileOperation) always available + -> COM needs to be initialized before calling any of these functions! CoInitializeEx/CoUninitialize - Linux - ----- - Compiler flags: `pkg-config --cflags gio-2.0` - Linker flags: `pkg-config --libs gio-2.0` + Linux: Compiler flags: `pkg-config --cflags gio-2.0` + Linker flags: `pkg-config --libs gio-2.0` - Already included in package "gtk+-2.0"! */ - - -//move a file or folder to Recycle Bin (deletes permanently if recycler is not available) -> crappy semantics, but we have no choice thanks to Windows' design -bool recycleOrDeleteIfExists(const Zstring& itemPath); //throw FileError, return "true" if file/dir was actually deleted + Already included in package "gtk+-2.0"! */ +//fails if item is not existing (anymore) +void moveToRecycleBin(const Zstring& itemPath); //throw FileError, RecycleBinUnavailable } #endif //RECYCLER_H_18345067341545 diff --git a/zen/ring_buffer.h b/zen/ring_buffer.h index dfbb6493..f6cc3e5f 100644 --- a/zen/ring_buffer.h +++ b/zen/ring_buffer.h @@ -13,7 +13,7 @@ namespace zen { -//basically a std::deque<> but with a non-garbage implementation => circular buffer with std::vector<>-like exponential growth! +//like std::deque<> but with a non-garbage implementation: circular buffer with std::vector<>-like exponential growth! //https://stackoverflow.com/questions/39324192/why-is-an-stl-deque-not-implemented-as-just-a-circular-vector template <class T> @@ -34,8 +34,9 @@ public: using reference = T&; using const_reference = const T&; - size_t size() const { return size_; } - bool empty() const { return size_ == 0; } + size_t size () const { return size_; } + size_t capacity() const { return capacity_; } + bool empty () const { return size_ == 0; } reference front() { checkInvariants(); assert(!empty()); return getBufPtr()[bufStart_]; } const_reference front() const { checkInvariants(); assert(!empty()); return getBufPtr()[bufStart_]; } @@ -139,13 +140,13 @@ public: std::swap(size_, other.size_); } - void reserve(size_t minSize) //throw ? (strong exception-safety!) + void reserve(size_t minCapacity) //throw ? (strong exception-safety!) { checkInvariants(); - if (minSize > capacity_) + if (minCapacity > capacity_) { - const size_t newCapacity = std::max(minSize + minSize / 2, minSize); //no minimum capacity: just like std::vector<> implementation + const size_t newCapacity = std::max(minCapacity + minCapacity / 2, minCapacity); //no lower limit for capacity: just like std::vector<> RingBuffer newBuf(newCapacity); //throw ? @@ -184,13 +185,15 @@ public: Iterator& operator++() { ++offset_; return *this; } Iterator& operator--() { --offset_; return *this; } Iterator& operator+=(ptrdiff_t offset) { offset_ += offset; return *this; } - inline friend bool operator==(const Iterator& lhs, const Iterator& rhs) { assert(lhs.container_ == rhs.container_); return lhs.offset_ == rhs.offset_; } - inline friend ptrdiff_t operator-(const Iterator& lhs, const Iterator& rhs) { return lhs.offset_ - rhs.offset_; } - inline friend Iterator operator+(const Iterator& lhs, ptrdiff_t offset) { Iterator tmp(lhs); return tmp += offset; } Value& operator* () const { return (*container_)[offset_]; } Value* operator->() const { return &(*container_)[offset_]; } + inline friend Iterator operator+(const Iterator& lhs, ptrdiff_t offset) { Iterator tmp(lhs); return tmp += offset; } + inline friend ptrdiff_t operator-(const Iterator& lhs, const Iterator& rhs) { return lhs.offset_ - rhs.offset_; } + inline friend bool operator==(const Iterator& lhs, const Iterator& rhs) { assert(lhs.container_ == rhs.container_); return lhs.offset_ == rhs.offset_; } + inline friend std::strong_ordering operator<=>(const Iterator& lhs, const Iterator& rhs) { assert(lhs.container_ == rhs.container_); return lhs.offset_ <=> rhs.offset_; } + //GCC debug needs "operator<=" private: - Container* container_ = nullptr; + Container* container_ = nullptr; //iterator must be assignable ptrdiff_t offset_ = 0; }; @@ -214,8 +217,6 @@ private: rawMem_(static_cast<std::byte*>(::operator new (capacity * sizeof(T)))), //throw std::bad_alloc capacity_(capacity) {} - struct FreeStoreDelete { void operator()(std::byte* p) const { ::operator delete (p); } }; - /**/ T* getBufPtr() { return reinterpret_cast<T*>(rawMem_.get()); } const T* getBufPtr() const { return reinterpret_cast<T*>(rawMem_.get()); } @@ -227,20 +228,23 @@ private: static T* uninitializedMoveIfNoexcept(T* first, T* last, T* firstTrg, std::true_type ) { return std::uninitialized_move(first, last, firstTrg); } static T* uninitializedMoveIfNoexcept(T* first, T* last, T* firstTrg, std::false_type) { return std::uninitialized_copy(first, last, firstTrg); } //throw ? - void checkInvariants() const - { - assert(bufStart_ == 0 || bufStart_ < capacity_); - assert(size_ <= capacity_); - } - - size_t getBufPos(size_t offset) const //< capacity_ + size_t getBufPos(size_t offset) const { + //assert(offset < capacity_); -> redundant in this context size_t bufPos = bufStart_ + offset; if (bufPos >= capacity_) bufPos -= capacity_; return bufPos; } + void checkInvariants() const + { + assert(bufStart_ == 0 || bufStart_ < capacity_); + assert(size_ <= capacity_); + } + + struct FreeStoreDelete { void operator()(std::byte* p) const { ::operator delete (p); } }; + std::unique_ptr<std::byte, FreeStoreDelete> rawMem_; size_t capacity_ = 0; //as number of T size_t bufStart_ = 0; //< capacity_ diff --git a/zen/scope_guard.h b/zen/scope_guard.h index 1e4165be..4cc049a8 100644 --- a/zen/scope_guard.h +++ b/zen/scope_guard.h @@ -8,7 +8,6 @@ #define SCOPE_GUARD_H_8971632487321434 #include <cassert> -//#include <exception> #include "type_traits.h" #include "legacy_compiler.h" //std::uncaught_exceptions diff --git a/zen/serialize.h b/zen/serialize.h index 26202d96..53a6fc62 100644 --- a/zen/serialize.h +++ b/zen/serialize.h @@ -7,6 +7,7 @@ #ifndef SERIALIZE_H_839405783574356 #define SERIALIZE_H_839405783574356 +//#include <bit> #include <functional> #include "sys_error.h" //keep header clean from specific stream implementations! (e.g.file_io.h)! used by abstract.h! @@ -22,38 +23,44 @@ namespace zen binary container for data storage: must support "basic" std::vector interface (e.g. std::vector<std::byte>, std::string, Zbase<char>) --------------------------------- - | Buffered Input Stream Concept | + | Unbuffered Input Stream Concept | --------------------------------- - struct BufferedInputStream - { - size_t read(void* buffer, size_t bytesToRead); //throw X; return "bytesToRead" bytes unless end of stream! + size_t getBlockSize(); //throw X + size_t tryRead(void* buffer, size_t bytesToRead); //throw X; may return short; only 0 means EOF! CONTRACT: bytesToRead > 0! + + ---------------------------------- + | Unbuffered Output Stream Concept | + ---------------------------------- + size_t getBlockSize(); //throw X + size_t tryWrite(const void* buffer, size_t bytesToWrite); //throw X; may return short! CONTRACT: bytesToWrite > 0 + + =============================================================================================== - Optional: support stream-copying - -------------------------------- - size_t getBlockSize() const; - const IoCallback& notifyUnbufferedIO - }; + --------------------------------- + | Buffered Input Stream Concept | + --------------------------------- + size_t read(void* buffer, size_t bytesToRead); //throw X; return "bytesToRead" bytes unless end of stream! ---------------------------------- | Buffered Output Stream Concept | ---------------------------------- - struct BufferedOutputStream - { - void write(const void* buffer, size_t bytesToWrite); //throw X + void write(const void* buffer, size_t bytesToWrite); //throw X */ - Optional: support stream-copying - -------------------------------- - const IoCallback& notifyUnbufferedIO - }; */ using IoCallback = std::function<void(int64_t bytesDelta)>; //throw X -//functions based on buffered stream abstraction -template <class BufferedInputStream, class BufferedOutputStream> -void bufferedStreamCopy(BufferedInputStream& streamIn, BufferedOutputStream& streamOut); //throw X +template <class BinContainer, class Function> +BinContainer unbufferedLoad(Function tryRead/*(void* buffer, size_t bytesToRead) throw X; may return short; only 0 means EOF*/, + size_t blockSize); //throw X + +template <class BinContainer, class Function> +void unbufferedSave(const BinContainer& cont, Function tryWrite /*(const void* buffer, size_t bytesToWrite) throw X; may return short*/, + size_t blockSize); //throw X + +template <class Function1, class Function2> +void unbufferedStreamCopy(Function1 tryRead /*(void* buffer, size_t bytesToRead) throw X; may return short; only 0 means EOF*/, size_t blockSizeIn, + Function2 tryWrite /*(const void* buffer, size_t bytesToWrite) throw X; may return short*/, size_t blockSizeOut); //throw X -template <class BinContainer, class BufferedInputStream> BinContainer -bufferedLoad(BufferedInputStream& streamIn); //throw X template <class N, class BufferedOutputStream> void writeNumber (BufferedOutputStream& stream, const N& num); // template <class C, class BufferedOutputStream> void writeContainer(BufferedOutputStream& stream, const C& str); //noexcept @@ -71,124 +78,277 @@ template < class BufferedInputStream> void readArray (BufferedInputSt struct IOCallbackDivider { - IOCallbackDivider(const IoCallback& notifyUnbufferedIO, int64_t& totalUnbufferedIO) : totalUnbufferedIO_(totalUnbufferedIO), notifyUnbufferedIO_(notifyUnbufferedIO) {} + IOCallbackDivider(const IoCallback& notifyUnbufferedIO, int64_t& totalBytesNotified) : + totalBytesNotified_(totalBytesNotified), + notifyUnbufferedIO_(notifyUnbufferedIO) { assert(totalBytesNotified == 0); } - void operator()(int64_t bytesDelta) + void operator()(int64_t bytesDelta) //throw X! { - if (notifyUnbufferedIO_) notifyUnbufferedIO_((totalUnbufferedIO_ - totalUnbufferedIO_ / 2 * 2 + bytesDelta) / 2); //throw X! - totalUnbufferedIO_ += bytesDelta; + if (notifyUnbufferedIO_) notifyUnbufferedIO_((totalBytesNotified_ + bytesDelta) / 2 - totalBytesNotified_ / 2); //throw X! + totalBytesNotified_ += bytesDelta; } private: - int64_t& totalUnbufferedIO_; + int64_t& totalBytesNotified_; const IoCallback& notifyUnbufferedIO_; }; +//------------------------------------------------------------------------------------- //buffered input/output stream reference implementations: -template <class BinContainer> struct MemoryStreamIn { - explicit MemoryStreamIn(const BinContainer& cont) : buffer_(cont) {} //this better be cheap! + explicit MemoryStreamIn(const std::string_view& stream) : memRef_(stream) {} + + MemoryStreamIn(std::string&&) = delete; //careful: do NOT store reference to a temporary! size_t read(void* buffer, size_t bytesToRead) //return "bytesToRead" bytes unless end of stream! { - using Byte = typename BinContainer::value_type; - static_assert(sizeof(Byte) == 1); - const size_t bytesRead = std::min(bytesToRead, buffer_.size() - pos_); - auto itFirst = buffer_.begin() + pos_; - std::copy(itFirst, itFirst + bytesRead, static_cast<Byte*>(buffer)); - pos_ += bytesRead; - return bytesRead; + const size_t junkSize = std::min(bytesToRead, memRef_.size() - pos_); + std::memcpy(buffer, memRef_.data() + pos_, junkSize); + pos_ += junkSize; + return junkSize; } size_t pos() const { return pos_; } private: - MemoryStreamIn (const MemoryStreamIn&) = delete; + //MemoryStreamIn (const MemoryStreamIn&) = delete; -> why not allow copying? MemoryStreamIn& operator=(const MemoryStreamIn&) = delete; - const BinContainer buffer_; + const std::string_view memRef_; size_t pos_ = 0; }; -template <class BinContainer> struct MemoryStreamOut { MemoryStreamOut() = default; void write(const void* buffer, size_t bytesToWrite) { - using Byte = typename BinContainer::value_type; - static_assert(sizeof(Byte) == 1); - buffer_.resize(buffer_.size() + bytesToWrite); - const auto it = static_cast<const Byte*>(buffer); - std::copy(it, it + bytesToWrite, buffer_.end() - bytesToWrite); + memBuf_.append(static_cast<const char*>(buffer), bytesToWrite); } - const BinContainer& ref() const { return buffer_; } - /**/ BinContainer& ref() { return buffer_; } + const std::string& ref() const { return memBuf_; } + /**/ std::string& ref() { return memBuf_; } private: MemoryStreamOut (const MemoryStreamOut&) = delete; MemoryStreamOut& operator=(const MemoryStreamOut&) = delete; - BinContainer buffer_; + std::string memBuf_; }; +//------------------------------------------------------------------------------------- +template <class Function> +struct BufferedInputStream +{ + BufferedInputStream(Function tryRead /*(void* buffer, size_t bytesToRead) throw X; may return short; only 0 means EOF*/, + size_t blockSize) : + tryRead_(tryRead), blockSize_(blockSize) {} + size_t read(void* buffer, size_t bytesToRead) //throw X; return "bytesToRead" bytes unless end of stream! + { + assert(memBuf_.size() >= blockSize_); + assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); + const auto bufStart = buffer; + for (;;) + { + const size_t junkSize = std::min(bytesToRead, bufPosEnd_ - bufPos_); + std::memcpy(buffer, memBuf_.data() + bufPos_ /*caveat: vector debug checks*/, junkSize); + bufPos_ += junkSize; + buffer = static_cast<std::byte*>(buffer) + junkSize; + bytesToRead -= junkSize; + + if (bytesToRead == 0) + break; + //-------------------------------------------------------------------- + const size_t bytesRead = tryRead_(memBuf_.data(), blockSize_); //throw X; may return short, only 0 means EOF! => CONTRACT: bytesToRead > 0 + bufPos_ = 0; + bufPosEnd_ = bytesRead; + + if (bytesRead == 0) //end of file + break; + } + return static_cast<std::byte*>(buffer) - + static_cast<std::byte*>(bufStart); + } + +private: + BufferedInputStream (const BufferedInputStream&) = delete; + BufferedInputStream& operator=(const BufferedInputStream&) = delete; + Function tryRead_; + const size_t blockSize_; -//-----------------------implementation------------------------------- -template <class BufferedInputStream, class BufferedOutputStream> inline -void bufferedStreamCopy(BufferedInputStream& streamIn, //throw X - BufferedOutputStream& streamOut) // + size_t bufPos_ = 0; + size_t bufPosEnd_= 0; + std::vector<std::byte> memBuf_{blockSize_}; +}; + + +template <class Function> +struct BufferedOutputStream { - const size_t blockSize = streamIn.getBlockSize(); + BufferedOutputStream(Function tryWrite /*(const void* buffer, size_t bytesToWrite) throw X; may return short*/, + size_t blockSize) : + tryWrite_(tryWrite), blockSize_(blockSize) {} + + ~BufferedOutputStream() + { + } + + void write(const void* buffer, size_t bytesToWrite) //throw X + { + assert(memBuf_.size() >= blockSize_); + assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); + + for (;;) + { + const size_t junkSize = std::min(bytesToWrite, blockSize_ - (bufPosEnd_ - bufPos_)); + std::memcpy(memBuf_.data() + bufPosEnd_, buffer, junkSize); + bufPosEnd_ += junkSize; + buffer = static_cast<const std::byte*>(buffer) + junkSize; + bytesToWrite -= junkSize; + + if (bytesToWrite == 0) + return; + //-------------------------------------------------------------------- + bufPos_ += tryWrite_(memBuf_.data() + bufPos_, blockSize_); //throw X; may return short + + if (memBuf_.size() - bufPos_ < blockSize_ || //support memBuf_.size() > blockSize to avoid memmove()s + bufPos_ == bufPosEnd_) + { + std::memmove(memBuf_.data(), memBuf_.data() + bufPos_, bufPosEnd_ - bufPos_); + bufPosEnd_ -= bufPos_; + bufPos_ = 0; + } + } + } + + void flushBuffer() //throw X + { + assert(bufPosEnd_ - bufPos_ <= blockSize_); + assert(bufPos_ <= bufPosEnd_ && bufPosEnd_ <= memBuf_.size()); + while (bufPos_ != bufPosEnd_) + bufPos_ += tryWrite_(memBuf_.data() + bufPos_, bufPosEnd_ - bufPos_); //throw X + } + +private: + BufferedOutputStream (const BufferedOutputStream&) = delete; + BufferedOutputStream& operator=(const BufferedOutputStream&) = delete; + + Function tryWrite_; + const size_t blockSize_; + + size_t bufPos_ = 0; + size_t bufPosEnd_ = 0; + std::vector<std::byte> memBuf_{2 * /*=> mitigate memmove()*/ blockSize_}; //throw FileError +}; + +//------------------------------------------------------------------------------------- + +template <class BinContainer, class Function> inline +BinContainer unbufferedLoad(Function tryRead /*(void* buffer, size_t bytesToRead) throw X; may return short; only 0 means EOF*/, + size_t blockSize) //throw X +{ + static_assert(sizeof(typename BinContainer::value_type) == 1); //expect: bytes if (blockSize == 0) throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); - std::vector<std::byte> buffer(blockSize); + BinContainer buf; for (;;) { - const size_t bytesRead = streamIn.read(&buffer[0], blockSize); //throw X; return "bytesToRead" bytes unless end of stream! - streamOut.write(&buffer[0], bytesRead); //throw X + warn_static("don't need zero-initialization!") + buf.resize(buf.size() + blockSize); + const size_t bytesRead = tryRead(buf.data() + buf.size() - blockSize, blockSize); //throw X; may return short; only 0 means EOF + buf.resize(buf.size() - blockSize + bytesRead); //caveat: unsigned arithmetics + + if (bytesRead == 0) //end of file + { + //caveat: memory consumption of returned string! + if (buf.capacity() > buf.size() * 3 / 2) //reference: in worst case, std::vector with growth factor 1.5 "wastes" 50% of its size as unused capacity + buf.shrink_to_fit(); //=> shrink if buffer is wasting more than that! - if (bytesRead < blockSize) //end of file - break; + return buf; + } } } -template <class BinContainer, class BufferedInputStream> inline -BinContainer bufferedLoad(BufferedInputStream& streamIn) //throw X +template <class BinContainer, class Function> inline +void unbufferedSave(const BinContainer& cont, + Function tryWrite /*(const void* buffer, size_t bytesToWrite) throw X; may return short*/, + size_t blockSize) //throw X { static_assert(sizeof(typename BinContainer::value_type) == 1); //expect: bytes - - const size_t blockSize = streamIn.getBlockSize(); if (blockSize == 0) throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); - BinContainer buffer; + const size_t bufPosEnd = cont.size(); + size_t bufPos = 0; + + while (bufPos < bufPosEnd) + bufPos += tryWrite(cont.data() + bufPos, std::min(bufPosEnd - bufPos, blockSize)); //throw X +} + + +template <class Function1, class Function2> inline +void unbufferedStreamCopy(Function1 tryRead /*(void* buffer, size_t bytesToRead) throw X; may return short; only 0 means EOF*/, + size_t blockSizeIn, + Function2 tryWrite /*(const void* buffer, size_t bytesToWrite) throw X; may return short*/, + size_t blockSizeOut) //throw X +{ + /* caveat: buffer block sizes might not be power of 2: + - f_iosize for network share on macOS + - libssh2 uses weird packet sizes like MAX_SFTP_OUTGOING_SIZE (30000), and will send incomplete packages if block size is not an exact multiple :( + => that's a problem because we want input/output sizes to be multiples of each other to help avoid the std::memmove() below */ +#if 0 + blockSizeIn = std::bit_ceil(blockSizeIn); + blockSizeOut = std::bit_ceil(blockSizeOut); +#endif + if (blockSizeIn <= 1 || blockSizeOut <= 1) + throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); + + const size_t bufCapacity = blockSizeOut - 1 + blockSizeIn; + const size_t alignment = ::sysconf(_SC_PAGESIZE); //-1 on error => posix_memalign() will fail + assert(alignment >= sizeof(void*) && std::has_single_bit(alignment)); //required by posix_memalign() + std::byte* buf = nullptr; + errno = ::posix_memalign(reinterpret_cast<void**>(&buf), alignment, bufCapacity); + ZEN_ON_SCOPE_EXIT(::free(buf)); + + size_t bufPosEnd = 0; for (;;) { - buffer.resize(buffer.size() + blockSize); - const size_t bytesRead = streamIn.read(&*(buffer.end() - blockSize), blockSize); //throw X; return "blockSize" bytes unless end of stream! - if (bytesRead < blockSize) //end of file + const size_t bytesRead = tryRead(buf + bufPosEnd, blockSizeIn); //throw X; may return short; only 0 means EOF + + if (bytesRead == 0) //end of file { - buffer.resize(buffer.size() - (blockSize - bytesRead)); //caveat: unsigned arithmetics + size_t bufPos = 0; + while (bufPos < bufPosEnd) + bufPos += tryWrite(buf + bufPos, bufPosEnd - bufPos); //throw X; may return short + return; + } + else + { + bufPosEnd += bytesRead; - //caveat: memory consumption of returned string! - if (buffer.capacity() > buffer.size() * 3 / 2) //reference: in worst case, std::vector with growth factor 1.5 "wastes" 50% of its size as unused capacity - buffer.shrink_to_fit(); //=> shrink if buffer is wasting more than that! + size_t bufPos = 0; + while (bufPosEnd - bufPos >= blockSizeOut) + bufPos += tryWrite(buf + bufPos, blockSizeOut); //throw X; may return short - return buffer; + if (bufPos > 0) + { + bufPosEnd -= bufPos; + std::memmove(buf, buf + bufPos, bufPosEnd); + } } } } +//------------------------------------------------------------------------------------- template <class BufferedOutputStream> inline void writeArray(BufferedOutputStream& stream, const void* buffer, size_t len) @@ -232,7 +392,7 @@ template <class N, class BufferedInputStream> inline N readNumber(BufferedInputStream& stream) //throw SysErrorUnexpectedEos { static_assert(isArithmetic<N> || std::is_same_v<N, bool> || std::is_enum_v<N>); - N num{}; + N num; //uninitialized readArray(stream, &num, sizeof(N)); //throw SysErrorUnexpectedEos return num; } diff --git a/zen/socket.h b/zen/socket.h index d9517bd8..8e92b616 100644 --- a/zen/socket.h +++ b/zen/socket.h @@ -24,6 +24,7 @@ namespace zen using SocketType = int; const SocketType invalidSocket = -1; inline void closeSocket(SocketType s) { ::close(s); } +warn_static("log on error!") //Winsock needs to be initialized before calling any of these functions! (WSAStartup/WSACleanup) @@ -58,7 +59,7 @@ public: THROW_LAST_SYS_ERROR_WSA("socket"); ZEN_ON_SCOPE_FAIL(closeSocket(testSocket)); - + warn_static("support timeout! https://stackoverflow.com/questions/2597608/c-socket-connection-timeout") if (::connect(testSocket, ai.ai_addr, static_cast<int>(ai.ai_addrlen)) != 0) THROW_LAST_SYS_ERROR_WSA("connect"); diff --git a/zen/stream_buffer.h b/zen/stream_buffer.h index 8b8cd0d7..ee9e18fd 100644 --- a/zen/stream_buffer.h +++ b/zen/stream_buffer.h @@ -10,81 +10,78 @@ #include <condition_variable> #include "ring_buffer.h" #include "string_tools.h" +#include "thread.h" namespace zen { -/* implement streaming API on top of libcurl's icky callback-based design +/* implement streaming API on top of libcurl's icky callback-based design + + curl uses READBUFFER_SIZE download buffer size, but returns via a retarded sendf.c::chop_write() writing in small junks of CURL_MAX_WRITE_SIZE (16 kB) => support copying arbitrarily-large files: https://freefilesync.org/forum/viewtopic.php?t=4471 => maximum performance through async processing (prefetching + output buffer!) => cost per worker thread creation ~ 1/20 ms */ class AsyncStreamBuffer { public: - explicit AsyncStreamBuffer(size_t bufferSize) : bufSize_(bufferSize) { ringBuf_.reserve(bufferSize); } + explicit AsyncStreamBuffer(size_t capacity) { ringBuf_.reserve(capacity); } //context of input thread, blocking - //return "bytesToRead" bytes unless end of stream! - size_t read(void* buffer, size_t bytesToRead) //throw <write error> + size_t read(void* buffer, size_t bytesToRead) //throw <write error>; return "bytesToRead" bytes unless end of stream! { - if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! - throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); + std::unique_lock dummy(lockStream_); + const auto bufStart = buffer; - auto it = static_cast<std::byte*>(buffer); - const auto itEnd = it + bytesToRead; - - for (std::unique_lock dummy(lockStream_); it != itEnd;) + while (bytesToRead > 0) { - assert(!errorRead_); - conditionBytesWritten_.wait(dummy, [this] { return errorWrite_ || !ringBuf_.empty() || eof_; }); - - if (errorWrite_) - std::rethrow_exception(errorWrite_); //throw <write error> - - const size_t junkSize = std::min(static_cast<size_t>(itEnd - it), ringBuf_.size()); - ringBuf_.extract_front(it, it + junkSize); - it += junkSize; - - conditionBytesRead_.notify_all(); - - if (eof_) //end of file + const size_t bytesRead = tryReadImpl(dummy, buffer, bytesToRead); //throw <write error> + if (bytesRead == 0) //end of file break; + conditionBytesRead_.notify_all(); + buffer = static_cast<std::byte*>(buffer) + bytesRead; + bytesToRead -= bytesRead; } + return static_cast<std::byte*>(buffer) - + static_cast<std::byte*>(bufStart); + } - const size_t bytesRead = it - static_cast<std::byte*>(buffer); - totalBytesRead_ += bytesRead; + //context of input thread, blocking + size_t tryRead(void* buffer, size_t bytesToRead) //throw <write error>; may return short; only 0 means EOF! CONTRACT: bytesToRead > 0! + { + size_t bytesRead = 0; + { + std::unique_lock dummy(lockStream_); + bytesRead = tryReadImpl(dummy, buffer, bytesToRead); + } + if (bytesRead > 0) + conditionBytesRead_.notify_all(); //...*outside* the lock return bytesRead; } //context of output thread, blocking void write(const void* buffer, size_t bytesToWrite) //throw <read error> { - totalBytesWritten_ += bytesToWrite; //bytes already processed as far as raw FTP access is concerned - - auto it = static_cast<const std::byte*>(buffer); - const auto itEnd = it + bytesToWrite; - - for (std::unique_lock dummy(lockStream_); it != itEnd;) + std::unique_lock dummy(lockStream_); + while (bytesToWrite > 0) { - assert(!eof_ && !errorWrite_); - /* => can't use InterruptibleThread's interruptibleWait() :( - -> AsyncStreamBuffer is used for input and output streaming - => both AsyncStreamBuffer::write()/read() would have to implement interruptibleWait() - => one of these usually called from main thread - => but interruptibleWait() cannot be called from main thread! */ - conditionBytesRead_.wait(dummy, [this] { return errorRead_ || ringBuf_.size() < bufSize_; }); - - if (errorRead_) - std::rethrow_exception(errorRead_); //throw <read error> - - const size_t junkSize = std::min(static_cast<size_t>(itEnd - it), bufSize_ - ringBuf_.size()); - ringBuf_.insert_back(it, it + junkSize); - it += junkSize; - + const size_t bytesWritten = tryWriteWhileImpl(dummy, buffer, bytesToWrite); //throw <read error> conditionBytesWritten_.notify_all(); + buffer = static_cast<const std::byte*>(buffer) + bytesWritten; + bytesToWrite -= bytesWritten; } } + //context of output thread, blocking + size_t tryWrite(const void* buffer, size_t bytesToWrite) //throw <read error>; may return short! CONTRACT: bytesToWrite > 0 + { + size_t bytesWritten = 0; + { + std::unique_lock dummy(lockStream_); + bytesWritten = tryWriteWhileImpl(dummy, buffer, bytesToWrite); + } + conditionBytesWritten_.notify_all(); //...*outside* the lock + return bytesWritten; + } + //context of output thread void closeStream() { @@ -101,7 +98,7 @@ public: { { std::lock_guard dummy(lockStream_); - assert(!errorRead_); + assert(error && !errorRead_); if (!errorRead_) errorRead_ = error; } @@ -113,13 +110,16 @@ public: { { std::lock_guard dummy(lockStream_); - assert(!errorWrite_); + assert(error && !errorWrite_); if (!errorWrite_) errorWrite_ = error; } conditionBytesWritten_.notify_all(); } +#if 0 + //function not needed: when writing is completed successfully, no further error can occur! + // => caveat: writing is NOT done (yet) when closeStream() is called! //context of *output* thread void checkReadErrors() //throw <read error> { @@ -128,7 +128,8 @@ public: std::rethrow_exception(errorRead_); //throw <read error> } -#if 0 //function not needed: when EOF is reached (without errors), reading is done => no further error can occur! + //function not needed: when EOF is reached (without errors), reading is done => no further error can occur! + //context of *input* thread void checkWriteErrors() //throw <write error> { std::lock_guard dummy(lockStream_); @@ -144,7 +145,53 @@ private: AsyncStreamBuffer (const AsyncStreamBuffer&) = delete; AsyncStreamBuffer& operator=(const AsyncStreamBuffer&) = delete; - const size_t bufSize_; + //context of input thread, blocking + size_t tryReadImpl(std::unique_lock<std::mutex>& ul, void* buffer, size_t bytesToRead) //throw <write error>; may return short; only 0 means EOF! CONTRACT: bytesToRead > 0! + { + if (bytesToRead == 0) //"read() with a count of 0 returns zero" => indistinguishable from end of file! => check! + throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); + + assert(isLocked(lockStream_)); + assert(!errorRead_); + + conditionBytesWritten_.wait(ul, [this] { return errorWrite_ || !ringBuf_.empty() || eof_; }); + + if (errorWrite_) + std::rethrow_exception(errorWrite_); //throw <write error> + + const size_t junkSize = std::min(bytesToRead, ringBuf_.size()); + ringBuf_.extract_front(static_cast<std::byte*>(buffer), + static_cast<std::byte*>(buffer)+ junkSize); + totalBytesRead_ += junkSize; + return junkSize; + } + + //context of output thread, blocking + size_t tryWriteWhileImpl(std::unique_lock<std::mutex>& ul, const void* buffer, size_t bytesToWrite) //throw <read error>; may return short! CONTRACT: bytesToWrite > 0 + { + if (bytesToWrite == 0) + throw std::logic_error("Contract violation! " + std::string(__FILE__) + ':' + numberTo<std::string>(__LINE__)); + + assert(isLocked(lockStream_)); + assert(!eof_ && !errorWrite_); + /* => can't use InterruptibleThread's interruptibleWait() :( + -> AsyncStreamBuffer is used for input and output streaming + => both AsyncStreamBuffer::write()/read() would have to implement interruptibleWait() + => one of these usually called from main thread + => but interruptibleWait() cannot be called from main thread! */ + conditionBytesRead_.wait(ul, [this] { return errorRead_ || ringBuf_.size() < ringBuf_.capacity(); }); + + if (errorRead_) + std::rethrow_exception(errorRead_); //throw <read error> + + const size_t junkSize = std::min(bytesToWrite, ringBuf_.capacity() - ringBuf_.size()); + + ringBuf_.insert_back(static_cast<const std::byte*>(buffer), + static_cast<const std::byte*>(buffer) + junkSize); + totalBytesWritten_ += junkSize; + return junkSize; + } + std::mutex lockStream_; RingBuffer<std::byte> ringBuf_; //prefetch/output buffer bool eof_ = false; diff --git a/zen/string_base.h b/zen/string_base.h index e18a0f16..a87827e6 100644 --- a/zen/string_base.h +++ b/zen/string_base.h @@ -61,11 +61,11 @@ protected: ~StorageDeepCopy() {} Char* create(size_t size) { return create(size, size); } - Char* create(size_t size, size_t minCapacity) + Char* create(size_t size, size_t capacity) { - assert(size <= minCapacity); - const size_t newCapacity = AP::calcCapacity(minCapacity); - assert(newCapacity >= minCapacity); + assert(size <= capacity); + const size_t newCapacity = AP::calcCapacity(capacity); + assert(newCapacity >= capacity); Descriptor* const newDescr = static_cast<Descriptor*>(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); @@ -124,18 +124,18 @@ protected: ~StorageRefCountThreadSafe() {} Char* create(size_t size) { return create(size, size); } - Char* create(size_t size, size_t minCapacity) + Char* create(size_t size, size_t capacity) { - assert(size <= minCapacity); + assert(size <= capacity); - if (minCapacity == 0) //perf: avoid memory allocation for empty string + if (capacity == 0) //perf: avoid memory allocation for empty string { ++globalEmptyString.descr.refCount; return &globalEmptyString.nullTerm; } - const size_t newCapacity = AP::calcCapacity(minCapacity); - assert(newCapacity >= minCapacity); + const size_t newCapacity = AP::calcCapacity(capacity); + assert(newCapacity >= capacity); Descriptor* const newDescr = static_cast<Descriptor*>(this->allocate(sizeof(Descriptor) + (newCapacity + 1) * sizeof(Char))); //throw std::bad_alloc new (newDescr) Descriptor(size, newCapacity); @@ -259,6 +259,7 @@ public: size_t length() const; size_t size () const { return length(); } const Char* c_str() const { return rawStr_; } //C-string format with 0-termination + /**/ Char* data() { return &*begin(); } const Char& operator[](size_t pos) const; /**/ Char& operator[](size_t pos); bool empty() const { return length() == 0; } @@ -558,7 +559,7 @@ void Zbase<Char, SP>::reserve(size_t minCapacity) //make unshared and check capa //allocate a new string const size_t len = length(); Char* newStr = this->create(len, std::max(len, minCapacity)); //reserve() must NEVER shrink the string: logical const! - std::copy(rawStr_, rawStr_ + len + 1, newStr); //include 0-termination + std::copy(rawStr_, rawStr_ + len + 1 /*0-termination*/, newStr); this->destroy(rawStr_); rawStr_ = newStr; diff --git a/zen/string_tools.h b/zen/string_tools.h index 181a3951..364a9a26 100644 --- a/zen/string_tools.h +++ b/zen/string_tools.h @@ -263,7 +263,7 @@ bool equalString(const S& lhs, const T& rhs) template <class S, class T> inline bool equalAsciiNoCase(const S& lhs, const T& rhs) { - //assert(isAsciiString(lhs) || isAsciiString(rhs)); + //assert(isAsciiString(lhs) || isAsciiString(rhs)); -> no, too strict (e.g. comparing file extensions ASCII-CI) const size_t lhsLen = strLength(lhs); return lhsLen == strLength(rhs) && impl::strcmpAsciiNoCase(strBegin(lhs), strBegin(rhs), lhsLen) == std::weak_ordering::equivalent; } @@ -627,10 +627,14 @@ S printNumber(const T& format, const Num& number) //format a single number using #endif static_assert(std::is_same_v<GetCharTypeT<S>, GetCharTypeT<T>>); - GetCharTypeT<S> buf[128]; //zero-initialize? - const int charsWritten = impl::saferPrintf(buf, std::size(buf), strBegin(format), number); + S buf(128, static_cast<GetCharTypeT<S>>('0')); + const int charsWritten = impl::saferPrintf(buf.data(), buf.size(), strBegin(format), number); - return 0 < charsWritten && charsWritten < std::ssize(buf) ? S(buf, charsWritten) : S(); + if (makeUnsigned(charsWritten) > buf.size()) + return S(); + + buf.resize(charsWritten); + return buf; } diff --git a/zen/string_traits.h b/zen/string_traits.h index 31c8c12c..240dbeac 100644 --- a/zen/string_traits.h +++ b/zen/string_traits.h @@ -36,8 +36,8 @@ namespace zen //reference a sub-string for consumption by zen string_tools //=> std::string_view seems decent, but of course fucks up in one regard: construction -template <class Iterator> auto makeStringView(Iterator first, Iterator last); //e.g. this constructor is not available (at least on clang) -template <class Iterator> auto makeStringView(Iterator first, size_t len); +template <class Iterator> auto makeStringView(Iterator first, Iterator last); //this constructor is not available (at least on clang) +template <class Iterator> auto makeStringView(Iterator first, size_t len); //std::string_view(char*, int) fails to compile! expected size_t as second parameter diff --git a/zen/symlink_target.h b/zen/symlink_target.h index ada4e358..44c15ab2 100644 --- a/zen/symlink_target.h +++ b/zen/symlink_target.h @@ -44,13 +44,13 @@ zen::SymlinkRawContent getSymlinkRawContent_impl(const Zstring& linkPath) //thro const size_t bufSize = 10000; std::vector<char> buf(bufSize); - const ssize_t bytesWritten = ::readlink(linkPath.c_str(), &buf[0], bufSize); + const ssize_t bytesWritten = ::readlink(linkPath.c_str(), buf.data(), bufSize); if (bytesWritten < 0) THROW_LAST_FILE_ERROR(replaceCpy(_("Cannot resolve symbolic link %x."), L"%x", fmtPath(linkPath)), "readlink"); if (bytesWritten >= static_cast<ssize_t>(bufSize)) //detect truncation; not an error for readlink! throw FileError(replaceCpy(_("Cannot resolve symbolic link %x."), L"%x", fmtPath(linkPath)), formatSystemError("readlink", L"", L"Buffer truncated.")); - return {Zstring(&buf[0], bytesWritten)}; //readlink does not append 0-termination! + return {.targetPath = Zstring(buf.data(), bytesWritten)}; //readlink does not append 0-termination! } diff --git a/zen/sys_error.h b/zen/sys_error.h index 6d03f299..73a92343 100644 --- a/zen/sys_error.h +++ b/zen/sys_error.h @@ -42,6 +42,7 @@ private: +//better leave it as a macro (see comment in file_error.h) #define THROW_LAST_SYS_ERROR(functionName) \ do { const ErrorCode ecInternal = getLastError(); throw zen::SysError(formatSystemError(functionName, ecInternal)); } while (false) diff --git a/zen/sys_info.cpp b/zen/sys_info.cpp index c57464bc..87e07f91 100644 --- a/zen/sys_info.cpp +++ b/zen/sys_info.cpp @@ -43,7 +43,7 @@ Zstring zen::getLoginUser() //throw FileError passwd* pwEntry = nullptr; if (const int rv = ::getpwuid_r(userIdNo, //uid_t uid &buf2, //struct passwd* pwd - &buf[0], //char* buf + buf.data(), //char* buf buf.size(), //size_t buflen &pwEntry); //struct passwd** result rv != 0 || !pwEntry) @@ -82,10 +82,10 @@ Zstring zen::getUserDescription() //throw FileError const Zstring computerName = []() -> Zstring //throw FileError { std::vector<char> buf(10000); - if (::gethostname(&buf[0], buf.size()) != 0) + if (::gethostname(buf.data(), buf.size()) != 0) THROW_LAST_FILE_ERROR(_("Cannot get process information."), "gethostname"); - Zstring hostName = &buf[0]; + Zstring hostName = buf.data(); if (endsWithAsciiNoCase(hostName, ".local")) //strip fluff (macOS) => apparently not added on Linux? hostName = beforeLast(hostName, '.', IfNotFoundReturn::none); @@ -204,7 +204,7 @@ Zstring zen::getUserHome() //throw FileError passwd* pwEntry = nullptr; if (const int rv = ::getpwnam_r(loginUser.c_str(), //const char *name &buf2, //struct passwd* pwd - &buf[0], //char* buf + buf.data(), //char* buf buf.size(), //size_t buflen &pwEntry); //struct passwd** result rv != 0 || !pwEntry) diff --git a/zen/sys_version.cpp b/zen/sys_version.cpp index 7355ef1a..d187e0f0 100644 --- a/zen/sys_version.cpp +++ b/zen/sys_version.cpp @@ -88,7 +88,7 @@ OsVersion zen::getOsVersion() } catch (const SysError& e) { - std::cerr << utfTo<std::string>(e.toString()) << '\n'; + std::cerr << utfTo<std::string>(e.toString()) + '\n'; return OsVersionDetail{}; //sigh, it's a jungle out there: https://freefilesync.org/forum/viewtopic.php?t=7276 } }(); diff --git a/zen/thread.h b/zen/thread.h index abdc6da0..931f2c0d 100644 --- a/zen/thread.h +++ b/zen/thread.h @@ -72,6 +72,7 @@ void interruptibleSleep(const std::chrono::duration<Rep, Period>& relTime); //th void setCurrentThreadName(const Zstring& threadName); bool runningOnMainThread(); + //------------------------------------------------------------------------------------------ /* std::async replacement without crappy semantics: @@ -272,14 +272,14 @@ Zstring formatTime(const Zchar* format, const TimeComp& tc) std::mktime(&ctc); //unfortunately std::strftime() needs all elements of "struct tm" filled, e.g. tm_wday, tm_yday //note: although std::mktime() explicitly expects "local time", calculating weekday and day of year *should* be time-zone and DST independent - Zstring buffer(256, Zstr('\0')); + Zstring buf(256, Zstr('\0')); //strftime() craziness on invalid input: // VS 2010: CRASH unless "_invalid_parameter_handler" is set: https://docs.microsoft.com/en-us/cpp/c-runtime-library/parameter-validation // GCC: returns 0, apparently no crash. Still, considering some clib maintainer's comments, we should expect the worst! // Windows: avoid char-based strftime() which uses ANSI encoding! (e.g. Greek letters for AM/PM) - const size_t charsWritten = std::strftime(&buffer[0], buffer.size(), format, &ctc); - buffer.resize(charsWritten); - return buffer; + const size_t charsWritten = std::strftime(buf.data(), buf.size(), format, &ctc); + buf.resize(charsWritten); + return buf; } diff --git a/zen/zlib_wrap.cpp b/zen/zlib_wrap.cpp index e87a284f..28b85c5c 100644 --- a/zen/zlib_wrap.cpp +++ b/zen/zlib_wrap.cpp @@ -8,8 +8,9 @@ //Windows: use the SAME zlib version that wxWidgets is linking against! //C:\Data\Projects\wxWidgets\Source\src\zlib\zlib.h //Linux/macOS: use zlib system header for both wxWidgets and libcurl (zlib is required for HTTP, SFTP) // => don't compile wxWidgets with: --with-zlib=builtin -#include <zlib.h> //https://www.zlib.net/manual.html -#include <zen/scope_guard.h> +#include <zlib.h> +#include "scope_guard.h" +#include "serialize.h" using namespace zen; @@ -20,9 +21,9 @@ std::wstring getZlibErrorLiteral(int sc) { switch (sc) { - ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); - ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); ZEN_CHECK_CASE_FOR_CONSTANT(Z_NEED_DICT); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_END); + ZEN_CHECK_CASE_FOR_CONSTANT(Z_OK); ZEN_CHECK_CASE_FOR_CONSTANT(Z_ERRNO); ZEN_CHECK_CASE_FOR_CONSTANT(Z_STREAM_ERROR); ZEN_CHECK_CASE_FOR_CONSTANT(Z_DATA_ERROR); @@ -34,16 +35,15 @@ std::wstring getZlibErrorLiteral(int sc) return replaceCpy<std::wstring>(L"zlib error %x", L"%x", numberTo<std::wstring>(sc)); } } -} -size_t zen::impl::zlib_compressBound(size_t len) +size_t zlib_compressBound(size_t len) { return ::compressBound(static_cast<uLong>(len)); //upper limit for buffer size, larger than input size!!! } -size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError +size_t zlib_compress(const void* src, size_t srcLen, void* trg, size_t trgLen, int level) //throw SysError { uLongf bufSize = static_cast<uLong>(trgLen); const int rv = ::compress2(static_cast<Bytef*>(trg), //Bytef* dest @@ -61,7 +61,7 @@ size_t zen::impl::zlib_compress(const void* src, size_t srcLen, void* trg, size_ } -size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError +size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen) //throw SysError { uLongf bufSize = static_cast<uLong>(trgLen); const int rv = ::uncompress(static_cast<Bytef*>(trg), //Bytef* dest @@ -77,13 +77,80 @@ size_t zen::impl::zlib_decompress(const void* src, size_t srcLen, void* trg, siz return bufSize; } +} + + +#undef compress //mitigate zlib macro shit... + +std::string zen::compress(const std::string_view& stream, int level) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //save uncompressed stream size for decompression + const uint64_t uncompressedSize = stream.size(); //use portable number type! + output.resize(sizeof(uncompressedSize)); + std::memcpy(output.data(), &uncompressedSize, sizeof(uncompressedSize)); + + const size_t bufferEstimate = zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!! + + output.resize(output.size() + bufferEstimate); + + const size_t bytesWritten = zlib_compress(stream.data(), + stream.size(), + output.data() + output.size() - bufferEstimate, + bufferEstimate, + level); //throw SysError + if (bytesWritten < bufferEstimate) + output.resize(output.size() - bufferEstimate + bytesWritten); //caveat: unsigned arithmetics + //caveat: physical memory consumption still *unchanged*! + } + return output; +} + + +std::string zen::decompress(const std::string_view& stream) //throw SysError +{ + std::string output; + if (!stream.empty()) //don't dereference iterator into empty container! + { + //retrieve size of uncompressed data + uint64_t uncompressedSize = 0; //use portable number type! + if (stream.size() < sizeof(uncompressedSize)) + throw SysError(L"zlib error: stream size < 8"); + + std::memcpy(&uncompressedSize, stream.data(), sizeof(uncompressedSize)); + + //attention: output MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!! + if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib! + throw SysError(L"zlib error: uncompressed size == 0"); + + try + { + output.resize(static_cast<size_t>(uncompressedSize)); //throw std::bad_alloc + } + //most likely this is due to data corruption: + catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } + catch (const std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } + + const size_t bytesWritten = zlib_decompress(stream.data() + sizeof(uncompressedSize), + stream.size() - sizeof(uncompressedSize), + output.data(), + static_cast<size_t>(uncompressedSize)); //throw SysError + if (bytesWritten != static_cast<size_t>(uncompressedSize)) + throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size.")); + } + return output; +} class InputStreamAsGzip::Impl { public: - Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : //throw SysError; returning 0 signals EOF: Posix read() semantics - readBlock_(readBlock) + Impl(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X; may return short, only 0 means EOF!*/, + size_t blockSize) : //throw SysError + tryReadBlock_(tryReadBlock), + blockSize_(blockSize) { const int windowBits = MAX_WBITS + 16; //"add 16 to windowBits to write a simple gzip header" @@ -105,6 +172,7 @@ public: { [[maybe_unused]] const int rv = ::deflateEnd(&gzipStream_); assert(rv == Z_OK); + warn_static("log on error") } size_t read(void* buffer, size_t bytesToRead) //throw SysError, X; return "bytesToRead" bytes unless end of stream! @@ -117,20 +185,18 @@ public: for (;;) { + //refill input buffer once avail_in == 0: https://www.zlib.net/manual.html if (gzipStream_.avail_in == 0 && !eof_) { - if (bufIn_.size() < bytesToRead) - bufIn_.resize(bytesToRead); - - const size_t bytesRead = readBlock_(&bufIn_[0], bufIn_.size()); //throw X; returning 0 signals EOF: Posix read() semantics - gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(&bufIn_[0]); + const size_t bytesRead = tryReadBlock_(bufIn_.data(), blockSize_); //throw X; may return short, only 0 means EOF! + gzipStream_.next_in = reinterpret_cast<z_const Bytef*>(bufIn_.data()); gzipStream_.avail_in = static_cast<uInt>(bytesRead); if (bytesRead == 0) eof_ = true; } const int rv = ::deflate(&gzipStream_, eof_ ? Z_FINISH : Z_NO_FLUSH); - if (rv == Z_STREAM_END) + if (eof_ && rv == Z_STREAM_END) return bytesToRead - gzipStream_.avail_out; if (rv != Z_OK) throw SysError(formatSystemError("zlib deflate", getZlibErrorLiteral(rv), L"")); @@ -140,34 +206,41 @@ public: } } + size_t getBlockSize() const { return blockSize_; } //returning input blockSize_ makes sense for low compression ratio + private: - const std::function<size_t(void* buffer, size_t bytesToRead)> readBlock_; //throw X + const std::function<size_t(void* buffer, size_t bytesToRead)> tryReadBlock_; //throw X + const size_t blockSize_; bool eof_ = false; - std::vector<std::byte> bufIn_; + std::vector<std::byte> bufIn_{blockSize_}; z_stream gzipStream_ = {}; }; -zen::InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : pimpl_(std::make_unique<Impl>(readBlock)) {} //throw SysError -zen::InputStreamAsGzip::~InputStreamAsGzip() {} -size_t zen::InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X +InputStreamAsGzip::InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X*/, size_t blockSize) : + pimpl_(std::make_unique<Impl>(tryReadBlock, blockSize)) {} //throw SysError + +InputStreamAsGzip::~InputStreamAsGzip() {} + +size_t InputStreamAsGzip::getBlockSize() const { return pimpl_->getBlockSize(); } +size_t InputStreamAsGzip::read(void* buffer, size_t bytesToRead) { return pimpl_->read(buffer, bytesToRead); } //throw SysError, X -std::string zen::compressAsGzip(const void* buffer, size_t bufSize) //throw SysError + +std::string zen::compressAsGzip(const std::string_view& stream) //throw SysError { - struct MemoryStreamAsGzip : InputStreamAsGzip + MemoryStreamIn memStream(stream); + + auto tryReadBlock = [&](void* buffer, size_t bytesToRead) //may return short, only 0 means EOF! { - explicit MemoryStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X*/) : InputStreamAsGzip(readBlock) {} //throw SysError - static size_t getBlockSize() { return 128 * 1024; } //InputStreamAsGzip has no idea what it's wrapping => has no getBlockSize() member! + return memStream.read(buffer, bytesToRead); //return "bytesToRead" bytes unless end of stream! }; - MemoryStreamAsGzip gzipStream([&](void* bufIn, size_t bytesToRead) //throw SysError + InputStreamAsGzip gzipStream(tryReadBlock, 1024 * 1024 /*blockSize*/); //throw SysError + + return unbufferedLoad<std::string>([&](void* buffer, size_t bytesToRead) { - const size_t bytesRead = std::min(bufSize, bytesToRead); - std::memcpy(bufIn, buffer, bytesRead); - buffer = static_cast<const char*>(buffer) + bytesRead; - bufSize -= bytesRead; - return bytesRead; //returning 0 signals EOF: Posix read() semantics - }); - return bufferedLoad<std::string>(gzipStream); //throw SysError + return gzipStream.read(buffer, bytesToRead); //throw SysError; return "bytesToRead" bytes unless end of stream! + }, + gzipStream.getBlockSize()); //throw SysError } diff --git a/zen/zlib_wrap.h b/zen/zlib_wrap.h index 41d7428a..d672707b 100644 --- a/zen/zlib_wrap.h +++ b/zen/zlib_wrap.h @@ -7,7 +7,7 @@ #ifndef ZLIB_WRAP_H_428597064566 #define ZLIB_WRAP_H_428597064566 -#include "serialize.h" +#include <functional> #include "sys_error.h" @@ -16,21 +16,21 @@ namespace zen // compression level must be between 0 and 9: // 0: no compression // 9: best compression -template <class BinContainer> //as specified in serialize.h -BinContainer compress(const BinContainer& stream, int level); //throw SysError +std::string compress(const std::string_view& stream, int level); //throw SysError //caveat: output stream is physically larger than input! => strip additional reserved space if needed: "BinContainer(output.begin(), output.end())" -template <class BinContainer> -BinContainer decompress(const BinContainer& stream); //throw SysError +std::string decompress(const std::string_view& stream); //throw SysError class InputStreamAsGzip //convert input stream into gzip on the fly { public: - explicit InputStreamAsGzip( //throw SysError - const std::function<size_t(void* buffer, size_t bytesToRead)>& readBlock /*throw X; returning 0 signals EOF: Posix read() semantics*/); + explicit InputStreamAsGzip(const std::function<size_t(void* buffer, size_t bytesToRead)>& tryReadBlock /*throw X; may return short, only 0 means EOF!*/, + size_t blockSize); //throw SysError ~InputStreamAsGzip(); + size_t getBlockSize() const; + size_t read(void* buffer, size_t bytesToRead); //throw SysError, X; return "bytesToRead" bytes unless end of stream! private: @@ -38,85 +38,7 @@ private: const std::unique_ptr<Impl> pimpl_; }; -std::string compressAsGzip(const void* buffer, size_t bufSize); //throw SysError - - - - - - -//######################## implementation ########################## -namespace impl -{ -size_t zlib_compressBound(size_t len); -size_t zlib_compress (const void* src, size_t srcLen, void* trg, size_t trgLen, int level); //throw SysError -size_t zlib_decompress(const void* src, size_t srcLen, void* trg, size_t trgLen); //throw SysError -} - - -template <class BinContainer> -BinContainer compress(const BinContainer& stream, int level) //throw SysError -{ - BinContainer contOut; - if (!stream.empty()) //don't dereference iterator into empty container! - { - //save uncompressed stream size for decompression - const uint64_t uncompressedSize = stream.size(); //use portable number type! - contOut.resize(sizeof(uncompressedSize)); - std::memcpy(&contOut[0], &uncompressedSize, sizeof(uncompressedSize)); - - const size_t bufferEstimate = impl::zlib_compressBound(stream.size()); //upper limit for buffer size, larger than input size!!! - - contOut.resize(contOut.size() + bufferEstimate); - - const size_t bytesWritten = impl::zlib_compress(&*stream.begin(), - stream.size(), - &*contOut.begin() + contOut.size() - bufferEstimate, - bufferEstimate, - level); //throw SysError - if (bytesWritten < bufferEstimate) - contOut.resize(contOut.size() - (bufferEstimate - bytesWritten)); //caveat: unsigned arithmetics - //caveat: physical memory consumption still *unchanged*! - } - return contOut; -} - - -template <class BinContainer> -BinContainer decompress(const BinContainer& stream) //throw SysError -{ - BinContainer contOut; - if (!stream.empty()) //don't dereference iterator into empty container! - { - //retrieve size of uncompressed data - uint64_t uncompressedSize = 0; //use portable number type! - if (stream.size() < sizeof(uncompressedSize)) - throw SysError(L"zlib error: stream size < 8"); - - std::memcpy(&uncompressedSize, &*stream.begin(), sizeof(uncompressedSize)); - - //attention: contOut MUST NOT be empty! Else it will pass a nullptr to zlib_decompress() => Z_STREAM_ERROR although "uncompressedSize == 0"!!! - //secondary bug: don't dereference iterator into empty container! - if (uncompressedSize == 0) //cannot be 0: compress() directly maps empty -> empty container skipping zlib! - throw SysError(L"zlib error: uncompressed size == 0"); - - try - { - contOut.resize(static_cast<size_t>(uncompressedSize)); //throw std::bad_alloc - } - //most likely this is due to data corruption: - catch (const std::length_error& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } - catch (const std::bad_alloc& e) { throw SysError(L"zlib error: " + _("Out of memory.") + L' ' + utfTo<std::wstring>(e.what())); } - - const size_t bytesWritten = impl::zlib_decompress(&*stream.begin() + sizeof(uncompressedSize), - stream.size() - sizeof(uncompressedSize), - &*contOut.begin(), - static_cast<size_t>(uncompressedSize)); //throw SysError - if (bytesWritten != static_cast<size_t>(uncompressedSize)) - throw SysError(formatSystemError("zlib_decompress", L"", L"bytes written != uncompressed size.")); - } - return contOut; -} +std::string compressAsGzip(const std::string_view& stream); //throw SysError } #endif //ZLIB_WRAP_H_428597064566 diff --git a/zen/zstring.cpp b/zen/zstring.cpp index 3f5328f7..73f18cd1 100644 --- a/zen/zstring.cpp +++ b/zen/zstring.cpp @@ -11,16 +11,18 @@ using namespace zen; -Zstring getUnicodeNormalFormNonAscii(const Zstring& str) +namespace +{ +Zstring getUnicodeNormalForm_NonAsciiValidUtf(const Zstring& str, UnicodeNormalForm form) { - //Example: const char* decomposed = "\x6f\xcc\x81"; - // const char* precomposed = "\xc3\xb3"; + //Example: const char* decomposed = "\x6f\xcc\x81"; //ó + // const char* precomposed = "\xc3\xb3"; //ó assert(!isAsciiString(str)); //includes "not-empty" check assert(str.find(Zchar('\0')) == Zstring::npos); //don't expect embedded nulls! try { - gchar* outStr = ::g_utf8_normalize(str.c_str(), str.length(), G_NORMALIZE_DEFAULT_COMPOSE); + gchar* outStr = ::g_utf8_normalize(str.c_str(), str.length(), form == UnicodeNormalForm::nfc ? G_NORMALIZE_NFC : G_NORMALIZE_NFD); if (!outStr) throw SysError(formatSystemError("g_utf8_normalize", L"", L"Conversion failed.")); ZEN_ON_SCOPE_EXIT(::g_free(outStr)); @@ -29,26 +31,53 @@ Zstring getUnicodeNormalFormNonAscii(const Zstring& str) } catch (const SysError& e) { - throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error normalizing string:" + - '\n' + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString())); + throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error normalizing string:" + '\n' + + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString())); } } -Zstring getUnicodeNormalForm(const Zstring& str) +Zstring getUnicodeNormalFormNonAscii(const Zstring& str, UnicodeNormalForm form) { - //fast pre-check: - if (isAsciiString(str)) //perf: in the range of 3.5ns - return str; - static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save output string memory consumption!"); + /* 1. do NOT fail on broken UTF encoding, instead normalize using REPLACEMENT_CHAR! + 2. NormalizeString() haateeez them Unicode non-characters: ERROR_NO_UNICODE_TRANSLATION! http://www.unicode.org/faq/private_use.html#nonchar1 + - No such issue on Linux/macOS with g_utf8_normalize(), and CFStringGetFileSystemRepresentation() + -> still, probably good idea to "normalize" Unicode non-characters cross-platform + - consistency for compareNoCase(): let's *unconditionally* check before other normalization operations, not just in error case! */ + using impl::CodePoint; + auto isUnicodeNonCharacter = [](CodePoint cp) { assert(cp <= impl::CODE_POINT_MAX); return (0xfdd0 <= cp && cp <= 0xfdef) || cp % 0x10'000 >= 0xfffe; }; + + const bool invalidUtf = [&] //pre-check: avoid memory allocation if valid UTF + { + UtfDecoder<Zchar> decoder(str.c_str(), str.size()); + while (const std::optional<CodePoint> cp = decoder.getNext()) + if (*cp == impl::REPLACEMENT_CHAR || //marks broken UTF encoding + isUnicodeNonCharacter(*cp)) + return true; + return false; + }(); - return getUnicodeNormalFormNonAscii(str); + if (invalidUtf) //band-aid broken UTF encoding with REPLACEMENT_CHAR + { + Zstring validStr; //don't want extra memory allocations in the standard case (valid UTF) + UtfDecoder<Zchar> decoder(str.c_str(), str.size()); + while (std::optional<CodePoint> cp = decoder.getNext()) + { + if (isUnicodeNonCharacter(*cp)) // + *cp = impl::REPLACEMENT_CHAR; //"normalize" Unicode non-characters + + codePointToUtf<Zchar>(*cp, [&](Zchar ch) { validStr += ch; }); + } + return getUnicodeNormalForm_NonAsciiValidUtf(validStr, form); + } + else + return getUnicodeNormalForm_NonAsciiValidUtf(str, form); } Zstring getUpperCaseNonAscii(const Zstring& str) { - Zstring strNorm = getUnicodeNormalFormNonAscii(str); + Zstring strNorm = getUnicodeNormalFormNonAscii(str, UnicodeNormalForm::native); try { Zstring output; @@ -64,10 +93,22 @@ Zstring getUpperCaseNonAscii(const Zstring& str) } catch (const SysError& e) { - throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error converting string to upper case:" + - '\n' + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString())); + throw std::runtime_error(std::string(__FILE__) + '[' + numberTo<std::string>(__LINE__) + "] Error converting string to upper case:" + '\n' + + utfTo<std::string>(str) + "\n\n" + utfTo<std::string>(e.toString())); } } +} + + +Zstring getUnicodeNormalForm(const Zstring& str, UnicodeNormalForm form) +{ + //fast pre-check: + if (isAsciiString(str)) //perf: in the range of 3.5ns + return str; + static_assert(std::is_same_v<decltype(str), const Zbase<Zchar>&>, "god bless our ref-counting! => save needless memory allocation!"); + + return getUnicodeNormalFormNonAscii(str, form); +} Zstring getUpperCase(const Zstring& str) @@ -90,8 +131,8 @@ namespace std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* rhs, size_t rhsLen) { //expect Unicode normalized strings! - assert(std::string(lhs, lhsLen) == getUnicodeNormalForm(std::string(lhs, lhsLen))); - assert(std::string(rhs, rhsLen) == getUnicodeNormalForm(std::string(rhs, rhsLen))); + assert(Zstring(lhs, lhsLen) == getUnicodeNormalForm(Zstring(lhs, lhsLen), UnicodeNormalForm::nfd)); + assert(Zstring(rhs, rhsLen) == getUnicodeNormalForm(Zstring(rhs, rhsLen), UnicodeNormalForm::nfd)); //- strncasecmp implements ASCII CI-comparsion only! => signature is broken for UTF8-input; toupper() similarly doesn't support Unicode //- wcsncasecmp: https://opensource.apple.com/source/Libc/Libc-763.12/string/wcsncasecmp-fbsd.c @@ -121,14 +162,14 @@ std::weak_ordering compareNoCaseUtf8(const char* lhs, size_t lhsLen, const char* std::weak_ordering compareNatural(const Zstring& lhs, const Zstring& rhs) { - /* Unicode normal forms: - Windows: CompareString() already ignores NFD/NFC differences: nice... - Linux: g_unichar_toupper() can't ignore differences - macOS: CFStringCompare() considers differences */ try { - const Zstring& lhsNorm = getUnicodeNormalForm(lhs); - const Zstring& rhsNorm = getUnicodeNormalForm(rhs); + /* Unicode normal forms: + Windows: CompareString() ignores NFD/NFC differences and converts to NFD + Linux: g_unichar_toupper() can't ignore differences + macOS: CFStringCompare() considers differences */ + const Zstring& lhsNorm = getUnicodeNormalForm(lhs, UnicodeNormalForm::nfd); //normalize: - broken UTF encoding + const Zstring& rhsNorm = getUnicodeNormalForm(rhs, UnicodeNormalForm::nfd); // - Unicode non-characters const char* strL = lhsNorm.c_str(); const char* strR = rhsNorm.c_str(); diff --git a/zen/zstring.h b/zen/zstring.h index 692217c1..d0a8eb4c 100644 --- a/zen/zstring.h +++ b/zen/zstring.h @@ -27,7 +27,13 @@ using Zstringc = zen::Zbase<char>; //Windows, Linux: precomposed //macOS: decomposed -Zstring getUnicodeNormalForm(const Zstring& str); +enum class UnicodeNormalForm +{ + nfc, //precomposed + nfd, //decomposed + native = nfc, +}; +Zstring getUnicodeNormalForm(const Zstring& str, UnicodeNormalForm form = UnicodeNormalForm::native); /* "In fact, Unicode declares that there is an equivalence relationship between decomposed and composed sequences, and conformant software should not treat canonically equivalent sequences, whether composed or decomposed or something in between, as different." https://www.win.tue.nl/~aeb/linux/uc/nfc_vs_nfd.html */ |