From b06ca5f8602a0aa4fc87fac84362fb7de918cc39 Mon Sep 17 00:00:00 2001 From: "Peter (Stig) Edwards" Date: Fri, 22 Jul 2016 19:46:40 +0100 Subject: [PATCH] ldb load, prefer ifstream(/dev/stdin) to std::cin (#1207) getline on std::cin can be very inefficient when ldb is loading large values, with high CPU usage in libc _IO_(un)getc. This is because of the performance penalty that comes from synchronizing stdio and iostream buffers. See the reproducers and tests in #1133. If an ifstream on /dev/stdin is used (when available) then using ldb to load large values can be much more efficient. I thought that, for ldb load, this approach is preferable to using C stdio or std::ios_base::sync_with_stdio(false). I couldn't think of a use case where ldb load would need to support reading unbuffered input. An alternative approach would be to add support for passing --input_file=/dev/stdin. I have a CLA in place, thanks. The CI tests were failing at the time of https://github.com/facebook/rocksdb/pull/1156, so this change and PR will supersede it. --- tools/ldb_cmd.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 48a0bbbad..d0878f894 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -39,6 +39,7 @@ #include #include #include +#include namespace rocksdb { @@ -835,7 +836,10 @@ void DBLoaderCommand::DoCommand() { int bad_lines = 0; std::string line; - while (getline(std::cin, line, '\n')) { + // prefer ifstream getline performance vs that from std::cin istream + std::ifstream ifs_stdin("/dev/stdin"); + std::istream* istream_p = ifs_stdin.is_open() ? &ifs_stdin : &std::cin; + while (getline(*istream_p, line, '\n')) { std::string key; std::string value; if (ParseKeyValue(line, &key, &value, is_key_hex_, is_value_hex_)) {