diff --git a/.gitignore b/.gitignore index 0c7fd4f29..cc5116ec7 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ build_config.mk *.*jnilib* *.d-e *.o-* +*.swp ldb manifest_dump diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..66f37a5d2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,20 @@ +language: cpp +compiler: gcc +before_install: +# As of this writing (10 May 2014) the Travis build environment is Ubuntu 12.04, +# which needs the following ugly dependency incantations to build RocksDB: + - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + - sudo apt-get update -qq + - sudo apt-get install -y -qq gcc-4.8 g++-4.8 zlib1g-dev libbz2-dev libsnappy-dev libjemalloc-dev + - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 50 + - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50 + - wget https://gflags.googlecode.com/files/libgflags0_2.0-1_amd64.deb + - sudo dpkg -i libgflags0_2.0-1_amd64.deb + - wget https://gflags.googlecode.com/files/libgflags-dev_2.0-1_amd64.deb + - sudo dpkg -i libgflags-dev_2.0-1_amd64.deb +# Lousy hack to disable use and testing of fallocate, which doesn't behave quite +# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment. + - sed -i "s/fallocate(/HACK_NO_fallocate(/" build_tools/build_detect_platform +script: make check -j8 +notifications: + email: false diff --git a/HISTORY.md b/HISTORY.md index a80b11231..f9bdd7c98 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,9 @@ ### Public API changes * Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories +### New Features +* Hash index for block-based table will be materialized and reconstructed more efficiently. Previously hash index is constructed by scanning the whole table during every table open. + ## 3.0.0 (05/05/2014) ### Public API changes diff --git a/README.md b/README.md index b5a17f39a..fabced9a6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ ## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage +[![Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb) + RocksDB is developed and maintained by Facebook Database Engineering Team. It is built on on earlier work on LevelDB by Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) diff --git a/db/compaction.cc b/db/compaction.cc index bafb5b4ea..962ce1232 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -8,7 +8,13 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "db/compaction.h" + +#define __STDC_FORMAT_MACROS +#include +#include + #include "db/column_family.h" +#include "util/logging.h" namespace rocksdb { @@ -191,71 +197,51 @@ void Compaction::ResetNextCompactionIndex() { input_version_->ResetNextCompactionIndex(level_); } -/* -for sizes >=10TB, print "XXTB" -for sizes >=10GB, print "XXGB" -etc. -*/ -static void FileSizeSummary(unsigned long long sz, char* output, int len) { - const unsigned long long ull10 = 10; - if (sz >= ull10<<40) { - snprintf(output, len, "%lluTB", sz>>40); - } else if (sz >= ull10<<30) { - snprintf(output, len, "%lluGB", sz>>30); - } else if (sz >= ull10<<20) { - snprintf(output, len, "%lluMB", sz>>20); - } else if (sz >= ull10<<10) { - snprintf(output, len, "%lluKB", sz>>10); - } else { - snprintf(output, len, "%lluB", sz); - } -} - -static int InputSummary(std::vector& files, char* output, - int len) { +namespace { +int InputSummary(const std::vector& files, char* output, + int len) { *output = '\0'; int write = 0; for (unsigned int i = 0; i < files.size(); i++) { int sz = len - write; int ret; char sztxt[16]; - FileSizeSummary((unsigned long long)files.at(i)->file_size, sztxt, 16); - ret = snprintf(output + write, sz, "%lu(%s) ", - (unsigned long)files.at(i)->number, + AppendHumanBytes(files.at(i)->file_size, sztxt, 16); + ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ", files.at(i)->number, sztxt); - if (ret < 0 || ret >= sz) - break; + if (ret < 0 || ret >= sz) break; write += ret; } - return write; + // if files.size() is non-zero, overwrite the last space + return write - !!files.size(); } +} // namespace void Compaction::Summary(char* output, int len) { - int write = snprintf(output, len, - "Base version %lu Base level %d, seek compaction:%d, inputs: [", - (unsigned long)input_version_->GetVersionNumber(), - level_, - seek_compaction_); + int write = + snprintf(output, len, "Base version %" PRIu64 + " Base level %d, seek compaction:%d, inputs: [", + input_version_->GetVersionNumber(), level_, seek_compaction_); if (write < 0 || write >= len) { return; } - write += InputSummary(inputs_[0], output+write, len-write); + write += InputSummary(inputs_[0], output + write, len - write); if (write < 0 || write >= len) { return; } - write += snprintf(output+write, len-write, "],["); + write += snprintf(output + write, len - write, "], ["); if (write < 0 || write >= len) { return; } - write += InputSummary(inputs_[1], output+write, len-write); + write += InputSummary(inputs_[1], output + write, len - write); if (write < 0 || write >= len) { return; } - snprintf(output+write, len-write, "]"); + snprintf(output + write, len - write, "]"); } } // namespace rocksdb diff --git a/db/db_bench.cc b/db/db_bench.cc index 115443010..dbd8cecd9 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -159,14 +159,7 @@ DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run." DEFINE_int32(value_size, 100, "Size of each value"); -// the maximum size of key in bytes -static const int kMaxKeySize = 128; static bool ValidateKeySize(const char* flagname, int32_t value) { - if (value > kMaxKeySize) { - fprintf(stderr, "Invalid value for --%s: %d, must be < %d\n", - flagname, value, kMaxKeySize); - return false; - } return true; } diff --git a/db/db_test.cc b/db/db_test.cc index 693e56008..05403fc07 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2365,9 +2365,9 @@ TEST(DBTest, NumImmutableMemTable) { ASSERT_EQ(num, "0"); ASSERT_TRUE(dbfull()->GetProperty( handles_[1], "rocksdb.cur-size-active-mem-table", &num)); - // "208" is the size of the metadata of an empty skiplist, this would + // "200" is the size of the metadata of an empty skiplist, this would // break if we change the default skiplist implementation - ASSERT_EQ(num, "208"); + ASSERT_EQ(num, "200"); SetPerfLevel(kDisable); } while (ChangeCompactOptions()); } diff --git a/db/version_set.cc b/db/version_set.cc index 4a89e0226..02e9aa152 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -7,9 +7,9 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#define __STDC_FORMAT_MACROS #include "db/version_set.h" +#define __STDC_FORMAT_MACROS #include #include #include @@ -1151,6 +1151,10 @@ const char* Version::LevelSummary(LevelSummaryStorage* scratch) const { if (ret < 0 || ret >= sz) break; len += ret; } + if (len > 0) { + // overwrite the last space + --len; + } snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]"); return scratch->buffer; } @@ -1160,16 +1164,20 @@ const char* Version::LevelFileSummary(FileSummaryStorage* scratch, int len = snprintf(scratch->buffer, sizeof(scratch->buffer), "files_size["); for (const auto& f : files_[level]) { int sz = sizeof(scratch->buffer) - len; + char sztxt[16]; + AppendHumanBytes(f->file_size, sztxt, 16); int ret = snprintf(scratch->buffer + len, sz, - "#%lu(seq=%lu,sz=%lu,%lu) ", - (unsigned long)f->number, - (unsigned long)f->smallest_seqno, - (unsigned long)f->file_size, - (unsigned long)f->being_compacted); + "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->number, + f->smallest_seqno, sztxt, + static_cast(f->being_compacted)); if (ret < 0 || ret >= sz) break; len += ret; } + // overwrite the last space (only if files_[level].size() is non-zero) + if (files_[level].size() && len > 0) { + --len; + } snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]"); return scratch->buffer; } diff --git a/hdfs/env_hdfs.h b/hdfs/env_hdfs.h index 303cd81cf..e6fb8db12 100644 --- a/hdfs/env_hdfs.h +++ b/hdfs/env_hdfs.h @@ -18,9 +18,6 @@ namespace rocksdb { -static const std::string kProto = "hdfs://"; -static const std::string pathsep = "/"; - // Thrown during execution when there is an issue with the supplied // arguments. class HdfsUsageException : public std::exception { }; @@ -58,20 +55,23 @@ class HdfsEnv : public Env { } virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result); + std::unique_ptr* result, + const EnvOptions& options); virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result); + std::unique_ptr* result, + const EnvOptions& options); virtual Status NewWritableFile(const std::string& fname, - WritableFile** result); + std::unique_ptr* result, + const EnvOptions& options); virtual Status NewRandomRWFile(const std::string& fname, - unique_ptr* result, + std::unique_ptr* result, const EnvOptions& options); virtual Status NewDirectory(const std::string& name, - unique_ptr* result); + std::unique_ptr* result); virtual bool FileExists(const std::string& fname); @@ -97,7 +97,8 @@ class HdfsEnv : public Env { virtual Status UnlockFile(FileLock* lock); - virtual Status NewLogger(const std::string& fname, Logger** result); + virtual Status NewLogger(const std::string& fname, + std::shared_ptr* result); virtual void Schedule(void (*function)(void* arg), void* arg, Priority pri = LOW) { @@ -161,6 +162,9 @@ class HdfsEnv : public Env { // object here so that we can use posix timers, // posix threads, etc. + static const std::string kProto; + static const std::string pathsep; + /** * If the URI is specified of the form hdfs://server:port/path, * then connect to the specified cluster diff --git a/include/rocksdb/status.h b/include/rocksdb/status.h index dbd41fc9b..0298a2838 100644 --- a/include/rocksdb/status.h +++ b/include/rocksdb/status.h @@ -97,7 +97,6 @@ class Status { // Returns the string "OK" for success. std::string ToString() const; - private: enum Code { kOk = 0, kNotFound = 1, @@ -110,6 +109,10 @@ class Status { kShutdownInProgress = 8 }; + Code code() const { + return code_; + } + private: // A nullptr state_ (which is always the case for OK) means the message // is empty. // of the following form: @@ -118,9 +121,6 @@ class Status { Code code_; const char* state_; - Code code() const { - return code_; - } explicit Status(Code code) : code_(code), state_(nullptr) { } Status(Code code, const Slice& msg, const Slice& msg2); static const char* CopyState(const char* s); diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 4989b2be4..2ec6c1174 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -15,6 +15,8 @@ #include #include +#include +#include #include "db/dbformat.h" @@ -41,6 +43,8 @@ namespace rocksdb { +extern const std::string kHashIndexPrefixesBlock; +extern const std::string kHashIndexPrefixesMetadataBlock; namespace { typedef BlockBasedTableOptions::IndexType IndexType; @@ -57,6 +61,14 @@ typedef BlockBasedTableOptions::IndexType IndexType; // design that just works. class IndexBuilder { public: + // Index builder will construct a set of blocks which contain: + // 1. One primary index block. + // 2. (Optional) a set of metablocks that contains the metadata of the + // primary index. + struct IndexBlocks { + Slice index_block_contents; + std::unordered_map meta_blocks; + }; explicit IndexBuilder(const Comparator* comparator) : comparator_(comparator) {} @@ -72,15 +84,19 @@ class IndexBuilder { // the last one in the table // // REQUIRES: Finish() has not yet been called. - virtual void AddEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) = 0; + virtual void AddIndexEntry(std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle) = 0; + + // This method will be called whenever a key is added. The subclasses may + // override OnKeyAdded() if they need to collect additional information. + virtual void OnKeyAdded(const Slice& key) {} // Inform the index builder that all entries has been written. Block builder // may therefore perform any operation required for block finalization. // // REQUIRES: Finish() has not yet been called. - virtual Slice Finish() = 0; + virtual Status Finish(IndexBlocks* index_blocks) = 0; // Get the estimated size for index block. virtual size_t EstimatedSize() const = 0; @@ -103,9 +119,9 @@ class ShortenedIndexBuilder : public IndexBuilder { : IndexBuilder(comparator), index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {} - virtual void AddEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) override { + virtual void AddIndexEntry(std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle) override { if (first_key_in_next_block != nullptr) { comparator_->FindShortestSeparator(last_key_in_current_block, *first_key_in_next_block); @@ -118,7 +134,10 @@ class ShortenedIndexBuilder : public IndexBuilder { index_block_builder_.Add(*last_key_in_current_block, handle_encoding); } - virtual Slice Finish() override { return index_block_builder_.Finish(); } + virtual Status Finish(IndexBlocks* index_blocks) { + index_blocks->index_block_contents = index_block_builder_.Finish(); + return Status::OK(); + } virtual size_t EstimatedSize() const { return index_block_builder_.CurrentSizeEstimate(); @@ -128,38 +147,125 @@ class ShortenedIndexBuilder : public IndexBuilder { BlockBuilder index_block_builder_; }; -// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like -// ShortenedIndexBuilder, but preserves the full key instead the substitude key. -class FullKeyIndexBuilder : public IndexBuilder { +// HashIndexBuilder contains a binary-searchable primary index and the +// metadata for secondary hash index construction. +// The metadata for hash index consists two parts: +// - a metablock that compactly contains a sequence of prefixes. All prefixes +// are stored consectively without any metadata (like, prefix sizes) being +// stored, which is kept in the other metablock. +// - a metablock contains the metadata of the prefixes, including prefix size, +// restart index and number of block it spans. The format looks like: +// +// +-----------------+---------------------------+---------------------+ <=prefix 1 +// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | +// +-----------------+---------------------------+---------------------+ <=prefix 2 +// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | +// +-----------------+---------------------------+---------------------+ +// | | +// | .... | +// | | +// +-----------------+---------------------------+---------------------+ <=prefix n +// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes | +// +-----------------+---------------------------+---------------------+ +// +// The reason of separating these two metablocks is to enable the efficiently +// reuse the first metablock during hash index construction without unnecessary +// data copy or small heap allocations for prefixes. +class HashIndexBuilder : public IndexBuilder { public: - explicit FullKeyIndexBuilder(const Comparator* comparator) + explicit HashIndexBuilder(const Comparator* comparator, + const SliceTransform* hash_key_extractor) : IndexBuilder(comparator), - index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {} + primary_index_builder(comparator), + hash_key_extractor_(hash_key_extractor) {} + + virtual void AddIndexEntry(std::string* last_key_in_current_block, + const Slice* first_key_in_next_block, + const BlockHandle& block_handle) override { + ++current_restart_index_; + primary_index_builder.AddIndexEntry(last_key_in_current_block, + first_key_in_next_block, block_handle); + } - virtual void AddEntry(std::string* last_key_in_current_block, - const Slice* first_key_in_next_block, - const BlockHandle& block_handle) override { - std::string handle_encoding; - block_handle.EncodeTo(&handle_encoding); - index_block_builder_.Add(*last_key_in_current_block, handle_encoding); + virtual void OnKeyAdded(const Slice& key) override { + auto key_prefix = hash_key_extractor_->Transform(key); + bool is_first_entry = pending_block_num_ == 0; + + // Keys may share the prefix + if (is_first_entry || pending_entry_prefix_ != key_prefix) { + if (!is_first_entry) { + FlushPendingPrefix(); + } + + // need a hard copy otherwise the underlying data changes all the time. + // TODO(kailiu) ToString() is expensive. We may speed up can avoid data + // copy. + pending_entry_prefix_ = key_prefix.ToString(); + pending_block_num_ = 1; + pending_entry_index_ = current_restart_index_; + } else { + // entry number increments when keys share the prefix reside in + // differnt data blocks. + auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1; + assert(last_restart_index <= current_restart_index_); + if (last_restart_index != current_restart_index_) { + ++pending_block_num_; + } + } } - virtual Slice Finish() override { return index_block_builder_.Finish(); } + virtual Status Finish(IndexBlocks* index_blocks) { + FlushPendingPrefix(); + primary_index_builder.Finish(index_blocks); + index_blocks->meta_blocks.insert( + {kHashIndexPrefixesBlock.c_str(), prefix_block_}); + index_blocks->meta_blocks.insert( + {kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_}); + return Status::OK(); + } virtual size_t EstimatedSize() const { - return index_block_builder_.CurrentSizeEstimate(); + return primary_index_builder.EstimatedSize() + prefix_block_.size() + + prefix_meta_block_.size(); } private: - BlockBuilder index_block_builder_; + void FlushPendingPrefix() { + prefix_block_.append(pending_entry_prefix_.data(), + pending_entry_prefix_.size()); + PutVarint32(&prefix_meta_block_, pending_entry_prefix_.size()); + PutVarint32(&prefix_meta_block_, pending_entry_index_); + PutVarint32(&prefix_meta_block_, pending_block_num_); + } + + ShortenedIndexBuilder primary_index_builder; + const SliceTransform* hash_key_extractor_; + + // stores a sequence of prefixes + std::string prefix_block_; + // stores the metadata of prefixes + std::string prefix_meta_block_; + + // The following 3 variables keeps unflushed prefix and its metadata. + // The details of block_num and entry_index can be found in + // "block_hash_index.{h,cc}" + uint32_t pending_block_num_ = 0; + uint32_t pending_entry_index_ = 0; + std::string pending_entry_prefix_; + + uint64_t current_restart_index_ = 0; }; // Create a index builder based on its type. -IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) { +IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator, + const SliceTransform* prefix_extractor) { switch (type) { case BlockBasedTableOptions::kBinarySearch: { return new ShortenedIndexBuilder(comparator); } + case BlockBasedTableOptions::kHashSearch: { + return new HashIndexBuilder(comparator, prefix_extractor); + } default: { assert(!"Do not recognize the index type "); return nullptr; @@ -249,7 +355,7 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull; class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector : public TablePropertiesCollector { public: - BlockBasedTablePropertiesCollector( + explicit BlockBasedTablePropertiesCollector( BlockBasedTableOptions::IndexType index_type) : index_type_(index_type) {} @@ -288,6 +394,8 @@ struct BlockBasedTableBuilder::Rep { uint64_t offset = 0; Status status; BlockBuilder data_block; + + InternalKeySliceTransform internal_prefix_transform; std::unique_ptr index_builder; std::string last_key; @@ -316,8 +424,9 @@ struct BlockBasedTableBuilder::Rep { internal_comparator(icomparator), file(f), data_block(options, &internal_comparator), - index_builder( - CreateIndexBuilder(index_block_type, &internal_comparator)), + internal_prefix_transform(options.prefix_extractor.get()), + index_builder(CreateIndexBuilder(index_block_type, &internal_comparator, + &this->internal_prefix_transform)), compression_type(compression_type), checksum_type(checksum_type), filter_block(opt.filter_policy == nullptr @@ -335,16 +444,13 @@ struct BlockBasedTableBuilder::Rep { } }; -// TODO(sdong): Currently only write out binary search index. In -// BlockBasedTableReader, Hash index will be built using binary search index. BlockBasedTableBuilder::BlockBasedTableBuilder( const Options& options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_comparator, WritableFile* file, CompressionType compression_type) : rep_(new Rep(options, internal_comparator, file, table_options.flush_block_policy_factory.get(), - compression_type, - BlockBasedTableOptions::IndexType::kBinarySearch, + compression_type, table_options.index_type, table_options.checksum)) { if (rep_->filter_block != nullptr) { rep_->filter_block->StartBlock(0); @@ -370,7 +476,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { if (r->props.num_entries > 0) { assert(r->internal_comparator.Compare(key, Slice(r->last_key)) > 0); } - + r->index_builder->OnKeyAdded(key); auto should_flush = r->flush_block_policy->Update(key, value); if (should_flush) { assert(!r->data_block.empty()); @@ -385,7 +491,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { // entries in the first block and < all entries in subsequent // blocks. if (ok()) { - r->index_builder->AddEntry(&r->last_key, &key, r->pending_handle); + r->index_builder->AddIndexEntry(&r->last_key, &key, r->pending_handle); } } @@ -561,24 +667,36 @@ Status BlockBasedTableBuilder::Finish() { // block, we will finish writing all index entries here and flush them // to storage after metaindex block is written. if (ok() && !empty_data_block) { - r->index_builder->AddEntry(&r->last_key, nullptr /* no next data block */, - r->pending_handle); + r->index_builder->AddIndexEntry( + &r->last_key, nullptr /* no next data block */, r->pending_handle); + } + + IndexBuilder::IndexBlocks index_blocks; + auto s = r->index_builder->Finish(&index_blocks); + if (!s.ok()) { + return s; } // Write meta blocks and metaindex block with the following order. // 1. [meta block: filter] - // 2. [meta block: properties] - // 3. [metaindex block] - if (ok()) { - MetaIndexBuilder meta_index_builer; + // 2. [other meta blocks] + // 3. [meta block: properties] + // 4. [metaindex block] + // write meta blocks + MetaIndexBuilder meta_index_builder; + for (const auto& item : index_blocks.meta_blocks) { + BlockHandle block_handle; + WriteBlock(item.second, &block_handle); + meta_index_builder.Add(item.first, block_handle); + } - // Write filter block. + if (ok()) { if (r->filter_block != nullptr) { // Add mapping from ".Name" to location // of filter data. std::string key = BlockBasedTable::kFilterBlockPrefix; key.append(r->options.filter_policy->Name()); - meta_index_builer.Add(key, filter_block_handle); + meta_index_builder.Add(key, filter_block_handle); } // Write properties block. @@ -605,20 +723,16 @@ Status BlockBasedTableBuilder::Finish() { &properties_block_handle ); - meta_index_builer.Add(kPropertiesBlock, - properties_block_handle); + meta_index_builder.Add(kPropertiesBlock, properties_block_handle); } // end of properties block writing - - WriteRawBlock( - meta_index_builer.Finish(), - kNoCompression, - &metaindex_block_handle - ); - } // meta blocks and metaindex block. + } // meta blocks // Write index block if (ok()) { - WriteBlock(r->index_builder->Finish(), &index_block_handle); + // flush the meta index block + WriteRawBlock(meta_index_builder.Finish(), kNoCompression, + &metaindex_block_handle); + WriteBlock(index_blocks.index_block_contents, &index_block_handle); } // Write footer @@ -685,7 +799,6 @@ uint64_t BlockBasedTableBuilder::FileSize() const { return rep_->offset; } -const std::string BlockBasedTable::kFilterBlockPrefix = - "filter."; +const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; } // namespace rocksdb diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 822adee22..22fd0dd93 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -56,5 +56,8 @@ TableFactory* NewBlockBasedTableFactory( const std::string BlockBasedTablePropertyNames::kIndexType = "rocksdb.block.based.table.index.type"; +const std::string kHashIndexPrefixesBlock = "rocksdb.hashindex.prefixes"; +const std::string kHashIndexPrefixesMetadataBlock = + "rocksdb.hashindex.metadata"; } // namespace rocksdb diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index 492349c55..656b531ae 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -8,9 +8,11 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once -#include #include +#include +#include + #include "rocksdb/flush_block_policy.h" #include "rocksdb/options.h" #include "rocksdb/table.h" @@ -45,4 +47,7 @@ class BlockBasedTableFactory : public TableFactory { BlockBasedTableOptions table_options_; }; +extern const std::string kHashIndexPrefixesBlock; +extern const std::string kHashIndexPrefixesMetadataBlock; + } // namespace rocksdb diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 953800787..4fcdfd29e 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -38,6 +38,8 @@ namespace rocksdb { extern const uint64_t kBlockBasedTableMagicNumber; +extern const std::string kHashIndexPrefixesBlock; +extern const std::string kHashIndexPrefixesMetadataBlock; using std::unique_ptr; typedef BlockBasedTable::IndexReader IndexReader; @@ -186,19 +188,13 @@ class BinarySearchIndexReader : public IndexReader { // Index that leverages an internal hash table to quicken the lookup for a given // key. -// @param data_iter_gen, equavalent to BlockBasedTable::NewIterator(). But that -// functions requires index to be initalized. To avoid this problem external -// caller will pass a function that can create the iterator over the entries -// without the table to be fully initialized. class HashIndexReader : public IndexReader { public: - static Status Create(RandomAccessFile* file, const Footer& footer, - const BlockHandle& index_handle, Env* env, + static Status Create(const SliceTransform* hash_key_extractor, + const Footer& footer, RandomAccessFile* file, Env* env, const Comparator* comparator, - std::function data_iter_gen, - const SliceTransform* prefix_extractor, - IndexReader** index_reader) { - assert(prefix_extractor); + const BlockHandle& index_handle, + Iterator* meta_index_iter, IndexReader** index_reader) { Block* index_block = nullptr; auto s = ReadBlockFromFile(file, footer, ReadOptions(), index_handle, &index_block, env); @@ -207,14 +203,57 @@ class HashIndexReader : public IndexReader { return s; } - *index_reader = new HashIndexReader(comparator, index_block); - std::unique_ptr index_iter(index_block->NewIterator(nullptr)); - std::unique_ptr data_iter( - data_iter_gen(index_block->NewIterator(nullptr))); - auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(), - index_block->NumRestarts(), - comparator, prefix_extractor); - index_block->SetBlockHashIndex(hash_index); + // Get prefixes block + BlockHandle prefixes_handle; + s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesBlock, + &prefixes_handle); + if (!s.ok()) { + return s; + } + + // Get index metadata block + BlockHandle prefixes_meta_handle; + s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesMetadataBlock, + &prefixes_meta_handle); + if (!s.ok()) { + return s; + } + + // Read contents for the blocks + BlockContents prefixes_contents; + s = ReadBlockContents(file, footer, ReadOptions(), prefixes_handle, + &prefixes_contents, env, true /* do decompression */); + if (!s.ok()) { + return s; + } + BlockContents prefixes_meta_contents; + s = ReadBlockContents(file, footer, ReadOptions(), prefixes_meta_handle, + &prefixes_meta_contents, env, + true /* do decompression */); + if (!s.ok()) { + if (prefixes_contents.heap_allocated) { + delete[] prefixes_contents.data.data(); + } + return s; + } + + auto new_index_reader = + new HashIndexReader(comparator, index_block, prefixes_contents); + BlockHashIndex* hash_index = nullptr; + s = CreateBlockHashIndex(hash_key_extractor, prefixes_contents.data, + prefixes_meta_contents.data, &hash_index); + if (!s.ok()) { + return s; + } + + new_index_reader->index_block_->SetBlockHashIndex(hash_index); + + *index_reader = new_index_reader; + + // release resources + if (prefixes_meta_contents.heap_allocated) { + delete[] prefixes_meta_contents.data.data(); + } return s; } @@ -225,11 +264,22 @@ class HashIndexReader : public IndexReader { virtual size_t size() const override { return index_block_->size(); } private: - HashIndexReader(const Comparator* comparator, Block* index_block) - : IndexReader(comparator), index_block_(index_block) { + HashIndexReader(const Comparator* comparator, Block* index_block, + const BlockContents& prefixes_contents) + : IndexReader(comparator), + index_block_(index_block), + prefixes_contents_(prefixes_contents) { assert(index_block_ != nullptr); } + + ~HashIndexReader() { + if (prefixes_contents_.heap_allocated) { + delete[] prefixes_contents_.data.data(); + } + } + std::unique_ptr index_block_; + BlockContents prefixes_contents_; }; @@ -408,7 +458,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, // and with a same life-time as this table object. IndexReader* index_reader = nullptr; // TODO: we never really verify check sum for index block - s = new_table->CreateIndexReader(&index_reader); + s = new_table->CreateIndexReader(&index_reader, meta_iter.get()); if (s.ok()) { rep->index_reader.reset(index_reader); @@ -417,10 +467,9 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, if (rep->options.filter_policy) { std::string key = kFilterBlockPrefix; key.append(rep->options.filter_policy->Name()); - meta_iter->Seek(key); - - if (meta_iter->Valid() && meta_iter->key() == Slice(key)) { - rep->filter.reset(ReadFilter(meta_iter->value(), rep)); + BlockHandle handle; + if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) { + rep->filter.reset(ReadFilter(handle, rep)); } } } else { @@ -617,16 +666,9 @@ Status BlockBasedTable::PutDataBlockToCache( return s; } -FilterBlockReader* BlockBasedTable::ReadFilter ( - const Slice& filter_handle_value, - BlockBasedTable::Rep* rep, - size_t* filter_size) { - Slice v = filter_handle_value; - BlockHandle filter_handle; - if (!filter_handle.DecodeFrom(&v).ok()) { - return nullptr; - } - +FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle, + BlockBasedTable::Rep* rep, + size_t* filter_size) { // TODO: We might want to unify with ReadBlockFromFile() if we start // requiring checksum verification in Table::Open. ReadOptions opt; @@ -687,10 +729,9 @@ BlockBasedTable::CachableEntry BlockBasedTable::GetFilter( if (s.ok()) { std::string filter_block_key = kFilterBlockPrefix; filter_block_key.append(rep_->options.filter_policy->Name()); - iter->Seek(filter_block_key); - - if (iter->Valid() && iter->key() == Slice(filter_block_key)) { - filter = ReadFilter(iter->value(), rep_, &filter_size); + BlockHandle handle; + if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) { + filter = ReadFilter(handle, rep_, &filter_size); assert(filter); assert(filter_size > 0); @@ -1032,7 +1073,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, // 3. options // 4. internal_comparator // 5. index_type -Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) { +Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader, + Iterator* preloaded_meta_index_iter) { // Some old version of block-based tables don't have index type present in // table properties. If that's the case we can safely use the kBinarySearch. auto index_type_on_file = BlockBasedTableOptions::kBinarySearch; @@ -1045,41 +1087,45 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) { } } - // TODO(sdong): Currently binary index is the only index type we support in - // files. Hash index is built on top of binary index too. - if (index_type_on_file != BlockBasedTableOptions::kBinarySearch) { - return Status::NotSupported("File Contains not supported index type: ", - std::to_string(index_type_on_file)); - } - auto file = rep_->file.get(); auto env = rep_->options.env; auto comparator = &rep_->internal_comparator; const Footer& footer = rep_->footer; - switch (rep_->index_type) { + switch (index_type_on_file) { case BlockBasedTableOptions::kBinarySearch: { return BinarySearchIndexReader::Create( file, footer, footer.index_handle(), env, comparator, index_reader); } case BlockBasedTableOptions::kHashSearch: { + std::unique_ptr meta_guard; + std::unique_ptr meta_iter_guard; + auto meta_index_iter = preloaded_meta_index_iter; + if (meta_index_iter == nullptr) { + auto s = ReadMetaBlock(rep_, &meta_guard, &meta_iter_guard); + if (!s.ok()) { + return Status::Corruption("Unable to read the metaindex block"); + } + meta_index_iter = meta_iter_guard.get(); + } + // We need to wrap data with internal_prefix_transform to make sure it can // handle prefix correctly. + if (rep_->options.prefix_extractor == nullptr) { + return Status::InvalidArgument( + "BlockBasedTableOptions::kHashSearch requires " + "options.prefix_extractor to be set."); + } + rep_->internal_prefix_transform.reset( new InternalKeySliceTransform(rep_->options.prefix_extractor.get())); return HashIndexReader::Create( - file, footer, footer.index_handle(), env, comparator, - [&](Iterator* index_iter) { - return NewTwoLevelIterator(new BlockEntryIteratorState(this, - ReadOptions(), nullptr), index_iter); - }, - rep_->internal_prefix_transform.get(), index_reader); + rep_->internal_prefix_transform.get(), footer, file, env, comparator, + footer.index_handle(), meta_index_iter, index_reader); } default: { std::string error_message = "Unrecognized index type: " + std::to_string(rep_->index_type); - // equivalent to assert(false), but more informative. - assert(!error_message.c_str()); return Status::InvalidArgument(error_message.c_str()); } } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index f68d6423d..cfb1d0af0 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -160,8 +160,13 @@ class BlockBasedTable : public TableReader { friend class BlockBasedTableBuilder; void ReadMeta(const Footer& footer); - void ReadFilter(const Slice& filter_handle_value); - Status CreateIndexReader(IndexReader** index_reader); + + // Create a index reader based on the index type stored in the table. + // Optionally, user can pass a preloaded meta_index_iter for the index that + // need to access extra meta blocks for index construction. This parameter + // helps avoid re-reading meta index block if caller already created one. + Status CreateIndexReader(IndexReader** index_reader, + Iterator* preloaded_meta_index_iter = nullptr); // Read the meta block from sst. static Status ReadMetaBlock( @@ -170,10 +175,8 @@ class BlockBasedTable : public TableReader { std::unique_ptr* iter); // Create the filter from the filter block. - static FilterBlockReader* ReadFilter( - const Slice& filter_handle_value, - Rep* rep, - size_t* filter_size = nullptr); + static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle, + Rep* rep, size_t* filter_size = nullptr); static void SetupCacheKeyPrefix(Rep* rep); diff --git a/table/block_hash_index.cc b/table/block_hash_index.cc index 0c9674c95..7a6e219a0 100644 --- a/table/block_hash_index.cc +++ b/table/block_hash_index.cc @@ -3,21 +3,62 @@ // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. +#include "table/block_hash_index.h" + #include -#include "table/block_hash_index.h" #include "rocksdb/comparator.h" #include "rocksdb/iterator.h" #include "rocksdb/slice_transform.h" +#include "util/coding.h" namespace rocksdb { -BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter, - const uint32_t num_restarts, - const Comparator* comparator, - const SliceTransform* hash_key_extractor) { +Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor, + const Slice& prefixes, const Slice& prefix_meta, + BlockHashIndex** hash_index) { + uint64_t pos = 0; + auto meta_pos = prefix_meta; + Status s; + *hash_index = new BlockHashIndex( + hash_key_extractor, + false /* external module manages memory space for prefixes */); + + while (!meta_pos.empty()) { + uint32_t prefix_size = 0; + uint32_t entry_index = 0; + uint32_t num_blocks = 0; + if (!GetVarint32(&meta_pos, &prefix_size) || + !GetVarint32(&meta_pos, &entry_index) || + !GetVarint32(&meta_pos, &num_blocks)) { + s = Status::Corruption( + "Corrupted prefix meta block: unable to read from it."); + break; + } + Slice prefix(prefixes.data() + pos, prefix_size); + (*hash_index)->Add(prefix, entry_index, num_blocks); + + pos += prefix_size; + } + + if (s.ok() && pos != prefixes.size()) { + s = Status::Corruption("Corrupted prefix meta block"); + } + + if (!s.ok()) { + delete *hash_index; + } + + return s; +} + +BlockHashIndex* CreateBlockHashIndexOnTheFly( + Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts, + const Comparator* comparator, const SliceTransform* hash_key_extractor) { assert(hash_key_extractor); - auto hash_index = new BlockHashIndex(hash_key_extractor); + auto hash_index = new BlockHashIndex( + hash_key_extractor, + true /* hash_index will copy prefix when Add() is called */); uint64_t current_restart_index = 0; std::string pending_entry_prefix; @@ -88,12 +129,16 @@ BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter, bool BlockHashIndex::Add(const Slice& prefix, uint32_t restart_index, uint32_t num_blocks) { - auto prefix_ptr = arena_.Allocate(prefix.size()); - std::copy(prefix.data() /* begin */, prefix.data() + prefix.size() /* end */, - prefix_ptr /* destination */); - auto result = - restart_indices_.insert({Slice(prefix_ptr, prefix.size()), - RestartIndex(restart_index, num_blocks)}); + auto prefix_to_insert = prefix; + if (kOwnPrefixes) { + auto prefix_ptr = arena_.Allocate(prefix.size()); + std::copy(prefix.data() /* begin */, + prefix.data() + prefix.size() /* end */, + prefix_ptr /* destination */); + prefix_to_insert = Slice(prefix_ptr, prefix.size()); + } + auto result = restart_indices_.insert( + {prefix_to_insert, RestartIndex(restart_index, num_blocks)}); return result.second; } diff --git a/table/block_hash_index.h b/table/block_hash_index.h index 0ff65b418..d5603d366 100644 --- a/table/block_hash_index.h +++ b/table/block_hash_index.h @@ -7,6 +7,7 @@ #include #include +#include "rocksdb/status.h" #include "util/arena.h" #include "util/murmurhash.h" @@ -35,8 +36,12 @@ class BlockHashIndex { uint32_t num_blocks = 1; }; - explicit BlockHashIndex(const SliceTransform* hash_key_extractor) - : hash_key_extractor_(hash_key_extractor) {} + // @params own_prefixes indicate if we should take care the memory space for + // the `key_prefix` + // passed by Add() + explicit BlockHashIndex(const SliceTransform* hash_key_extractor, + bool own_prefixes) + : hash_key_extractor_(hash_key_extractor), kOwnPrefixes(own_prefixes) {} // Maps a key to its restart first_index. // Returns nullptr if the restart first_index is found @@ -52,9 +57,18 @@ class BlockHashIndex { private: const SliceTransform* hash_key_extractor_; std::unordered_map restart_indices_; + Arena arena_; + bool kOwnPrefixes; }; +// Create hash index by reading from the metadata blocks. +// @params prefixes: a sequence of prefixes. +// @params prefix_meta: contains the "metadata" to of the prefixes. +Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor, + const Slice& prefixes, const Slice& prefix_meta, + BlockHashIndex** hash_index); + // Create hash index by scanning the entries in index as well as the whole // dataset. // @params index_iter: an iterator with the pointer to the first entry in a @@ -64,9 +78,8 @@ class BlockHashIndex { // @params num_restarts: used for correctness verification. // @params hash_key_extractor: extract the hashable part of a given key. // On error, nullptr will be returned. -BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter, - const uint32_t num_restarts, - const Comparator* comparator, - const SliceTransform* hash_key_extractor); +BlockHashIndex* CreateBlockHashIndexOnTheFly( + Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts, + const Comparator* comparator, const SliceTransform* hash_key_extractor); } // namespace rocksdb diff --git a/table/block_hash_index_test.cc b/table/block_hash_index_test.cc index f4c0ac4a4..6f7bcb2b7 100644 --- a/table/block_hash_index_test.cc +++ b/table/block_hash_index_test.cc @@ -81,9 +81,9 @@ TEST(BlockTest, BasicTest) { MapIterator index_iter(index_entries); auto prefix_extractor = NewFixedPrefixTransform(prefix_size); - std::unique_ptr block_hash_index( - CreateBlockHashIndex(&index_iter, &data_iter, index_entries.size(), - BytewiseComparator(), prefix_extractor)); + std::unique_ptr block_hash_index(CreateBlockHashIndexOnTheFly( + &index_iter, &data_iter, index_entries.size(), BytewiseComparator(), + prefix_extractor)); std::map expected = { {"01xx", BlockHashIndex::RestartIndex(0, 1)}, diff --git a/table/block_test.cc b/table/block_test.cc index fdba8e99b..8ef4a5a8d 100644 --- a/table/block_test.cc +++ b/table/block_test.cc @@ -163,9 +163,9 @@ void CheckBlockContents(BlockContents contents, const int max_key, { auto iter1 = reader1.NewIterator(nullptr); auto iter2 = reader1.NewIterator(nullptr); - reader1.SetBlockHashIndex(CreateBlockHashIndex(iter1, iter2, keys.size(), - BytewiseComparator(), - prefix_extractor.get())); + reader1.SetBlockHashIndex(CreateBlockHashIndexOnTheFly( + iter1, iter2, keys.size(), BytewiseComparator(), + prefix_extractor.get())); delete iter1; delete iter2; diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 4d20b5975..7443eb731 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -254,11 +254,23 @@ Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, properties); } else { s = Status::Corruption("Unable to read the property block."); - Log(WARN_LEVEL, info_log, - "Cannot find Properties block from file."); + Log(WARN_LEVEL, info_log, "Cannot find Properties block from file."); } return s; } +Status FindMetaBlock(Iterator* meta_index_iter, + const std::string& meta_block_name, + BlockHandle* block_handle) { + meta_index_iter->Seek(meta_block_name); + if (meta_index_iter->status().ok() && meta_index_iter->Valid() && + meta_index_iter->key() == meta_block_name) { + Slice v = meta_index_iter->value(); + return block_handle->DecodeFrom(&v); + } else { + return Status::Corruption("Cannot find the meta block", meta_block_name); + } +} + } // namespace rocksdb diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 6cd20adec..6cfc0babd 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -123,4 +123,9 @@ Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, // set to true. extern Status SeekToPropertiesBlock(Iterator* meta_iter, bool* is_found); +// Find the meta block from the meta index block. +Status FindMetaBlock(Iterator* meta_index_iter, + const std::string& meta_block_name, + BlockHandle* block_handle); + } // namespace rocksdb diff --git a/third-party/rapidjson/document.h b/third-party/rapidjson/document.h new file mode 100644 index 000000000..83d95a33d --- /dev/null +++ b/third-party/rapidjson/document.h @@ -0,0 +1,821 @@ +#ifndef RAPIDJSON_DOCUMENT_H_ +#define RAPIDJSON_DOCUMENT_H_ + +#include "reader.h" +#include "internal/strfunc.h" +#include // placement new + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// GenericValue + +//! Represents a JSON value. Use Value for UTF8 encoding and default allocator. +/*! + A JSON value can be one of 7 types. This class is a variant type supporting + these types. + + Use the Value if UTF8 and default allocator + + \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document) + \tparam Allocator Allocator type for allocating memory of object, array and string. +*/ +#pragma pack (push, 4) +template > +class GenericValue { +public: + //! Name-value pair in an object. + struct Member { + GenericValue name; //!< name of member (must be a string) + GenericValue value; //!< value of member. + }; + + typedef Encoding EncodingType; //!< Encoding type from template parameter. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef Member* MemberIterator; //!< Member iterator for iterating in object. + typedef const Member* ConstMemberIterator; //!< Constant member iterator for iterating in object. + typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array. + typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array. + + //!@name Constructors and destructor. + //@{ + + //! Default constructor creates a null value. + GenericValue() : flags_(kNullFlag) {} + + //! Copy constructor is not permitted. +private: + GenericValue(const GenericValue& rhs); + +public: + + //! Constructor with JSON value type. + /*! This creates a Value of specified type with default content. + \param type Type of the value. + \note Default content for number is zero. + */ + GenericValue(Type type) { + static const unsigned defaultFlags[7] = { + kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kConstStringFlag, + kNumberFlag | kIntFlag | kUintFlag | kInt64Flag | kUint64Flag | kDoubleFlag + }; + RAPIDJSON_ASSERT(type <= kNumberType); + flags_ = defaultFlags[type]; + memset(&data_, 0, sizeof(data_)); + } + + //! Constructor for boolean value. + GenericValue(bool b) : flags_(b ? kTrueFlag : kFalseFlag) {} + + //! Constructor for int value. + GenericValue(int i) : flags_(kNumberIntFlag) { + data_.n.i64 = i; + if (i >= 0) + flags_ |= kUintFlag | kUint64Flag; + } + + //! Constructor for unsigned value. + GenericValue(unsigned u) : flags_(kNumberUintFlag) { + data_.n.u64 = u; + if (!(u & 0x80000000)) + flags_ |= kIntFlag | kInt64Flag; + } + + //! Constructor for int64_t value. + GenericValue(int64_t i64) : flags_(kNumberInt64Flag) { + data_.n.i64 = i64; + if (i64 >= 0) { + flags_ |= kNumberUint64Flag; + if (!(i64 & 0xFFFFFFFF00000000LL)) + flags_ |= kUintFlag; + if (!(i64 & 0xFFFFFFFF80000000LL)) + flags_ |= kIntFlag; + } + else if (i64 >= -2147483648LL) + flags_ |= kIntFlag; + } + + //! Constructor for uint64_t value. + GenericValue(uint64_t u64) : flags_(kNumberUint64Flag) { + data_.n.u64 = u64; + if (!(u64 & 0x8000000000000000ULL)) + flags_ |= kInt64Flag; + if (!(u64 & 0xFFFFFFFF00000000ULL)) + flags_ |= kUintFlag; + if (!(u64 & 0xFFFFFFFF80000000ULL)) + flags_ |= kIntFlag; + } + + //! Constructor for double value. + GenericValue(double d) : flags_(kNumberDoubleFlag) { data_.n.d = d; } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s, SizeType length) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kConstStringFlag; + data_.s.str = s; + data_.s.length = length; + } + + //! Constructor for constant string (i.e. do not make a copy of string) + GenericValue(const Ch* s) { SetStringRaw(s, internal::StrLen(s)); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch* s, SizeType length, Allocator& allocator) { SetStringRaw(s, length, allocator); } + + //! Constructor for copy-string (i.e. do make a copy of string) + GenericValue(const Ch*s, Allocator& allocator) { SetStringRaw(s, internal::StrLen(s), allocator); } + + //! Destructor. + /*! Need to destruct elements of array, members of object, or copy-string. + */ + ~GenericValue() { + if (Allocator::kNeedFree) { // Shortcut by Allocator's trait + switch(flags_) { + case kArrayFlag: + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + v->~GenericValue(); + Allocator::Free(data_.a.elements); + break; + + case kObjectFlag: + for (Member* m = data_.o.members; m != data_.o.members + data_.o.size; ++m) { + m->name.~GenericValue(); + m->value.~GenericValue(); + } + Allocator::Free(data_.o.members); + break; + + case kCopyStringFlag: + Allocator::Free(const_cast(data_.s.str)); + break; + } + } + } + + //@} + + //!@name Assignment operators + //@{ + + //! Assignment with move semantics. + /*! \param rhs Source of the assignment. It will become a null value after assignment. + */ + GenericValue& operator=(GenericValue& rhs) { + RAPIDJSON_ASSERT(this != &rhs); + this->~GenericValue(); + memcpy(this, &rhs, sizeof(GenericValue)); + rhs.flags_ = kNullFlag; + return *this; + } + + //! Assignment with primitive types. + /*! \tparam T Either Type, int, unsigned, int64_t, uint64_t, const Ch* + \param value The value to be assigned. + */ + template + GenericValue& operator=(T value) { + this->~GenericValue(); + new (this) GenericValue(value); + return *this; + } + //@} + + //!@name Type + //@{ + + Type GetType() const { return static_cast(flags_ & kTypeMask); } + bool IsNull() const { return flags_ == kNullFlag; } + bool IsFalse() const { return flags_ == kFalseFlag; } + bool IsTrue() const { return flags_ == kTrueFlag; } + bool IsBool() const { return (flags_ & kBoolFlag) != 0; } + bool IsObject() const { return flags_ == kObjectFlag; } + bool IsArray() const { return flags_ == kArrayFlag; } + bool IsNumber() const { return (flags_ & kNumberFlag) != 0; } + bool IsInt() const { return (flags_ & kIntFlag) != 0; } + bool IsUint() const { return (flags_ & kUintFlag) != 0; } + bool IsInt64() const { return (flags_ & kInt64Flag) != 0; } + bool IsUint64() const { return (flags_ & kUint64Flag) != 0; } + bool IsDouble() const { return (flags_ & kDoubleFlag) != 0; } + bool IsString() const { return (flags_ & kStringFlag) != 0; } + + //@} + + //!@name Null + //@{ + + GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; } + + //@} + + //!@name Bool + //@{ + + bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return flags_ == kTrueFlag; } + GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; } + + //@} + + //!@name Object + //@{ + + //! Set this value as an empty object. + GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; } + + //! Get the value associated with the object's name. + GenericValue& operator[](const Ch* name) { + if (Member* member = FindMember(name)) + return member->value; + else { + static GenericValue NullValue; + return NullValue; + } + } + const GenericValue& operator[](const Ch* name) const { return const_cast(*this)[name]; } + + //! Member iterators. + ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.members; } + ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.members + data_.o.size; } + MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return data_.o.members; } + MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return data_.o.members + data_.o.size; } + + //! Check whether a member exists in the object. + bool HasMember(const Ch* name) const { return FindMember(name) != 0; } + + //! Add a member (name-value pair) to the object. + /*! \param name A string value as name of member. + \param value Value of any type. + \param allocator Allocator for reallocating memory. + \return The value itself for fluent API. + \note The ownership of name and value will be transfered to this object if success. + */ + GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsObject()); + RAPIDJSON_ASSERT(name.IsString()); + Object& o = data_.o; + if (o.size >= o.capacity) { + if (o.capacity == 0) { + o.capacity = kDefaultObjectCapacity; + o.members = (Member*)allocator.Malloc(o.capacity * sizeof(Member)); + } + else { + SizeType oldCapacity = o.capacity; + o.capacity *= 2; + o.members = (Member*)allocator.Realloc(o.members, oldCapacity * sizeof(Member), o.capacity * sizeof(Member)); + } + } + o.members[o.size].name.RawAssign(name); + o.members[o.size].value.RawAssign(value); + o.size++; + return *this; + } + + GenericValue& AddMember(const Ch* name, Allocator& nameAllocator, GenericValue& value, Allocator& allocator) { + GenericValue n(name, internal::StrLen(name), nameAllocator); + return AddMember(n, value, allocator); + } + + GenericValue& AddMember(const Ch* name, GenericValue& value, Allocator& allocator) { + GenericValue n(name, internal::StrLen(name)); + return AddMember(n, value, allocator); + } + + template + GenericValue& AddMember(const Ch* name, T value, Allocator& allocator) { + GenericValue n(name, internal::StrLen(name)); + GenericValue v(value); + return AddMember(n, v, allocator); + } + + //! Remove a member in object by its name. + /*! \param name Name of member to be removed. + \return Whether the member existed. + \note Removing member is implemented by moving the last member. So the ordering of members is changed. + */ + bool RemoveMember(const Ch* name) { + RAPIDJSON_ASSERT(IsObject()); + if (Member* m = FindMember(name)) { + RAPIDJSON_ASSERT(data_.o.size > 0); + RAPIDJSON_ASSERT(data_.o.members != 0); + + Member* last = data_.o.members + (data_.o.size - 1); + if (data_.o.size > 1 && m != last) { + // Move the last one to this place + m->name = last->name; + m->value = last->value; + } + else { + // Only one left, just destroy + m->name.~GenericValue(); + m->value.~GenericValue(); + } + --data_.o.size; + return true; + } + return false; + } + + //@} + + //!@name Array + //@{ + + //! Set this value as an empty array. + GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; } + + //! Get the number of elements in array. + SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; } + + //! Get the capacity of array. + SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; } + + //! Check whether the array is empty. + bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; } + + //! Remove all elements in the array. + /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged. + */ + void Clear() { + RAPIDJSON_ASSERT(IsArray()); + for (SizeType i = 0; i < data_.a.size; ++i) + data_.a.elements[i].~GenericValue(); + data_.a.size = 0; + } + + //! Get an element from array by index. + /*! \param index Zero-based index of element. + \note +\code +Value a(kArrayType); +a.PushBack(123); +int x = a[0].GetInt(); // Error: operator[ is ambiguous, as 0 also mean a null pointer of const char* type. +int y = a[SizeType(0)].GetInt(); // Cast to SizeType will work. +int z = a[0u].GetInt(); // This works too. +\endcode + */ + GenericValue& operator[](SizeType index) { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(index < data_.a.size); + return data_.a.elements[index]; + } + const GenericValue& operator[](SizeType index) const { return const_cast(*this)[index]; } + + //! Element iterator + ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements; } + ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return data_.a.elements + data_.a.size; } + ConstValueIterator Begin() const { return const_cast(*this).Begin(); } + ConstValueIterator End() const { return const_cast(*this).End(); } + + //! Request the array to have enough capacity to store elements. + /*! \param newCapacity The capacity that the array at least need to have. + \param allocator The allocator for allocating memory. It must be the same one use previously. + \return The value itself for fluent API. + */ + GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (newCapacity > data_.a.capacity) { + data_.a.elements = (GenericValue*)allocator.Realloc(data_.a.elements, data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue)); + data_.a.capacity = newCapacity; + } + return *this; + } + + //! Append a value at the end of the array. + /*! \param value The value to be appended. + \param allocator The allocator for allocating memory. It must be the same one use previously. + \return The value itself for fluent API. + \note The ownership of the value will be transfered to this object if success. + \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient. + */ + GenericValue& PushBack(GenericValue& value, Allocator& allocator) { + RAPIDJSON_ASSERT(IsArray()); + if (data_.a.size >= data_.a.capacity) + Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : data_.a.capacity * 2, allocator); + data_.a.elements[data_.a.size++].RawAssign(value); + return *this; + } + + template + GenericValue& PushBack(T value, Allocator& allocator) { + GenericValue v(value); + return PushBack(v, allocator); + } + + //! Remove the last element in the array. + GenericValue& PopBack() { + RAPIDJSON_ASSERT(IsArray()); + RAPIDJSON_ASSERT(!Empty()); + data_.a.elements[--data_.a.size].~GenericValue(); + return *this; + } + //@} + + //!@name Number + //@{ + + int GetInt() const { RAPIDJSON_ASSERT(flags_ & kIntFlag); return data_.n.i.i; } + unsigned GetUint() const { RAPIDJSON_ASSERT(flags_ & kUintFlag); return data_.n.u.u; } + int64_t GetInt64() const { RAPIDJSON_ASSERT(flags_ & kInt64Flag); return data_.n.i64; } + uint64_t GetUint64() const { RAPIDJSON_ASSERT(flags_ & kUint64Flag); return data_.n.u64; } + + double GetDouble() const { + RAPIDJSON_ASSERT(IsNumber()); + if ((flags_ & kDoubleFlag) != 0) return data_.n.d; // exact type, no conversion. + if ((flags_ & kIntFlag) != 0) return data_.n.i.i; // int -> double + if ((flags_ & kUintFlag) != 0) return data_.n.u.u; // unsigned -> double + if ((flags_ & kInt64Flag) != 0) return (double)data_.n.i64; // int64_t -> double (may lose precision) + RAPIDJSON_ASSERT((flags_ & kUint64Flag) != 0); return (double)data_.n.u64; // uint64_t -> double (may lose precision) + } + + GenericValue& SetInt(int i) { this->~GenericValue(); new (this) GenericValue(i); return *this; } + GenericValue& SetUint(unsigned u) { this->~GenericValue(); new (this) GenericValue(u); return *this; } + GenericValue& SetInt64(int64_t i64) { this->~GenericValue(); new (this) GenericValue(i64); return *this; } + GenericValue& SetUint64(uint64_t u64) { this->~GenericValue(); new (this) GenericValue(u64); return *this; } + GenericValue& SetDouble(double d) { this->~GenericValue(); new (this) GenericValue(d); return *this; } + + //@} + + //!@name String + //@{ + + const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return data_.s.str; } + + //! Get the length of string. + /*! Since rapidjson permits "\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength(). + */ + SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return data_.s.length; } + + //! Set this value as a string without copying source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string pointer. + \param length The length of source string, excluding the trailing null terminator. + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, SizeType length) { this->~GenericValue(); SetStringRaw(s, length); return *this; } + + //! Set this value as a string without copying source string. + /*! \param s source string pointer. + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s) { return SetString(s, internal::StrLen(s)); } + + //! Set this value as a string by copying from source string. + /*! This version has better performance with supplied length, and also support string containing null character. + \param s source string. + \param length The length of source string, excluding the trailing null terminator. + \param allocator Allocator for allocating copied buffer. Commonly use document.GetAllocator(). + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, length, allocator); return *this; } + + //! Set this value as a string by copying from source string. + /*! \param s source string. + \param allocator Allocator for allocating copied buffer. Commonly use document.GetAllocator(). + \return The value itself for fluent API. + */ + GenericValue& SetString(const Ch* s, Allocator& allocator) { SetString(s, internal::StrLen(s), allocator); return *this; } + + //@} + + //! Generate events of this value to a Handler. + /*! This function adopts the GoF visitor pattern. + Typical usage is to output this JSON value as JSON text via Writer, which is a Handler. + It can also be used to deep clone this value via GenericDocument, which is also a Handler. + \tparam Handler type of handler. + \param handler An object implementing concept Handler. + */ + template + const GenericValue& Accept(Handler& handler) const { + switch(GetType()) { + case kNullType: handler.Null(); break; + case kFalseType: handler.Bool(false); break; + case kTrueType: handler.Bool(true); break; + + case kObjectType: + handler.StartObject(); + for (Member* m = data_.o.members; m != data_.o.members + data_.o.size; ++m) { + handler.String(m->name.data_.s.str, m->name.data_.s.length, false); + m->value.Accept(handler); + } + handler.EndObject(data_.o.size); + break; + + case kArrayType: + handler.StartArray(); + for (GenericValue* v = data_.a.elements; v != data_.a.elements + data_.a.size; ++v) + v->Accept(handler); + handler.EndArray(data_.a.size); + break; + + case kStringType: + handler.String(data_.s.str, data_.s.length, false); + break; + + case kNumberType: + if (IsInt()) handler.Int(data_.n.i.i); + else if (IsUint()) handler.Uint(data_.n.u.u); + else if (IsInt64()) handler.Int64(data_.n.i64); + else if (IsUint64()) handler.Uint64(data_.n.u64); + else handler.Double(data_.n.d); + break; + } + return *this; + } + +private: + template + friend class GenericDocument; + + enum { + kBoolFlag = 0x100, + kNumberFlag = 0x200, + kIntFlag = 0x400, + kUintFlag = 0x800, + kInt64Flag = 0x1000, + kUint64Flag = 0x2000, + kDoubleFlag = 0x4000, + kStringFlag = 0x100000, + kCopyFlag = 0x200000, + + // Initial flags of different types. + kNullFlag = kNullType, + kTrueFlag = kTrueType | kBoolFlag, + kFalseFlag = kFalseType | kBoolFlag, + kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, + kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, + kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag, + kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag, + kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag, + kConstStringFlag = kStringType | kStringFlag, + kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, + kObjectFlag = kObjectType, + kArrayFlag = kArrayType, + + kTypeMask = 0xFF // bitwise-and with mask of 0xFF can be optimized by compiler + }; + + static const SizeType kDefaultArrayCapacity = 16; + static const SizeType kDefaultObjectCapacity = 16; + + struct String { + const Ch* str; + SizeType length; + unsigned hashcode; //!< reserved + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + // By using proper binary layout, retrieval of different integer types do not need conversions. + union Number { +#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN + struct I { + int i; + char padding[4]; + }i; + struct U { + unsigned u; + char padding2[4]; + }u; +#else + struct I { + char padding[4]; + int i; + }i; + struct U { + char padding2[4]; + unsigned u; + }u; +#endif + int64_t i64; + uint64_t u64; + double d; + }; // 8 bytes + + struct Object { + Member* members; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + struct Array { + GenericValue* elements; + SizeType size; + SizeType capacity; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + union Data { + String s; + Number n; + Object o; + Array a; + }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode + + //! Find member by name. + Member* FindMember(const Ch* name) { + RAPIDJSON_ASSERT(name); + RAPIDJSON_ASSERT(IsObject()); + + SizeType length = internal::StrLen(name); + + Object& o = data_.o; + for (Member* member = o.members; member != data_.o.members + data_.o.size; ++member) + if (length == member->name.data_.s.length && memcmp(member->name.data_.s.str, name, length * sizeof(Ch)) == 0) + return member; + + return 0; + } + const Member* FindMember(const Ch* name) const { return const_cast(*this).FindMember(name); } + + // Initialize this value as array with initial data, without calling destructor. + void SetArrayRaw(GenericValue* values, SizeType count, Allocator& alloctaor) { + flags_ = kArrayFlag; + data_.a.elements = (GenericValue*)alloctaor.Malloc(count * sizeof(GenericValue)); + memcpy(data_.a.elements, values, count * sizeof(GenericValue)); + data_.a.size = data_.a.capacity = count; + } + + //! Initialize this value as object with initial data, without calling destructor. + void SetObjectRaw(Member* members, SizeType count, Allocator& alloctaor) { + flags_ = kObjectFlag; + data_.o.members = (Member*)alloctaor.Malloc(count * sizeof(Member)); + memcpy(data_.o.members, members, count * sizeof(Member)); + data_.o.size = data_.o.capacity = count; + } + + //! Initialize this value as constant string, without calling destructor. + void SetStringRaw(const Ch* s, SizeType length) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kConstStringFlag; + data_.s.str = s; + data_.s.length = length; + } + + //! Initialize this value as copy string with initial data, without calling destructor. + void SetStringRaw(const Ch* s, SizeType length, Allocator& allocator) { + RAPIDJSON_ASSERT(s != NULL); + flags_ = kCopyStringFlag; + data_.s.str = (Ch *)allocator.Malloc((length + 1) * sizeof(Ch)); + data_.s.length = length; + memcpy(const_cast(data_.s.str), s, length * sizeof(Ch)); + const_cast(data_.s.str)[length] = '\0'; + } + + //! Assignment without calling destructor + void RawAssign(GenericValue& rhs) { + memcpy(this, &rhs, sizeof(GenericValue)); + rhs.flags_ = kNullFlag; + } + + Data data_; + unsigned flags_; +}; +#pragma pack (pop) + +//! Value with UTF8 encoding. +typedef GenericValue > Value; + +/////////////////////////////////////////////////////////////////////////////// +// GenericDocument + +//! A document for parsing JSON text as DOM. +/*! + \implements Handler + \tparam Encoding encoding for both parsing and string storage. + \tparam Alloactor allocator for allocating memory for the DOM, and the stack during parsing. +*/ +template > +class GenericDocument : public GenericValue { +public: + typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding. + typedef GenericValue ValueType; //!< Value type of the document. + typedef Allocator AllocatorType; //!< Allocator type from template parameter. + + //! Constructor + /*! \param allocator Optional allocator for allocating stack memory. + \param stackCapacity Initial capacity of stack in bytes. + */ + GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseError_(0), errorOffset_(0) {} + + //! Parse JSON text from an input stream. + /*! \tparam parseFlags Combination of ParseFlag. + \param stream Input stream to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseStream(Stream& stream) { + ValueType::SetNull(); // Remove existing root if exist + GenericReader reader; + if (reader.template Parse(stream, *this)) { + RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object + this->RawAssign(*stack_.template Pop(1)); // Add this-> to prevent issue 13. + parseError_ = 0; + errorOffset_ = 0; + } + else { + parseError_ = reader.GetParseError(); + errorOffset_ = reader.GetErrorOffset(); + ClearStack(); + } + return *this; + } + + //! Parse JSON text from a mutable string. + /*! \tparam parseFlags Combination of ParseFlag. + \param str Mutable zero-terminated string to be parsed. + \return The document itself for fluent API. + */ + template + GenericDocument& ParseInsitu(Ch* str) { + GenericInsituStringStream s(str); + return ParseStream(s); + } + + //! Parse JSON text from a read-only string. + /*! \tparam parseFlags Combination of ParseFlag (must not contain kParseInsituFlag). + \param str Read-only zero-terminated string to be parsed. + */ + template + GenericDocument& Parse(const Ch* str) { + RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); + GenericStringStream s(str); + return ParseStream(s); + } + + //! Whether a parse error was occured in the last parsing. + bool HasParseError() const { return parseError_ != 0; } + + //! Get the message of parsing error. + const char* GetParseError() const { return parseError_; } + + //! Get the offset in character of the parsing error. + size_t GetErrorOffset() const { return errorOffset_; } + + //! Get the allocator of this document. + Allocator& GetAllocator() { return stack_.GetAllocator(); } + + //! Get the capacity of stack in bytes. + size_t GetStackCapacity() const { return stack_.GetCapacity(); } + +private: + // Prohibit assignment + GenericDocument& operator=(const GenericDocument&); + + friend class GenericReader; // for Reader to call the following private handler functions + + // Implementation of Handler + void Null() { new (stack_.template Push()) ValueType(); } + void Bool(bool b) { new (stack_.template Push()) ValueType(b); } + void Int(int i) { new (stack_.template Push()) ValueType(i); } + void Uint(unsigned i) { new (stack_.template Push()) ValueType(i); } + void Int64(int64_t i) { new (stack_.template Push()) ValueType(i); } + void Uint64(uint64_t i) { new (stack_.template Push()) ValueType(i); } + void Double(double d) { new (stack_.template Push()) ValueType(d); } + + void String(const Ch* str, SizeType length, bool copy) { + if (copy) + new (stack_.template Push()) ValueType(str, length, GetAllocator()); + else + new (stack_.template Push()) ValueType(str, length); + } + + void StartObject() { new (stack_.template Push()) ValueType(kObjectType); } + + void EndObject(SizeType memberCount) { + typename ValueType::Member* members = stack_.template Pop(memberCount); + stack_.template Top()->SetObjectRaw(members, (SizeType)memberCount, GetAllocator()); + } + + void StartArray() { new (stack_.template Push()) ValueType(kArrayType); } + + void EndArray(SizeType elementCount) { + ValueType* elements = stack_.template Pop(elementCount); + stack_.template Top()->SetArrayRaw(elements, elementCount, GetAllocator()); + } + + void ClearStack() { + if (Allocator::kNeedFree) + while (stack_.GetSize() > 0) // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects) + (stack_.template Pop(1))->~ValueType(); + else + stack_.Clear(); + } + + static const size_t kDefaultStackCapacity = 1024; + internal::Stack stack_; + const char* parseError_; + size_t errorOffset_; +}; + +typedef GenericDocument > Document; + +} // namespace rapidjson + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // RAPIDJSON_DOCUMENT_H_ diff --git a/third-party/rapidjson/filestream.h b/third-party/rapidjson/filestream.h new file mode 100644 index 000000000..885894963 --- /dev/null +++ b/third-party/rapidjson/filestream.h @@ -0,0 +1,46 @@ +#ifndef RAPIDJSON_FILESTREAM_H_ +#define RAPIDJSON_FILESTREAM_H_ + +#include + +namespace rapidjson { + +//! Wrapper of C file stream for input or output. +/*! + This simple wrapper does not check the validity of the stream. + \implements Stream +*/ +class FileStream { +public: + typedef char Ch; //!< Character type. Only support char. + + FileStream(FILE* fp) : fp_(fp), count_(0) { Read(); } + char Peek() const { return current_; } + char Take() { char c = current_; Read(); return c; } + size_t Tell() const { return count_; } + void Put(char c) { fputc(c, fp_); } + + // Not implemented + char* PutBegin() { return 0; } + size_t PutEnd(char*) { return 0; } + +private: + void Read() { + RAPIDJSON_ASSERT(fp_ != 0); + int c = fgetc(fp_); + if (c != EOF) { + current_ = (char)c; + count_++; + } + else + current_ = '\0'; + } + + FILE* fp_; + char current_; + size_t count_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/third-party/rapidjson/internal/pow10.h b/third-party/rapidjson/internal/pow10.h new file mode 100644 index 000000000..bf3a9afb0 --- /dev/null +++ b/third-party/rapidjson/internal/pow10.h @@ -0,0 +1,54 @@ +#ifndef RAPIDJSON_POW10_ +#define RAPIDJSON_POW10_ + +namespace rapidjson { +namespace internal { + +//! Computes integer powers of 10 in double (10.0^n). +/*! This function uses lookup table for fast and accurate results. + \param n positive/negative exponent. Must <= 308. + \return 10.0^n +*/ +inline double Pow10(int n) { + static const double e[] = { // 1e-308...1e308: 617 * 8 bytes = 4936 bytes + 1e-308,1e-307,1e-306,1e-305,1e-304,1e-303,1e-302,1e-301,1e-300, + 1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,1e-283,1e-282,1e-281,1e-280, + 1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,1e-263,1e-262,1e-261,1e-260, + 1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,1e-243,1e-242,1e-241,1e-240, + 1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,1e-223,1e-222,1e-221,1e-220, + 1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,1e-203,1e-202,1e-201,1e-200, + 1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,1e-183,1e-182,1e-181,1e-180, + 1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,1e-163,1e-162,1e-161,1e-160, + 1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,1e-143,1e-142,1e-141,1e-140, + 1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,1e-123,1e-122,1e-121,1e-120, + 1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,1e-103,1e-102,1e-101,1e-100, + 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, + 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, + 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, + 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, + 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e+0, + 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, + 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, + 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, + 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, + 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120, + 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140, + 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160, + 1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180, + 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200, + 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220, + 1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240, + 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260, + 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280, + 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, + 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 + }; + RAPIDJSON_ASSERT(n <= 308); + return n < -308 ? 0.0 : e[n + 308]; +} + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_POW10_ diff --git a/third-party/rapidjson/internal/stack.h b/third-party/rapidjson/internal/stack.h new file mode 100644 index 000000000..966893b3f --- /dev/null +++ b/third-party/rapidjson/internal/stack.h @@ -0,0 +1,82 @@ +#ifndef RAPIDJSON_INTERNAL_STACK_H_ +#define RAPIDJSON_INTERNAL_STACK_H_ + +namespace rapidjson { +namespace internal { + +/////////////////////////////////////////////////////////////////////////////// +// Stack + +//! A type-unsafe stack for storing different types of data. +/*! \tparam Allocator Allocator for allocating stack memory. +*/ +template +class Stack { +public: + Stack(Allocator* allocator, size_t stack_capacity) : allocator_(allocator), own_allocator_(0), stack_(0), stack_top_(0), stack_end_(0), stack_capacity_(stack_capacity) { + RAPIDJSON_ASSERT(stack_capacity_ > 0); + if (!allocator_) + own_allocator_ = allocator_ = new Allocator(); + stack_top_ = stack_ = (char*)allocator_->Malloc(stack_capacity_); + stack_end_ = stack_ + stack_capacity_; + } + + ~Stack() { + Allocator::Free(stack_); + delete own_allocator_; // Only delete if it is owned by the stack + } + + void Clear() { /*stack_top_ = 0;*/ stack_top_ = stack_; } + + template + T* Push(size_t count = 1) { + // Expand the stack if needed + if (stack_top_ + sizeof(T) * count >= stack_end_) { + size_t new_capacity = stack_capacity_ * 2; + size_t size = GetSize(); + size_t new_size = GetSize() + sizeof(T) * count; + if (new_capacity < new_size) + new_capacity = new_size; + stack_ = (char*)allocator_->Realloc(stack_, stack_capacity_, new_capacity); + stack_capacity_ = new_capacity; + stack_top_ = stack_ + size; + stack_end_ = stack_ + stack_capacity_; + } + T* ret = (T*)stack_top_; + stack_top_ += sizeof(T) * count; + return ret; + } + + template + T* Pop(size_t count) { + RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T)); + stack_top_ -= count * sizeof(T); + return (T*)stack_top_; + } + + template + T* Top() { + RAPIDJSON_ASSERT(GetSize() >= sizeof(T)); + return (T*)(stack_top_ - sizeof(T)); + } + + template + T* Bottom() { return (T*)stack_; } + + Allocator& GetAllocator() { return *allocator_; } + size_t GetSize() const { return stack_top_ - stack_; } + size_t GetCapacity() const { return stack_capacity_; } + +private: + Allocator* allocator_; + Allocator* own_allocator_; + char *stack_; + char *stack_top_; + char *stack_end_; + size_t stack_capacity_; +}; + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_STACK_H_ diff --git a/third-party/rapidjson/internal/strfunc.h b/third-party/rapidjson/internal/strfunc.h new file mode 100644 index 000000000..bbf444fe6 --- /dev/null +++ b/third-party/rapidjson/internal/strfunc.h @@ -0,0 +1,24 @@ +#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_ +#define RAPIDJSON_INTERNAL_STRFUNC_H_ + +namespace rapidjson { +namespace internal { + +//! Custom strlen() which works on different character types. +/*! \tparam Ch Character type (e.g. char, wchar_t, short) + \param s Null-terminated input string. + \return Number of characters in the string. + \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints. +*/ +template +inline SizeType StrLen(const Ch* s) { + const Ch* p = s; + while (*p != '\0') + ++p; + return SizeType(p - s); +} + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_INTERNAL_STRFUNC_H_ diff --git a/third-party/rapidjson/license.txt b/third-party/rapidjson/license.txt new file mode 100644 index 000000000..03d97d163 --- /dev/null +++ b/third-party/rapidjson/license.txt @@ -0,0 +1,19 @@ +Copyright (C) 2011 Milo Yip + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/third-party/rapidjson/prettywriter.h b/third-party/rapidjson/prettywriter.h new file mode 100644 index 000000000..238ff5ff6 --- /dev/null +++ b/third-party/rapidjson/prettywriter.h @@ -0,0 +1,156 @@ +#ifndef RAPIDJSON_PRETTYWRITER_H_ +#define RAPIDJSON_PRETTYWRITER_H_ + +#include "writer.h" + +namespace rapidjson { + +//! Writer with indentation and spacing. +/*! + \tparam Stream Type of ouptut stream. + \tparam Encoding Encoding of both source strings and output. + \tparam Allocator Type of allocator for allocating memory of stack. +*/ +template, typename Allocator = MemoryPoolAllocator<> > +class PrettyWriter : public Writer { +public: + typedef Writer Base; + typedef typename Base::Ch Ch; + + //! Constructor + /*! \param stream Output stream. + \param allocator User supplied allocator. If it is null, it will create a private one. + \param levelDepth Initial capacity of + */ + PrettyWriter(Stream& stream, Allocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : + Base(stream, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {} + + //! Set custom indentation. + /*! \param indentChar Character for indentation. Must be whitespace character (' ', '\t', '\n', '\r'). + \param indentCharCount Number of indent characters for each indentation level. + \note The default indentation is 4 spaces. + */ + PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) { + RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r'); + indentChar_ = indentChar; + indentCharCount_ = indentCharCount; + return *this; + } + + //@name Implementation of Handler. + //@{ + + PrettyWriter& Null() { PrettyPrefix(kNullType); Base::WriteNull(); return *this; } + PrettyWriter& Bool(bool b) { PrettyPrefix(b ? kTrueType : kFalseType); Base::WriteBool(b); return *this; } + PrettyWriter& Int(int i) { PrettyPrefix(kNumberType); Base::WriteInt(i); return *this; } + PrettyWriter& Uint(unsigned u) { PrettyPrefix(kNumberType); Base::WriteUint(u); return *this; } + PrettyWriter& Int64(int64_t i64) { PrettyPrefix(kNumberType); Base::WriteInt64(i64); return *this; } + PrettyWriter& Uint64(uint64_t u64) { PrettyPrefix(kNumberType); Base::WriteUint64(u64); return *this; } + PrettyWriter& Double(double d) { PrettyPrefix(kNumberType); Base::WriteDouble(d); return *this; } + + PrettyWriter& String(const Ch* str, SizeType length, bool copy = false) { + (void)copy; + PrettyPrefix(kStringType); + Base::WriteString(str, length); + return *this; + } + + PrettyWriter& StartObject() { + PrettyPrefix(kObjectType); + new (Base::level_stack_.template Push()) typename Base::Level(false); + Base::WriteStartObject(); + return *this; + } + + PrettyWriter& EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(!Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + Base::stream_.Put('\n'); + WriteIndent(); + } + Base::WriteEndObject(); + return *this; + } + + PrettyWriter& StartArray() { + PrettyPrefix(kArrayType); + new (Base::level_stack_.template Push()) typename Base::Level(true); + Base::WriteStartArray(); + return *this; + } + + PrettyWriter& EndArray(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); + RAPIDJSON_ASSERT(Base::level_stack_.template Top()->inArray); + bool empty = Base::level_stack_.template Pop(1)->valueCount == 0; + + if (!empty) { + Base::stream_.Put('\n'); + WriteIndent(); + } + Base::WriteEndArray(); + return *this; + } + + //@} + + //! Simpler but slower overload. + PrettyWriter& String(const Ch* str) { return String(str, internal::StrLen(str)); } + +protected: + void PrettyPrefix(Type type) { + (void)type; + if (Base::level_stack_.GetSize() != 0) { // this value is not at root + typename Base::Level* level = Base::level_stack_.template Top(); + + if (level->inArray) { + if (level->valueCount > 0) { + Base::stream_.Put(','); // add comma if it is not the first element in array + Base::stream_.Put('\n'); + } + else + Base::stream_.Put('\n'); + WriteIndent(); + } + else { // in object + if (level->valueCount > 0) { + if (level->valueCount % 2 == 0) { + Base::stream_.Put(','); + Base::stream_.Put('\n'); + } + else { + Base::stream_.Put(':'); + Base::stream_.Put(' '); + } + } + else + Base::stream_.Put('\n'); + + if (level->valueCount % 2 == 0) + WriteIndent(); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else + RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType); + } + + void WriteIndent() { + size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_; + PutN(Base::stream_, indentChar_, count); + } + + Ch indentChar_; + unsigned indentCharCount_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/third-party/rapidjson/rapidjson.h b/third-party/rapidjson/rapidjson.h new file mode 100644 index 000000000..7acb2aa4f --- /dev/null +++ b/third-party/rapidjson/rapidjson.h @@ -0,0 +1,525 @@ +#ifndef RAPIDJSON_RAPIDJSON_H_ +#define RAPIDJSON_RAPIDJSON_H_ + +// Copyright (c) 2011-2012 Milo Yip (miloyip@gmail.com) +// Version 0.11 + +#include // malloc(), realloc(), free() +#include // memcpy() + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_INT64DEFINE + +// Here defines int64_t and uint64_t types in global namespace. +// If user have their own definition, can define RAPIDJSON_NO_INT64DEFINE to disable this. +#ifndef RAPIDJSON_NO_INT64DEFINE +#ifdef _MSC_VER +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif +#endif // RAPIDJSON_NO_INT64TYPEDEF + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ENDIAN +#define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine +#define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine + +//! Endianness of the machine. +/*! GCC provided macro for detecting endianness of the target machine. But other + compilers may not have this. User can define RAPIDJSON_ENDIAN to either + RAPIDJSON_LITTLEENDIAN or RAPIDJSON_BIGENDIAN. +*/ +#ifndef RAPIDJSON_ENDIAN +#ifdef __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN +#else +#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN +#endif // __BYTE_ORDER__ +#else +#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN // Assumes little endian otherwise. +#endif +#endif // RAPIDJSON_ENDIAN + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD + +// Enable SSE2 optimization. +//#define RAPIDJSON_SSE2 + +// Enable SSE4.2 optimization. +//#define RAPIDJSON_SSE42 + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) +#define RAPIDJSON_SIMD +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_NO_SIZETYPEDEFINE + +#ifndef RAPIDJSON_NO_SIZETYPEDEFINE +namespace rapidjson { +//! Use 32-bit array/string indices even for 64-bit platform, instead of using size_t. +/*! User may override the SizeType by defining RAPIDJSON_NO_SIZETYPEDEFINE. +*/ +typedef unsigned SizeType; +} // namespace rapidjson +#endif + +/////////////////////////////////////////////////////////////////////////////// +// RAPIDJSON_ASSERT + +//! Assertion. +/*! By default, rapidjson uses C assert() for assertion. + User can override it by defining RAPIDJSON_ASSERT(x) macro. +*/ +#ifndef RAPIDJSON_ASSERT +#include +#define RAPIDJSON_ASSERT(x) assert(x) +#endif // RAPIDJSON_ASSERT + +/////////////////////////////////////////////////////////////////////////////// +// Helpers + +#define RAPIDJSON_MULTILINEMACRO_BEGIN do { +#define RAPIDJSON_MULTILINEMACRO_END \ +} while((void)0, 0) + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// Allocator + +/*! \class rapidjson::Allocator + \brief Concept for allocating, resizing and freeing memory block. + + Note that Malloc() and Realloc() are non-static but Free() is static. + + So if an allocator need to support Free(), it needs to put its pointer in + the header of memory block. + +\code +concept Allocator { + static const bool kNeedFree; //!< Whether this allocator needs to call Free(). + + // Allocate a memory block. + // \param size of the memory block in bytes. + // \returns pointer to the memory block. + void* Malloc(size_t size); + + // Resize a memory block. + // \param originalPtr The pointer to current memory block. Null pointer is permitted. + // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.) + // \param newSize the new size in bytes. + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); + + // Free a memory block. + // \param pointer to the memory block. Null pointer is permitted. + static void Free(void *ptr); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// CrtAllocator + +//! C-runtime library allocator. +/*! This class is just wrapper for standard C library memory routines. + \implements Allocator +*/ +class CrtAllocator { +public: + static const bool kNeedFree = true; + void* Malloc(size_t size) { return malloc(size); } + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { (void)originalSize; return realloc(originalPtr, newSize); } + static void Free(void *ptr) { free(ptr); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// MemoryPoolAllocator + +//! Default memory allocator used by the parser and DOM. +/*! This allocator allocate memory blocks from pre-allocated memory chunks. + + It does not free memory blocks. And Realloc() only allocate new memory. + + The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. + + User may also supply a buffer as the first chunk. + + If the user-buffer is full then additional chunks are allocated by BaseAllocator. + + The user-buffer is not deallocated by this allocator. + + \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. + \implements Allocator +*/ +template +class MemoryPoolAllocator { +public: + static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator) + + //! Constructor with chunkSize. + /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + if (!baseAllocator_) + ownBaseAllocator_ = baseAllocator_ = new BaseAllocator(); + AddChunk(chunk_capacity_); + } + + //! Constructor with user-supplied buffer. + /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size. + + The user buffer will not be deallocated when this allocator is destructed. + + \param buffer User supplied buffer. + \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). + \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. + \param baseAllocator The allocator for allocating memory chunks. + */ + MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : + chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) + { + RAPIDJSON_ASSERT(buffer != 0); + RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); + chunkHead_ = (ChunkHeader*)buffer; + chunkHead_->capacity = size - sizeof(ChunkHeader); + chunkHead_->size = 0; + chunkHead_->next = 0; + } + + //! Destructor. + /*! This deallocates all memory chunks, excluding the user-supplied buffer. + */ + ~MemoryPoolAllocator() { + Clear(); + delete ownBaseAllocator_; + } + + //! Deallocates all memory chunks, excluding the user-supplied buffer. + void Clear() { + while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) { + ChunkHeader* next = chunkHead_->next; + baseAllocator_->Free(chunkHead_); + chunkHead_ = next; + } + } + + //! Computes the total capacity of allocated memory chunks. + /*! \return total capacity in bytes. + */ + size_t Capacity() { + size_t capacity = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + capacity += c->capacity; + return capacity; + } + + //! Computes the memory blocks allocated. + /*! \return total used bytes. + */ + size_t Size() { + size_t size = 0; + for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) + size += c->size; + return size; + } + + //! Allocates a memory block. (concept Allocator) + void* Malloc(size_t size) { + size = (size + 3) & ~3; // Force aligning size to 4 + + if (chunkHead_->size + size > chunkHead_->capacity) + AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size); + + char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size; + RAPIDJSON_ASSERT(((uintptr_t)buffer & 3) == 0); // returned buffer is aligned to 4 + chunkHead_->size += size; + + return buffer; + } + + //! Resizes a memory block (concept Allocator) + void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { + if (originalPtr == 0) + return Malloc(newSize); + + // Do not shrink if new size is smaller than original + if (originalSize >= newSize) + return originalPtr; + + // Simply expand it if it is the last allocation and there is sufficient space + if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) { + size_t increment = newSize - originalSize; + increment = (increment + 3) & ~3; // Force aligning size to 4 + if (chunkHead_->size + increment <= chunkHead_->capacity) { + chunkHead_->size += increment; + RAPIDJSON_ASSERT(((uintptr_t)originalPtr & 3) == 0); // returned buffer is aligned to 4 + return originalPtr; + } + } + + // Realloc process: allocate and copy memory, do not free original buffer. + void* newBuffer = Malloc(newSize); + RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly. + return memcpy(newBuffer, originalPtr, originalSize); + } + + //! Frees a memory block (concept Allocator) + static void Free(void *) {} // Do nothing + +private: + //! Creates a new chunk. + /*! \param capacity Capacity of the chunk in bytes. + */ + void AddChunk(size_t capacity) { + ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity); + chunk->capacity = capacity; + chunk->size = 0; + chunk->next = chunkHead_; + chunkHead_ = chunk; + } + + static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. + + //! Chunk header for perpending to each chunk. + /*! Chunks are stored as a singly linked list. + */ + struct ChunkHeader { + size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). + size_t size; //!< Current size of allocated memory in bytes. + ChunkHeader *next; //!< Next chunk in the linked list. + }; + + ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. + size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. + char *userBuffer_; //!< User supplied buffer. + BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. + BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. +}; + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. + + //! \brief Encode a Unicode codepoint to a buffer. + //! \param buffer pointer to destination buffer to store the result. It should have sufficient size of encoding one character. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + //! \returns the pointer to the next character after the encoded data. + static Ch* Encode(Ch *buffer, unsigned codepoint); +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + \tparam CharType Type for storing 8-bit UTF-8 data. Default is char. + \implements Encoding +*/ +template +struct UTF8 { + typedef CharType Ch; + + static Ch* Encode(Ch *buffer, unsigned codepoint) { + if (codepoint <= 0x7F) + *buffer++ = codepoint & 0xFF; + else if (codepoint <= 0x7FF) { + *buffer++ = 0xC0 | ((codepoint >> 6) & 0xFF); + *buffer++ = 0x80 | ((codepoint & 0x3F)); + } + else if (codepoint <= 0xFFFF) { + *buffer++ = 0xE0 | ((codepoint >> 12) & 0xFF); + *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F); + *buffer++ = 0x80 | (codepoint & 0x3F); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + *buffer++ = 0xF0 | ((codepoint >> 18) & 0xFF); + *buffer++ = 0x80 | ((codepoint >> 12) & 0x3F); + *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F); + *buffer++ = 0x80 | (codepoint & 0x3F); + } + return buffer; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \implements Encoding +*/ +template +struct UTF16 { + typedef CharType Ch; + + static Ch* Encode(Ch* buffer, unsigned codepoint) { + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + *buffer++ = static_cast(codepoint); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + *buffer++ = static_cast((v >> 10) + 0xD800); + *buffer++ = (v & 0x3FF) + 0xDC00; + } + return buffer; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam Ch Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. + \implements Encoding +*/ +template +struct UTF32 { + typedef CharType Ch; + + static Ch *Encode(Ch* buffer, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + *buffer++ = codepoint; + return buffer; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// Stream + +/*! \class rapidjson::Stream + \brief Concept for reading and writing characters. + + For read-only stream, no need to implement PutBegin(), Put() and PutEnd(). + + For write-only stream, only need to implement Put(). + +\code +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! \return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! \return Number of characters written. + size_t PutEnd(Ch* begin); +} +\endcode +*/ + +//! Put N copies of a character to a stream. +template +inline void PutN(Stream& stream, Ch c, size_t n) { + for (size_t i = 0; i < n; i++) + stream.Put(c); +} + +/////////////////////////////////////////////////////////////////////////////// +// StringStream + +//! Read-only string stream. +/*! \implements Stream +*/ +template +struct GenericStringStream { + typedef typename Encoding::Ch Ch; + + GenericStringStream(const Ch *src) : src_(src), head_(src) {} + + Ch Peek() const { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() const { return src_ - head_; } + + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + void Put(Ch) { RAPIDJSON_ASSERT(false); } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + + const Ch* src_; //!< Current read position. + const Ch* head_; //!< Original head of the string. +}; + +typedef GenericStringStream > StringStream; + +/////////////////////////////////////////////////////////////////////////////// +// InsituStringStream + +//! A read-write string stream. +/*! This string stream is particularly designed for in-situ parsing. + \implements Stream +*/ +template +struct GenericInsituStringStream { + typedef typename Encoding::Ch Ch; + + GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {} + + // Read + Ch Peek() { return *src_; } + Ch Take() { return *src_++; } + size_t Tell() { return src_ - head_; } + + // Write + Ch* PutBegin() { return dst_ = src_; } + void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; } + size_t PutEnd(Ch* begin) { return dst_ - begin; } + + Ch* src_; + Ch* dst_; + Ch* head_; +}; + +typedef GenericInsituStringStream > InsituStringStream; + +/////////////////////////////////////////////////////////////////////////////// +// Type + +//! Type of JSON value +enum Type { + kNullType = 0, //!< null + kFalseType = 1, //!< false + kTrueType = 2, //!< true + kObjectType = 3, //!< object + kArrayType = 4, //!< array + kStringType = 5, //!< string + kNumberType = 6, //!< number +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/third-party/rapidjson/reader.h b/third-party/rapidjson/reader.h new file mode 100644 index 000000000..96bbc6eb5 --- /dev/null +++ b/third-party/rapidjson/reader.h @@ -0,0 +1,683 @@ +#ifndef RAPIDJSON_READER_H_ +#define RAPIDJSON_READER_H_ + +// Copyright (c) 2011 Milo Yip (miloyip@gmail.com) +// Version 0.1 + +#include "rapidjson.h" +#include "internal/pow10.h" +#include "internal/stack.h" +#include + +#ifdef RAPIDJSON_SSE42 +#include +#elif defined(RAPIDJSON_SSE2) +#include +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +#ifndef RAPIDJSON_PARSE_ERROR +#define RAPIDJSON_PARSE_ERROR(msg, offset) \ + RAPIDJSON_MULTILINEMACRO_BEGIN \ + parseError_ = msg; \ + errorOffset_ = offset; \ + longjmp(jmpbuf_, 1); \ + RAPIDJSON_MULTILINEMACRO_END +#endif + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// ParseFlag + +//! Combination of parseFlags +enum ParseFlag { + kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. + kParseInsituFlag = 1 //!< In-situ(destructive) parsing. +}; + +/////////////////////////////////////////////////////////////////////////////// +// Handler + +/*! \class rapidjson::Handler + \brief Concept for receiving events from GenericReader upon parsing. +\code +concept Handler { + typename Ch; + + void Null(); + void Bool(bool b); + void Int(int i); + void Uint(unsigned i); + void Int64(int64_t i); + void Uint64(uint64_t i); + void Double(double d); + void String(const Ch* str, SizeType length, bool copy); + void StartObject(); + void EndObject(SizeType memberCount); + void StartArray(); + void EndArray(SizeType elementCount); +}; +\endcode +*/ +/////////////////////////////////////////////////////////////////////////////// +// BaseReaderHandler + +//! Default implementation of Handler. +/*! This can be used as base class of any reader handler. + \implements Handler +*/ +template > +struct BaseReaderHandler { + typedef typename Encoding::Ch Ch; + + void Default() {} + void Null() { Default(); } + void Bool(bool) { Default(); } + void Int(int) { Default(); } + void Uint(unsigned) { Default(); } + void Int64(int64_t) { Default(); } + void Uint64(uint64_t) { Default(); } + void Double(double) { Default(); } + void String(const Ch*, SizeType, bool) { Default(); } + void StartObject() { Default(); } + void EndObject(SizeType) { Default(); } + void StartArray() { Default(); } + void EndArray(SizeType) { Default(); } +}; + +/////////////////////////////////////////////////////////////////////////////// +// SkipWhitespace + +//! Skip the JSON white spaces in a stream. +/*! \param stream A input stream for skipping white spaces. + \note This function has SSE2/SSE4.2 specialization. +*/ +template +void SkipWhitespace(Stream& stream) { + Stream s = stream; // Use a local copy for optimization + while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + s.Take(); + stream = s; +} + +#ifdef RAPIDJSON_SSE42 +//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + static const char whitespace[16] = " \n\r\t"; + __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]); + + for (;;) { + __m128i s = _mm_loadu_si128((const __m128i *)p); + unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY)); + if (r == 0) // all 16 characters are whitespace + p += 16; + else { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + if (_BitScanForward(&offset, r)) + return p + offset; +#else + if (r != 0) + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +#elif defined(RAPIDJSON_SSE2) + +//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + static const char whitespaces[4][17] = { + " ", + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r", + "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"}; + + __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]); + __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]); + __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]); + __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]); + + for (;;) { + __m128i s = _mm_loadu_si128((const __m128i *)p); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = ~_mm_movemask_epi8(x); + if (r == 0) // all 16 characters are whitespace + p += 16; + else { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + if (_BitScanForward(&offset, r)) + return p + offset; +#else + if (r != 0) + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +#endif // RAPIDJSON_SSE2 + +#ifdef RAPIDJSON_SIMD +//! Template function specialization for InsituStringStream +template<> inline void SkipWhitespace(InsituStringStream& stream) { + stream.src_ = const_cast(SkipWhitespace_SIMD(stream.src_)); +} + +//! Template function specialization for StringStream +template<> inline void SkipWhitespace(StringStream& stream) { + stream.src_ = SkipWhitespace_SIMD(stream.src_); +} +#endif // RAPIDJSON_SIMD + +/////////////////////////////////////////////////////////////////////////////// +// GenericReader + +//! SAX-style JSON parser. Use Reader for UTF8 encoding and default allocator. +/*! GenericReader parses JSON text from a stream, and send events synchronously to an + object implementing Handler concept. + + It needs to allocate a stack for storing a single decoded string during + non-destructive parsing. + + For in-situ parsing, the decoded string is directly written to the source + text string, no temporary buffer is required. + + A GenericReader object can be reused for parsing multiple JSON text. + + \tparam Encoding Encoding of both the stream and the parse output. + \tparam Allocator Allocator type for stack. +*/ +template > +class GenericReader { +public: + typedef typename Encoding::Ch Ch; + + //! Constructor. + /*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) + */ + GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseError_(0), errorOffset_(0) {} + + //! Parse JSON text. + /*! \tparam parseFlags Combination of ParseFlag. + \tparam Stream Type of input stream. + \tparam Handler Type of handler which must implement Handler concept. + \param stream Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template + bool Parse(Stream& stream, Handler& handler) { + parseError_ = 0; + errorOffset_ = 0; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4611) // interaction between '_setjmp' and C++ object destruction is non-portable +#endif + if (setjmp(jmpbuf_)) { +#ifdef _MSC_VER +#pragma warning(pop) +#endif + stack_.Clear(); + return false; + } + + SkipWhitespace(stream); + + if (stream.Peek() == '\0') + RAPIDJSON_PARSE_ERROR("Text only contains white space(s)", stream.Tell()); + else { + switch (stream.Peek()) { + case '{': ParseObject(stream, handler); break; + case '[': ParseArray(stream, handler); break; + default: RAPIDJSON_PARSE_ERROR("Expect either an object or array at root", stream.Tell()); + } + SkipWhitespace(stream); + + if (stream.Peek() != '\0') + RAPIDJSON_PARSE_ERROR("Nothing should follow the root object or array.", stream.Tell()); + } + + return true; + } + + bool HasParseError() const { return parseError_ != 0; } + const char* GetParseError() const { return parseError_; } + size_t GetErrorOffset() const { return errorOffset_; } + +private: + // Parse object: { string : value, ... } + template + void ParseObject(Stream& stream, Handler& handler) { + RAPIDJSON_ASSERT(stream.Peek() == '{'); + stream.Take(); // Skip '{' + handler.StartObject(); + SkipWhitespace(stream); + + if (stream.Peek() == '}') { + stream.Take(); + handler.EndObject(0); // empty object + return; + } + + for (SizeType memberCount = 0;;) { + if (stream.Peek() != '"') { + RAPIDJSON_PARSE_ERROR("Name of an object member must be a string", stream.Tell()); + break; + } + + ParseString(stream, handler); + SkipWhitespace(stream); + + if (stream.Take() != ':') { + RAPIDJSON_PARSE_ERROR("There must be a colon after the name of object member", stream.Tell()); + break; + } + SkipWhitespace(stream); + + ParseValue(stream, handler); + SkipWhitespace(stream); + + ++memberCount; + + switch(stream.Take()) { + case ',': SkipWhitespace(stream); break; + case '}': handler.EndObject(memberCount); return; + default: RAPIDJSON_PARSE_ERROR("Must be a comma or '}' after an object member", stream.Tell()); + } + } + } + + // Parse array: [ value, ... ] + template + void ParseArray(Stream& stream, Handler& handler) { + RAPIDJSON_ASSERT(stream.Peek() == '['); + stream.Take(); // Skip '[' + handler.StartArray(); + SkipWhitespace(stream); + + if (stream.Peek() == ']') { + stream.Take(); + handler.EndArray(0); // empty array + return; + } + + for (SizeType elementCount = 0;;) { + ParseValue(stream, handler); + ++elementCount; + SkipWhitespace(stream); + + switch (stream.Take()) { + case ',': SkipWhitespace(stream); break; + case ']': handler.EndArray(elementCount); return; + default: RAPIDJSON_PARSE_ERROR("Must be a comma or ']' after an array element.", stream.Tell()); + } + } + } + + template + void ParseNull(Stream& stream, Handler& handler) { + RAPIDJSON_ASSERT(stream.Peek() == 'n'); + stream.Take(); + + if (stream.Take() == 'u' && stream.Take() == 'l' && stream.Take() == 'l') + handler.Null(); + else + RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell() - 1); + } + + template + void ParseTrue(Stream& stream, Handler& handler) { + RAPIDJSON_ASSERT(stream.Peek() == 't'); + stream.Take(); + + if (stream.Take() == 'r' && stream.Take() == 'u' && stream.Take() == 'e') + handler.Bool(true); + else + RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell()); + } + + template + void ParseFalse(Stream& stream, Handler& handler) { + RAPIDJSON_ASSERT(stream.Peek() == 'f'); + stream.Take(); + + if (stream.Take() == 'a' && stream.Take() == 'l' && stream.Take() == 's' && stream.Take() == 'e') + handler.Bool(false); + else + RAPIDJSON_PARSE_ERROR("Invalid value", stream.Tell() - 1); + } + + // Helper function to parse four hexidecimal digits in \uXXXX in ParseString(). + template + unsigned ParseHex4(Stream& stream) { + Stream s = stream; // Use a local copy for optimization + unsigned codepoint = 0; + for (int i = 0; i < 4; i++) { + Ch c = s.Take(); + codepoint <<= 4; + codepoint += c; + if (c >= '0' && c <= '9') + codepoint -= '0'; + else if (c >= 'A' && c <= 'F') + codepoint -= 'A' - 10; + else if (c >= 'a' && c <= 'f') + codepoint -= 'a' - 10; + else + RAPIDJSON_PARSE_ERROR("Incorrect hex digit after \\u escape", s.Tell() - 1); + } + stream = s; // Restore stream + return codepoint; + } + + // Parse string, handling the prefix and suffix double quotes and escaping. + template + void ParseString(Stream& stream, Handler& handler) { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + static const Ch escape[256] = { + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 + }; +#undef Z16 + + Stream s = stream; // Use a local copy for optimization + RAPIDJSON_ASSERT(s.Peek() == '\"'); + s.Take(); // Skip '\"' + Ch *head; + SizeType len; + if (parseFlags & kParseInsituFlag) + head = s.PutBegin(); + else + len = 0; + +#define RAPIDJSON_PUT(x) \ + do { \ + if (parseFlags & kParseInsituFlag) \ + s.Put(x); \ + else { \ + *stack_.template Push() = x; \ + ++len; \ + } \ + } while(false) + + for (;;) { + Ch c = s.Take(); + if (c == '\\') { // Escape + Ch e = s.Take(); + if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e]) + RAPIDJSON_PUT(escape[(unsigned char)e]); + else if (e == 'u') { // Unicode + unsigned codepoint = ParseHex4(s); + if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { // Handle UTF-16 surrogate pair + if (s.Take() != '\\' || s.Take() != 'u') { + RAPIDJSON_PARSE_ERROR("Missing the second \\u in surrogate pair", s.Tell() - 2); + return; + } + unsigned codepoint2 = ParseHex4(s); + if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF) { + RAPIDJSON_PARSE_ERROR("The second \\u in surrogate pair is invalid", s.Tell() - 2); + return; + } + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + + Ch buffer[4]; + SizeType count = SizeType(Encoding::Encode(buffer, codepoint) - &buffer[0]); + + if (parseFlags & kParseInsituFlag) + for (SizeType i = 0; i < count; i++) + s.Put(buffer[i]); + else { + memcpy(stack_.template Push(count), buffer, count * sizeof(Ch)); + len += count; + } + } + else { + RAPIDJSON_PARSE_ERROR("Unknown escape character", stream.Tell() - 1); + return; + } + } + else if (c == '"') { // Closing double quote + if (parseFlags & kParseInsituFlag) { + size_t length = s.PutEnd(head); + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + RAPIDJSON_PUT('\0'); // null-terminate the string + handler.String(head, SizeType(length), false); + } + else { + RAPIDJSON_PUT('\0'); + handler.String(stack_.template Pop(len), len - 1, true); + } + stream = s; // restore stream + return; + } + else if (c == '\0') { + RAPIDJSON_PARSE_ERROR("lacks ending quotation before the end of string", stream.Tell() - 1); + return; + } + else if ((unsigned)c < 0x20) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1); + return; + } + else + RAPIDJSON_PUT(c); // Normal character, just copy + } +#undef RAPIDJSON_PUT + } + + template + void ParseNumber(Stream& stream, Handler& handler) { + Stream s = stream; // Local copy for optimization + // Parse minus + bool minus = false; + if (s.Peek() == '-') { + minus = true; + s.Take(); + } + + // Parse int: zero / ( digit1-9 *DIGIT ) + unsigned i; + bool try64bit = false; + if (s.Peek() == '0') { + i = 0; + s.Take(); + } + else if (s.Peek() >= '1' && s.Peek() <= '9') { + i = s.Take() - '0'; + + if (minus) + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i >= 214748364) { // 2^31 = 2147483648 + if (i != 214748364 || s.Peek() > '8') { + try64bit = true; + break; + } + } + i = i * 10 + (s.Take() - '0'); + } + else + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i >= 429496729) { // 2^32 - 1 = 4294967295 + if (i != 429496729 || s.Peek() > '5') { + try64bit = true; + break; + } + } + i = i * 10 + (s.Take() - '0'); + } + } + else { + RAPIDJSON_PARSE_ERROR("Expect a value here.", stream.Tell()); + return; + } + + // Parse 64bit int + uint64_t i64 = 0; + bool useDouble = false; + if (try64bit) { + i64 = i; + if (minus) + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i64 >= 922337203685477580uLL) // 2^63 = 9223372036854775808 + if (i64 != 922337203685477580uLL || s.Peek() > '8') { + useDouble = true; + break; + } + i64 = i64 * 10 + (s.Take() - '0'); + } + else + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (i64 >= 1844674407370955161uLL) // 2^64 - 1 = 18446744073709551615 + if (i64 != 1844674407370955161uLL || s.Peek() > '5') { + useDouble = true; + break; + } + i64 = i64 * 10 + (s.Take() - '0'); + } + } + + // Force double for big integer + double d = 0.0; + if (useDouble) { + d = (double)i64; + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (d >= 1E307) { + RAPIDJSON_PARSE_ERROR("Number too big to store in double", stream.Tell()); + return; + } + d = d * 10 + (s.Take() - '0'); + } + } + + // Parse frac = decimal-point 1*DIGIT + int expFrac = 0; + if (s.Peek() == '.') { + if (!useDouble) { + d = try64bit ? (double)i64 : (double)i; + useDouble = true; + } + s.Take(); + + if (s.Peek() >= '0' && s.Peek() <= '9') { + d = d * 10 + (s.Take() - '0'); + --expFrac; + } + else { + RAPIDJSON_PARSE_ERROR("At least one digit in fraction part", stream.Tell()); + return; + } + + while (s.Peek() >= '0' && s.Peek() <= '9') { + if (expFrac > -16) { + d = d * 10 + (s.Peek() - '0'); + --expFrac; + } + s.Take(); + } + } + + // Parse exp = e [ minus / plus ] 1*DIGIT + int exp = 0; + if (s.Peek() == 'e' || s.Peek() == 'E') { + if (!useDouble) { + d = try64bit ? (double)i64 : (double)i; + useDouble = true; + } + s.Take(); + + bool expMinus = false; + if (s.Peek() == '+') + s.Take(); + else if (s.Peek() == '-') { + s.Take(); + expMinus = true; + } + + if (s.Peek() >= '0' && s.Peek() <= '9') { + exp = s.Take() - '0'; + while (s.Peek() >= '0' && s.Peek() <= '9') { + exp = exp * 10 + (s.Take() - '0'); + if (exp > 308) { + RAPIDJSON_PARSE_ERROR("Number too big to store in double", stream.Tell()); + return; + } + } + } + else { + RAPIDJSON_PARSE_ERROR("At least one digit in exponent", s.Tell()); + return; + } + + if (expMinus) + exp = -exp; + } + + // Finish parsing, call event according to the type of number. + if (useDouble) { + d *= internal::Pow10(exp + expFrac); + handler.Double(minus ? -d : d); + } + else { + if (try64bit) { + if (minus) + handler.Int64(-(int64_t)i64); + else + handler.Uint64(i64); + } + else { + if (minus) + handler.Int(-(int)i); + else + handler.Uint(i); + } + } + + stream = s; // restore stream + } + + // Parse any JSON value + template + void ParseValue(Stream& stream, Handler& handler) { + switch (stream.Peek()) { + case 'n': ParseNull (stream, handler); break; + case 't': ParseTrue (stream, handler); break; + case 'f': ParseFalse (stream, handler); break; + case '"': ParseString(stream, handler); break; + case '{': ParseObject(stream, handler); break; + case '[': ParseArray (stream, handler); break; + default : ParseNumber(stream, handler); + } + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + jmp_buf jmpbuf_; //!< setjmp buffer for fast exit from nested parsing function calls. + const char* parseError_; + size_t errorOffset_; +}; // class GenericReader + +//! Reader with UTF8 encoding and default allocator. +typedef GenericReader > Reader; + +} // namespace rapidjson + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // RAPIDJSON_READER_H_ diff --git a/third-party/rapidjson/stringbuffer.h b/third-party/rapidjson/stringbuffer.h new file mode 100644 index 000000000..269ae1076 --- /dev/null +++ b/third-party/rapidjson/stringbuffer.h @@ -0,0 +1,49 @@ +#ifndef RAPIDJSON_STRINGBUFFER_H_ +#define RAPIDJSON_STRINGBUFFER_H_ + +#include "rapidjson.h" +#include "internal/stack.h" + +namespace rapidjson { + +//! Represents an in-memory output stream. +/*! + \tparam Encoding Encoding of the stream. + \tparam Allocator type for allocating memory buffer. + \implements Stream +*/ +template +struct GenericStringBuffer { + typedef typename Encoding::Ch Ch; + + GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} + + void Put(Ch c) { *stack_.template Push() = c; } + + void Clear() { stack_.Clear(); } + + const char* GetString() const { + // Push and pop a null terminator. This is safe. + *stack_.template Push() = '\0'; + stack_.template Pop(1); + + return stack_.template Bottom(); + } + + size_t Size() const { return stack_.GetSize(); } + + static const size_t kDefaultCapacity = 256; + mutable internal::Stack stack_; +}; + +typedef GenericStringBuffer > StringBuffer; + +//! Implement specialized version of PutN() with memset() for better performance. +template<> +inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { + memset(stream.stack_.Push(n), c, n * sizeof(c)); +} + +} // namespace rapidjson + +#endif // RAPIDJSON_STRINGBUFFER_H_ diff --git a/third-party/rapidjson/writer.h b/third-party/rapidjson/writer.h new file mode 100644 index 000000000..d96f2081a --- /dev/null +++ b/third-party/rapidjson/writer.h @@ -0,0 +1,241 @@ +#ifndef RAPIDJSON_WRITER_H_ +#define RAPIDJSON_WRITER_H_ + +#include "rapidjson.h" +#include "internal/stack.h" +#include "internal/strfunc.h" +#include // snprintf() or _sprintf_s() +#include // placement new + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif + +namespace rapidjson { + +//! JSON writer +/*! Writer implements the concept Handler. + It generates JSON text by events to an output stream. + + User may programmatically calls the functions of a writer to generate JSON text. + + On the other side, a writer can also be passed to objects that generates events, + + for example Reader::Parse() and Document::Accept(). + + \tparam Stream Type of ouptut stream. + \tparam Encoding Encoding of both source strings and output. + \implements Handler +*/ +template, typename Allocator = MemoryPoolAllocator<> > +class Writer { +public: + typedef typename Encoding::Ch Ch; + + Writer(Stream& stream, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : + stream_(stream), level_stack_(allocator, levelDepth * sizeof(Level)) {} + + //@name Implementation of Handler + //@{ + Writer& Null() { Prefix(kNullType); WriteNull(); return *this; } + Writer& Bool(bool b) { Prefix(b ? kTrueType : kFalseType); WriteBool(b); return *this; } + Writer& Int(int i) { Prefix(kNumberType); WriteInt(i); return *this; } + Writer& Uint(unsigned u) { Prefix(kNumberType); WriteUint(u); return *this; } + Writer& Int64(int64_t i64) { Prefix(kNumberType); WriteInt64(i64); return *this; } + Writer& Uint64(uint64_t u64) { Prefix(kNumberType); WriteUint64(u64); return *this; } + Writer& Double(double d) { Prefix(kNumberType); WriteDouble(d); return *this; } + + Writer& String(const Ch* str, SizeType length, bool copy = false) { + (void)copy; + Prefix(kStringType); + WriteString(str, length); + return *this; + } + + Writer& StartObject() { + Prefix(kObjectType); + new (level_stack_.template Push()) Level(false); + WriteStartObject(); + return *this; + } + + Writer& EndObject(SizeType memberCount = 0) { + (void)memberCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(!level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + WriteEndObject(); + return *this; + } + + Writer& StartArray() { + Prefix(kArrayType); + new (level_stack_.template Push()) Level(true); + WriteStartArray(); + return *this; + } + + Writer& EndArray(SizeType elementCount = 0) { + (void)elementCount; + RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); + RAPIDJSON_ASSERT(level_stack_.template Top()->inArray); + level_stack_.template Pop(1); + WriteEndArray(); + return *this; + } + //@} + + //! Simpler but slower overload. + Writer& String(const Ch* str) { return String(str, internal::StrLen(str)); } + +protected: + //! Information for each nested level + struct Level { + Level(bool inArray_) : inArray(inArray_), valueCount(0) {} + bool inArray; //!< true if in array, otherwise in object + size_t valueCount; //!< number of values in this level + }; + + static const size_t kDefaultLevelDepth = 32; + + void WriteNull() { + stream_.Put('n'); stream_.Put('u'); stream_.Put('l'); stream_.Put('l'); + } + + void WriteBool(bool b) { + if (b) { + stream_.Put('t'); stream_.Put('r'); stream_.Put('u'); stream_.Put('e'); + } + else { + stream_.Put('f'); stream_.Put('a'); stream_.Put('l'); stream_.Put('s'); stream_.Put('e'); + } + } + + void WriteInt(int i) { + if (i < 0) { + stream_.Put('-'); + i = -i; + } + WriteUint((unsigned)i); + } + + void WriteUint(unsigned u) { + char buffer[10]; + char *p = buffer; + do { + *p++ = (u % 10) + '0'; + u /= 10; + } while (u > 0); + + do { + --p; + stream_.Put(*p); + } while (p != buffer); + } + + void WriteInt64(int64_t i64) { + if (i64 < 0) { + stream_.Put('-'); + i64 = -i64; + } + WriteUint64((uint64_t)i64); + } + + void WriteUint64(uint64_t u64) { + char buffer[20]; + char *p = buffer; + do { + *p++ = char(u64 % 10) + '0'; + u64 /= 10; + } while (u64 > 0); + + do { + --p; + stream_.Put(*p); + } while (p != buffer); + } + + //! \todo Optimization with custom double-to-string converter. + void WriteDouble(double d) { + char buffer[100]; +#if _MSC_VER + int ret = sprintf_s(buffer, sizeof(buffer), "%g", d); +#else + int ret = snprintf(buffer, sizeof(buffer), "%g", d); +#endif + RAPIDJSON_ASSERT(ret >= 1); + for (int i = 0; i < ret; i++) + stream_.Put(buffer[i]); + } + + void WriteString(const Ch* str, SizeType length) { + static const char hexDigits[] = "0123456789ABCDEF"; + static const char escape[256] = { +#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 + //0 1 2 3 4 5 6 7 8 9 A B C D E F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + + stream_.Put('\"'); + for (const Ch* p = str; p != str + length; ++p) { + if ((sizeof(Ch) == 1 || *p < 256) && escape[(unsigned char)*p]) { + stream_.Put('\\'); + stream_.Put(escape[(unsigned char)*p]); + if (escape[(unsigned char)*p] == 'u') { + stream_.Put('0'); + stream_.Put('0'); + stream_.Put(hexDigits[(*p) >> 4]); + stream_.Put(hexDigits[(*p) & 0xF]); + } + } + else + stream_.Put(*p); + } + stream_.Put('\"'); + } + + void WriteStartObject() { stream_.Put('{'); } + void WriteEndObject() { stream_.Put('}'); } + void WriteStartArray() { stream_.Put('['); } + void WriteEndArray() { stream_.Put(']'); } + + void Prefix(Type type) { + (void)type; + if (level_stack_.GetSize() != 0) { // this value is not at root + Level* level = level_stack_.template Top(); + if (level->valueCount > 0) { + if (level->inArray) + stream_.Put(','); // add comma if it is not the first element in array + else // in object + stream_.Put((level->valueCount % 2 == 0) ? ',' : ':'); + } + if (!level->inArray && level->valueCount % 2 == 0) + RAPIDJSON_ASSERT(type == kStringType); // if it's in object, then even number should be a name + level->valueCount++; + } + else + RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType); + } + + Stream& stream_; + internal::Stack level_stack_; + +private: + // Prohibit assignment for VC C4512 warning + Writer& operator=(const Writer& w); +}; + +} // namespace rapidjson + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // RAPIDJSON_RAPIDJSON_H_ diff --git a/tools/db_sanity_test.cc b/tools/db_sanity_test.cc index e970f5e91..4ae120c21 100644 --- a/tools/db_sanity_test.cc +++ b/tools/db_sanity_test.cc @@ -4,6 +4,7 @@ // of patent rights can be found in the PATENTS file in the same directory. #include +#include #include #include diff --git a/util/arena.cc b/util/arena.cc index 1775addcd..60a01c245 100644 --- a/util/arena.cc +++ b/util/arena.cc @@ -14,6 +14,7 @@ namespace rocksdb { +const size_t Arena::kInlineSize; const size_t Arena::kMinBlockSize = 4096; const size_t Arena::kMaxBlockSize = 2 << 30; static const int kAlignUnit = sizeof(void*); @@ -34,6 +35,10 @@ size_t OptimizeBlockSize(size_t block_size) { Arena::Arena(size_t block_size) : kBlockSize(OptimizeBlockSize(block_size)) { assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize && kBlockSize % kAlignUnit == 0); + alloc_bytes_remaining_ = sizeof(inline_block_); + blocks_memory_ += alloc_bytes_remaining_; + aligned_alloc_ptr_ = inline_block_; + unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_; } Arena::~Arena() { @@ -71,17 +76,17 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) { } } -char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size, +char* Arena::AllocateAligned(size_t bytes, size_t huage_page_size, Logger* logger) { assert((kAlignUnit & (kAlignUnit - 1)) == 0); // Pointer size should be a power of 2 #ifdef MAP_HUGETLB - if (huge_page_tlb_size > 0 && bytes > 0) { + if (huage_page_size > 0 && bytes > 0) { // Allocate from a huge page TBL table. assert(logger != nullptr); // logger need to be passed in. size_t reserved_size = - ((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size; + ((bytes - 1U) / huage_page_size + 1U) * huage_page_size; assert(reserved_size >= bytes); void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0); diff --git a/util/arena.h b/util/arena.h index 161a253aa..53acd2270 100644 --- a/util/arena.h +++ b/util/arena.h @@ -13,6 +13,7 @@ #pragma once #include +#include #include #include #include @@ -28,6 +29,7 @@ class Arena { Arena(const Arena&) = delete; void operator=(const Arena&) = delete; + static const size_t kInlineSize = 2048; static const size_t kMinBlockSize; static const size_t kMaxBlockSize; @@ -36,18 +38,19 @@ class Arena { char* Allocate(size_t bytes); - // huge_page_tlb_size: if >0, allocate bytes from huge page TLB and the size - // of the huge page TLB. Bytes will be rounded up to multiple and 2MB and - // allocate huge pages through mmap anonymous option with huge page on. - // The extra space allocated will be wasted. To enable it, need to reserve - // huge pages for it to be allocated, like: + // huge_page_size: if >0, will try to allocate from huage page TLB. + // The argument will be the size of the page size for huge page TLB. Bytes + // will be rounded up to multiple of the page size to allocate through mmap + // anonymous option with huge page on. The extra space allocated will be + // wasted. If allocation fails, will fall back to normal case. To enable it, + // need to reserve huge pages for it to be allocated, like: // sysctl -w vm.nr_hugepages=20 // See linux doc Documentation/vm/hugetlbpage.txt for details. // huge page allocation can fail. In this case it will fail back to // normal cases. The messages will be logged to logger. So when calling with // huge_page_tlb_size > 0, we highly recommend a logger is passed in. // Otherwise, the error message will be printed out to stderr directly. - char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0, + char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, Logger* logger = nullptr); // Returns an estimate of the total memory usage of data allocated @@ -69,6 +72,7 @@ class Arena { size_t BlockSize() const { return kBlockSize; } private: + char inline_block_[kInlineSize]; // Number of bytes allocated in one block const size_t kBlockSize; // Array of new[] allocated memory blocks diff --git a/util/arena_test.cc b/util/arena_test.cc index 1b2b53175..7b6cfd0af 100644 --- a/util/arena_test.cc +++ b/util/arena_test.cc @@ -31,9 +31,11 @@ TEST(ArenaTest, MemoryAllocatedBytes) { for (int i = 0; i < N; i++) { arena.Allocate(req_sz); } - expected_memory_allocated = req_sz * N; + expected_memory_allocated = req_sz * N + Arena::kInlineSize; ASSERT_EQ(arena.MemoryAllocatedBytes(), expected_memory_allocated); + arena.Allocate(Arena::kInlineSize - 1); + // requested size < quarter of a block: // allocate a block with the default size, then try to use unused part // of the block. So one new block will be allocated for the first @@ -64,12 +66,19 @@ TEST(ArenaTest, ApproximateMemoryUsageTest) { Arena arena(kBlockSize); ASSERT_EQ(kZero, arena.ApproximateMemoryUsage()); + // allocate inline bytes + arena.AllocateAligned(8); + arena.AllocateAligned(Arena::kInlineSize / 2 - 16); + arena.AllocateAligned(Arena::kInlineSize / 2); + ASSERT_EQ(arena.ApproximateMemoryUsage(), Arena::kInlineSize - 8); + ASSERT_EQ(arena.MemoryAllocatedBytes(), Arena::kInlineSize); + auto num_blocks = kBlockSize / kEntrySize; // first allocation arena.AllocateAligned(kEntrySize); auto mem_usage = arena.MemoryAllocatedBytes(); - ASSERT_EQ(mem_usage, kBlockSize); + ASSERT_EQ(mem_usage, kBlockSize + Arena::kInlineSize); auto usage = arena.ApproximateMemoryUsage(); ASSERT_LT(usage, mem_usage); for (size_t i = 1; i < num_blocks; ++i) { diff --git a/util/env_hdfs.cc b/util/env_hdfs.cc index c724b2302..eb2b12cba 100644 --- a/util/env_hdfs.cc +++ b/util/env_hdfs.cc @@ -18,6 +18,9 @@ #include "hdfs/hdfs.h" #include "hdfs/env_hdfs.h" +#define HDFS_EXISTS 0 +#define HDFS_DOESNT_EXIST 1 + // // This file defines an HDFS environment for rocksdb. It uses the libhdfs // api to access HDFS. All HDFS files created by one instance of rocksdb @@ -39,7 +42,8 @@ static Logger* mylog = nullptr; // Used for reading a file from HDFS. It implements both sequential-read // access methods as well as random read access methods. -class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile { +class HdfsReadableFile : virtual public SequentialFile, + virtual public RandomAccessFile { private: hdfsFS fileSys_; std::string filename_; @@ -73,17 +77,34 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce Status s; Log(mylog, "[hdfs] HdfsReadableFile reading %s %ld\n", filename_.c_str(), n); - size_t bytes_read = hdfsRead(fileSys_, hfile_, scratch, (tSize)n); - Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str()); - *result = Slice(scratch, bytes_read); - if (bytes_read < n) { - if (feof()) { - // We leave status as ok if we hit the end of the file - } else { - // A partial read with an error: return a non-ok status - s = IOError(filename_, errno); + + char* buffer = scratch; + size_t total_bytes_read = 0; + tSize bytes_read = 0; + tSize remaining_bytes = (tSize)n; + + // Read a total of n bytes repeatedly until we hit error or eof + while (remaining_bytes > 0) { + bytes_read = hdfsRead(fileSys_, hfile_, buffer, remaining_bytes); + if (bytes_read <= 0) { + break; } + assert(bytes_read <= remaining_bytes); + + total_bytes_read += bytes_read; + remaining_bytes -= bytes_read; + buffer += bytes_read; + } + assert(total_bytes_read <= n); + + Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str()); + + if (bytes_read < 0) { + s = IOError(filename_, errno); + } else { + *result = Slice(scratch, total_bytes_read); } + return s; } @@ -139,8 +160,7 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce size = pFileInfo->mSize; hdfsFreeFileInfo(pFileInfo, 1); } else { - throw rocksdb::HdfsFatalException("fileSize on unknown file " + - filename_); + throw HdfsFatalException("fileSize on unknown file " + filename_); } return size; } @@ -236,9 +256,8 @@ class HdfsLogger : public Logger { uint64_t (*gettid_)(); // Return the thread id for the current thread public: - HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)(), - const InfoLogLevel log_level = InfoLogLevel::ERROR) - : Logger(log_level), file_(f), gettid_(gettid) { + HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)()) + : file_(f), gettid_(gettid) { Log(mylog, "[hdfs] HdfsLogger opened %s\n", file_->getName().c_str()); } @@ -324,40 +343,52 @@ class HdfsLogger : public Logger { // Finally, the hdfs environment +const std::string HdfsEnv::kProto = "hdfs://"; +const std::string HdfsEnv::pathsep = "/"; + // open a file for sequential reading Status HdfsEnv::NewSequentialFile(const std::string& fname, - SequentialFile** result) { + unique_ptr* result, + const EnvOptions& options) { + result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); - if (f == nullptr) { + if (f == nullptr || !f->isValid()) { + delete f; *result = nullptr; return IOError(fname, errno); } - *result = dynamic_cast(f); + result->reset(dynamic_cast(f)); return Status::OK(); } // open a file for random reading Status HdfsEnv::NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) { + unique_ptr* result, + const EnvOptions& options) { + result->reset(); HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname); - if (f == nullptr) { + if (f == nullptr || !f->isValid()) { + delete f; *result = nullptr; return IOError(fname, errno); } - *result = dynamic_cast(f); + result->reset(dynamic_cast(f)); return Status::OK(); } // create a new file for writing Status HdfsEnv::NewWritableFile(const std::string& fname, - WritableFile** result) { + unique_ptr* result, + const EnvOptions& options) { + result->reset(); Status s; HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname); if (f == nullptr || !f->isValid()) { + delete f; *result = nullptr; return IOError(fname, errno); } - *result = dynamic_cast(f); + result->reset(dynamic_cast(f)); return Status::OK(); } @@ -367,24 +398,30 @@ Status HdfsEnv::NewRandomRWFile(const std::string& fname, return Status::NotSupported("NewRandomRWFile not supported on HdfsEnv"); } -virtual Status NewDirectory(const std::string& name, - unique_ptr* result) { - return Status::NotSupported("NewDirectory not yet supported on HdfsEnv"); +Status HdfsEnv::NewDirectory(const std::string& name, + unique_ptr* result) { + return Status::NotSupported("NewDirectory not supported on HdfsEnv"); } bool HdfsEnv::FileExists(const std::string& fname) { int value = hdfsExists(fileSys_, fname.c_str()); - if (value == 0) { + switch (value) { + case HDFS_EXISTS: return true; + case HDFS_DOESNT_EXIST: + return false; + default: // anything else should be an error + Log(mylog, "FileExists hdfsExists call failed"); + throw HdfsFatalException("hdfsExists call failed with error " + + std::to_string(value) + ".\n"); } - return false; } Status HdfsEnv::GetChildren(const std::string& path, std::vector* result) { int value = hdfsExists(fileSys_, path.c_str()); switch (value) { - case 0: { + case HDFS_EXISTS: { // directory exists int numEntries = 0; hdfsFileInfo* pHdfsFileInfo = 0; pHdfsFileInfo = hdfsListDirectory(fileSys_, path.c_str(), &numEntries); @@ -402,15 +439,17 @@ Status HdfsEnv::GetChildren(const std::string& path, } else { // numEntries < 0 indicates error Log(mylog, "hdfsListDirectory call failed with error "); - throw HdfsFatalException("hdfsListDirectory call failed negative error.\n"); + throw HdfsFatalException( + "hdfsListDirectory call failed negative error.\n"); } break; } - case 1: // directory does not exist, exit + case HDFS_DOESNT_EXIST: // directory does not exist, exit break; default: // anything else should be an error - Log(mylog, "hdfsListDirectory call failed with error "); - throw HdfsFatalException("hdfsListDirectory call failed with error.\n"); + Log(mylog, "GetChildren hdfsExists call failed"); + throw HdfsFatalException("hdfsExists call failed with error " + + std::to_string(value) + ".\n"); } return Status::OK(); } @@ -432,10 +471,15 @@ Status HdfsEnv::CreateDir(const std::string& name) { Status HdfsEnv::CreateDirIfMissing(const std::string& name) { const int value = hdfsExists(fileSys_, name.c_str()); // Not atomic. state might change b/w hdfsExists and CreateDir. - if (value == 0) { + switch (value) { + case HDFS_EXISTS: return Status::OK(); - } else { + case HDFS_DOESNT_EXIST: return CreateDir(name); + default: // anything else should be an error + Log(mylog, "CreateDirIfMissing hdfsExists call failed"); + throw HdfsFatalException("hdfsExists call failed with error " + + std::to_string(value) + ".\n"); } }; @@ -492,11 +536,12 @@ Status HdfsEnv::NewLogger(const std::string& fname, shared_ptr* result) { HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname); if (f == nullptr || !f->isValid()) { + delete f; *result = nullptr; return IOError(fname, errno); } HdfsLogger* h = new HdfsLogger(f, &HdfsEnv::gettid); - *result = h; + result->reset(h); if (mylog == nullptr) { // mylog = h; // uncomment this for detailed logging } diff --git a/util/log_buffer.h b/util/log_buffer.h index 8ebe92e03..2a24bf854 100644 --- a/util/log_buffer.h +++ b/util/log_buffer.h @@ -8,6 +8,7 @@ #include "rocksdb/env.h" #include "util/arena.h" #include "util/autovector.h" +#include namespace rocksdb { diff --git a/util/logging.cc b/util/logging.cc index 02e356001..1b5549d73 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -9,6 +9,8 @@ #include "util/logging.h" +#define __STDC_FORMAT_MACROS +#include #include #include #include @@ -18,6 +20,26 @@ namespace rocksdb { + +// for sizes >=10TB, print "XXTB" +// for sizes >=10GB, print "XXGB" +// etc. +// append file size summary to output and return the len +int AppendHumanBytes(uint64_t bytes, char* output, int len) { + const uint64_t ull10 = 10; + if (bytes >= ull10 << 40) { + return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40); + } else if (bytes >= ull10 << 30) { + return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30); + } else if (bytes >= ull10 << 20) { + return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20); + } else if (bytes >= ull10 << 10) { + return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10); + } else { + return snprintf(output, len, "%" PRIu64 "B", bytes); + } +} + void AppendNumberTo(std::string* str, uint64_t num) { char buf[30]; snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); diff --git a/util/logging.h b/util/logging.h index d8ce45efc..ce0269726 100644 --- a/util/logging.h +++ b/util/logging.h @@ -21,6 +21,9 @@ namespace rocksdb { class Slice; class WritableFile; +// Append a human-readable size in bytes +int AppendHumanBytes(uint64_t bytes, char* output, int len); + // Append a human-readable printout of "num" to *str extern void AppendNumberTo(std::string* str, uint64_t num);