fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
				
			
			
		
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							309 lines
						
					
					
						
							11 KiB
						
					
					
				
			
		
		
	
	
							309 lines
						
					
					
						
							11 KiB
						
					
					
				| // Copyright 2013 Facebook
 | |
| /**
 | |
|  * RedisListIterator:
 | |
|  * An abstraction over the "list" concept (e.g.: for redis lists).
 | |
|  * Provides functionality to read, traverse, edit, and write these lists.
 | |
|  *
 | |
|  * Upon construction, the RedisListIterator is given a block of list data.
 | |
|  * Internally, it stores a pointer to the data and a pointer to current item.
 | |
|  * It also stores a "result" list that will be mutated over time.
 | |
|  *
 | |
|  * Traversal and mutation are done by "forward iteration".
 | |
|  * The Push() and Skip() methods will advance the iterator to the next item.
 | |
|  * However, Push() will also "write the current item to the result".
 | |
|  * Skip() will simply move to next item, causing current item to be dropped.
 | |
|  *
 | |
|  * Upon completion, the result (accessible by WriteResult()) will be saved.
 | |
|  * All "skipped" items will be gone; all "pushed" items will remain.
 | |
|  *
 | |
|  * @throws Any of the operations may throw a RedisListException if an invalid
 | |
|  *          operation is performed or if the data is found to be corrupt.
 | |
|  *
 | |
|  * @notes By default, if WriteResult() is called part-way through iteration,
 | |
|  *        it will automatically advance the iterator to the end, and Keep()
 | |
|  *        all items that haven't been traversed yet. This may be subject
 | |
|  *        to review.
 | |
|  *
 | |
|  * @notes Can access the "current" item via GetCurrent(), and other
 | |
|  *        list-specific information such as Length().
 | |
|  *
 | |
|  * @notes The internal representation is due to change at any time. Presently,
 | |
|  *        the list is represented as follows:
 | |
|  *          - 32-bit integer header: the number of items in the list
 | |
|  *          - For each item:
 | |
|  *              - 32-bit int (n): the number of bytes representing this item
 | |
|  *              - n bytes of data: the actual data.
 | |
|  *
 | |
|  * @author Deon Nicholas (dnicholas@fb.com)
 | |
|  */
 | |
| 
 | |
| #pragma once
 | |
| #ifndef ROCKSDB_LITE
 | |
| 
 | |
| #include <string>
 | |
| 
 | |
| #include "redis_list_exception.h"
 | |
| #include "rocksdb/slice.h"
 | |
| #include "util/coding.h"
 | |
| 
 | |
| namespace rocksdb {
 | |
| 
 | |
| /// An abstraction over the "list" concept.
 | |
| /// All operations may throw a RedisListException
 | |
| class RedisListIterator {
 | |
|  public:
 | |
|   /// Construct a redis-list-iterator based on data.
 | |
|   /// If the data is non-empty, it must formatted according to @notes above.
 | |
|   ///
 | |
|   /// If the data is valid, we can assume the following invariant(s):
 | |
|   ///  a) length_, num_bytes_ are set correctly.
 | |
|   ///  b) cur_byte_ always refers to the start of the current element,
 | |
|   ///       just before the bytes that specify element length.
 | |
|   ///  c) cur_elem_ is always the index of the current element.
 | |
|   ///  d) cur_elem_length_ is always the number of bytes in current element,
 | |
|   ///       excluding the 4-byte header itself.
 | |
|   ///  e) result_ will always contain data_[0..cur_byte_) and a header
 | |
|   ///  f) Whenever corrupt data is encountered or an invalid operation is
 | |
|   ///      attempted, a RedisListException will immediately be thrown.
 | |
|   explicit RedisListIterator(const std::string& list_data)
 | |
|       : data_(list_data.data()),
 | |
|         num_bytes_(static_cast<uint32_t>(list_data.size())),
 | |
|         cur_byte_(0),
 | |
|         cur_elem_(0),
 | |
|         cur_elem_length_(0),
 | |
|         length_(0),
 | |
|         result_() {
 | |
|     // Initialize the result_ (reserve enough space for header)
 | |
|     InitializeResult();
 | |
| 
 | |
|     // Parse the data only if it is not empty.
 | |
|     if (num_bytes_ == 0) {
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     // If non-empty, but less than 4 bytes, data must be corrupt
 | |
|     if (num_bytes_ < sizeof(length_)) {
 | |
|       ThrowError("Corrupt header.");    // Will break control flow
 | |
|     }
 | |
| 
 | |
|     // Good. The first bytes specify the number of elements
 | |
|     length_ = DecodeFixed32(data_);
 | |
|     cur_byte_ = sizeof(length_);
 | |
| 
 | |
|     // If we have at least one element, point to that element.
 | |
|     // Also, read the first integer of the element (specifying the size),
 | |
|     //   if possible.
 | |
|     if (length_ > 0) {
 | |
|       if (cur_byte_ + sizeof(cur_elem_length_) <= num_bytes_) {
 | |
|         cur_elem_length_ = DecodeFixed32(data_+cur_byte_);
 | |
|       } else {
 | |
|         ThrowError("Corrupt data for first element.");
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // At this point, we are fully set-up.
 | |
|     // The invariants described in the header should now be true.
 | |
|   }
 | |
| 
 | |
|   /// Reserve some space for the result_.
 | |
|   /// Equivalent to result_.reserve(bytes).
 | |
|   void Reserve(int bytes) {
 | |
|     result_.reserve(bytes);
 | |
|   }
 | |
| 
 | |
|   /// Go to next element in data file.
 | |
|   /// Also writes the current element to result_.
 | |
|   RedisListIterator& Push() {
 | |
|     WriteCurrentElement();
 | |
|     MoveNext();
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   /// Go to next element in data file.
 | |
|   /// Drops/skips the current element. It will not be written to result_.
 | |
|   RedisListIterator& Skip() {
 | |
|     MoveNext();
 | |
|     --length_;          // One less item
 | |
|     --cur_elem_;        // We moved one forward, but index did not change
 | |
|     return *this;
 | |
|   }
 | |
| 
 | |
|   /// Insert elem into the result_ (just BEFORE the current element / byte)
 | |
|   /// Note: if Done() (i.e.: iterator points to end), this will append elem.
 | |
|   void InsertElement(const Slice& elem) {
 | |
|     // Ensure we are in a valid state
 | |
|     CheckErrors();
 | |
| 
 | |
|     const int kOrigSize = static_cast<int>(result_.size());
 | |
|     result_.resize(kOrigSize + SizeOf(elem));
 | |
|     EncodeFixed32(result_.data() + kOrigSize,
 | |
|                   static_cast<uint32_t>(elem.size()));
 | |
|     memcpy(result_.data() + kOrigSize + sizeof(uint32_t), elem.data(),
 | |
|            elem.size());
 | |
|     ++length_;
 | |
|     ++cur_elem_;
 | |
|   }
 | |
| 
 | |
|   /// Access the current element, and save the result into *curElem
 | |
|   void GetCurrent(Slice* curElem) {
 | |
|     // Ensure we are in a valid state
 | |
|     CheckErrors();
 | |
| 
 | |
|     // Ensure that we are not past the last element.
 | |
|     if (Done()) {
 | |
|       ThrowError("Invalid dereferencing.");
 | |
|     }
 | |
| 
 | |
|     // Dereference the element
 | |
|     *curElem = Slice(data_+cur_byte_+sizeof(cur_elem_length_),
 | |
|                      cur_elem_length_);
 | |
|   }
 | |
| 
 | |
|   // Number of elements
 | |
|   int Length() const {
 | |
|     return length_;
 | |
|   }
 | |
| 
 | |
|   // Number of bytes in the final representation (i.e: WriteResult().size())
 | |
|   int Size() const {
 | |
|     // result_ holds the currently written data
 | |
|     // data_[cur_byte..num_bytes-1] is the remainder of the data
 | |
|     return static_cast<int>(result_.size() + (num_bytes_ - cur_byte_));
 | |
|   }
 | |
| 
 | |
|   // Reached the end?
 | |
|   bool Done() const {
 | |
|     return cur_byte_ >= num_bytes_ || cur_elem_ >= length_;
 | |
|   }
 | |
| 
 | |
|   /// Returns a string representing the final, edited, data.
 | |
|   /// Assumes that all bytes of data_ in the range [0,cur_byte_) have been read
 | |
|   ///  and that result_ contains this data.
 | |
|   /// The rest of the data must still be written.
 | |
|   /// So, this method ADVANCES THE ITERATOR TO THE END before writing.
 | |
|   Slice WriteResult() {
 | |
|     CheckErrors();
 | |
| 
 | |
|     // The header should currently be filled with dummy data (0's)
 | |
|     // Correctly update the header.
 | |
|     // Note, this is safe since result_ is a vector (guaranteed contiguous)
 | |
|     EncodeFixed32(&result_[0],length_);
 | |
| 
 | |
|     // Append the remainder of the data to the result.
 | |
|     result_.insert(result_.end(),data_+cur_byte_, data_ +num_bytes_);
 | |
| 
 | |
|     // Seek to end of file
 | |
|     cur_byte_ = num_bytes_;
 | |
|     cur_elem_ = length_;
 | |
|     cur_elem_length_ = 0;
 | |
| 
 | |
|     // Return the result
 | |
|     return Slice(result_.data(),result_.size());
 | |
|   }
 | |
| 
 | |
|  public: // Static public functions
 | |
| 
 | |
|   /// An upper-bound on the amount of bytes needed to store this element.
 | |
|   /// This is used to hide representation information from the client.
 | |
|   /// E.G. This can be used to compute the bytes we want to Reserve().
 | |
|   static uint32_t SizeOf(const Slice& elem) {
 | |
|     // [Integer Length . Data]
 | |
|     return static_cast<uint32_t>(sizeof(uint32_t) + elem.size());
 | |
|   }
 | |
| 
 | |
|  private: // Private functions
 | |
| 
 | |
|   /// Initializes the result_ string.
 | |
|   /// It will fill the first few bytes with 0's so that there is
 | |
|   ///  enough space for header information when we need to write later.
 | |
|   /// Currently, "header information" means: the length (number of elements)
 | |
|   /// Assumes that result_ is empty to begin with
 | |
|   void InitializeResult() {
 | |
|     assert(result_.empty());            // Should always be true.
 | |
|     result_.resize(sizeof(uint32_t),0); // Put a block of 0's as the header
 | |
|   }
 | |
| 
 | |
|   /// Go to the next element (used in Push() and Skip())
 | |
|   void MoveNext() {
 | |
|     CheckErrors();
 | |
| 
 | |
|     // Check to make sure we are not already in a finished state
 | |
|     if (Done()) {
 | |
|       ThrowError("Attempting to iterate past end of list.");
 | |
|     }
 | |
| 
 | |
|     // Move forward one element.
 | |
|     cur_byte_ += sizeof(cur_elem_length_) + cur_elem_length_;
 | |
|     ++cur_elem_;
 | |
| 
 | |
|     // If we are at the end, finish
 | |
|     if (Done()) {
 | |
|       cur_elem_length_ = 0;
 | |
|       return;
 | |
|     }
 | |
| 
 | |
|     // Otherwise, we should be able to read the new element's length
 | |
|     if (cur_byte_ + sizeof(cur_elem_length_) > num_bytes_) {
 | |
|       ThrowError("Corrupt element data.");
 | |
|     }
 | |
| 
 | |
|     // Set the new element's length
 | |
|     cur_elem_length_ = DecodeFixed32(data_+cur_byte_);
 | |
| 
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   /// Append the current element (pointed to by cur_byte_) to result_
 | |
|   /// Assumes result_ has already been reserved appropriately.
 | |
|   void WriteCurrentElement() {
 | |
|     // First verify that the iterator is still valid.
 | |
|     CheckErrors();
 | |
|     if (Done()) {
 | |
|       ThrowError("Attempting to write invalid element.");
 | |
|     }
 | |
| 
 | |
|     // Append the cur element.
 | |
|     result_.insert(result_.end(),
 | |
|                    data_+cur_byte_,
 | |
|                    data_+cur_byte_+ sizeof(uint32_t) + cur_elem_length_);
 | |
|   }
 | |
| 
 | |
|   /// Will ThrowError() if necessary.
 | |
|   /// Checks for common/ubiquitous errors that can arise after most operations.
 | |
|   /// This method should be called before any reading operation.
 | |
|   /// If this function succeeds, then we are guaranteed to be in a valid state.
 | |
|   /// Other member functions should check for errors and ThrowError() also
 | |
|   ///  if an error occurs that is specific to it even while in a valid state.
 | |
|   void CheckErrors() {
 | |
|     // Check if any crazy thing has happened recently
 | |
|     if ((cur_elem_ > length_) ||                              // Bad index
 | |
|         (cur_byte_ > num_bytes_) ||                           // No more bytes
 | |
|         (cur_byte_ + cur_elem_length_ > num_bytes_) ||        // Item too large
 | |
|         (cur_byte_ == num_bytes_ && cur_elem_ != length_) ||  // Too many items
 | |
|         (cur_elem_ == length_ && cur_byte_ != num_bytes_)) {  // Too many bytes
 | |
|       ThrowError("Corrupt data.");
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   /// Will throw an exception based on the passed-in message.
 | |
|   /// This function is guaranteed to STOP THE CONTROL-FLOW.
 | |
|   /// (i.e.: you do not have to call "return" after calling ThrowError)
 | |
|   void ThrowError(const char* const /*msg*/ = nullptr) {
 | |
|     // TODO: For now we ignore the msg parameter. This can be expanded later.
 | |
|     throw RedisListException();
 | |
|   }
 | |
| 
 | |
|  private:
 | |
|   const char* const data_;      // A pointer to the data (the first byte)
 | |
|   const uint32_t num_bytes_;    // The number of bytes in this list
 | |
| 
 | |
|   uint32_t cur_byte_;           // The current byte being read
 | |
|   uint32_t cur_elem_;           // The current element being read
 | |
|   uint32_t cur_elem_length_;    // The number of bytes in current element
 | |
| 
 | |
|   uint32_t length_;             // The number of elements in this list
 | |
|   std::vector<char> result_;    // The output data
 | |
| };
 | |
| 
 | |
| } // namespace rocksdb
 | |
| #endif  // ROCKSDB_LITE
 | |
| 
 |