Pysim more algorithms (#5644)

Summary: This PR adds four more eviction policies. - OPT [1] - Hyperbolic caching [2] - ARC [3] - GreedyDualSize [4] [1] L. A. Belady. 1966. A Study of Replacement Algorithms for a Virtual-storage Computer. IBM Syst. J. 5, 2 (June 1966), 78-101. DOI=http://dx.doi.org/10.1147/sj.52.0078 [2] Aaron Blankstein, Siddhartha Sen, and Michael J. Freedman. 2017. Hyperbolic caching: flexible caching for web applications. In Proceedings of the 2017 USENIX Conference on Usenix Annual Technical Conference (USENIX ATC '17). USENIX Association, Berkeley, CA, USA, 499-511. [3] Nimrod Megiddo and Dharmendra S. Modha. 2003. ARC: A Self-Tuning, Low Overhead Replacement Cache. In Proceedings of the 2nd USENIX Conference on File and Storage Technologies (FAST '03). USENIX Association, Berkeley, CA, USA, 115-130. [4] N. Young. The k-server dual and loose competitiveness for paging. Algorithmica, June 1994, vol. 11,(no.6):525-41. Rewritten version of ''On-line caching as cache size varies'', in The 2nd Annual ACM-SIAM Symposium on Discrete Algorithms, 241-250, 1991. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5644 Differential Revision: D16548817 Pulled By: HaoyuHuang fbshipit-source-id: 838f76db9179f07911abaab46c97e1c929cfcd63
6 years ago · 6e78fe3c8d
parent d150e01474
commit 6e78fe3c8d
9 changed files with 2111 additions and 370 deletions
--- a/tools/block_cache_analyzer/block_cache_pysim.py
+++ b/tools/block_cache_analyzer/block_cache_pysim.py
--- a/tools/block_cache_analyzer/block_cache_pysim.sh
+++ b/tools/block_cache_analyzer/block_cache_pysim.sh
@ -10,6 +10,10 @@
 # warmup_seconds: The number of seconds used for warmup.
 # max_jobs: The max number of concurrent pysims to run.

+# Install required packages to run simulations.
+# sudo dnf install -y numpy scipy python-matplotlib ipython python-pandas sympy python-nose atlas-devel
+ulimit -c 0
+
 if [ $# -ne 5 ]; then
  echo "Usage: ./block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs"
  exit 0
@ -20,17 +24,26 @@ result_dir="$2"
 downsample_size="$3"
 warmup_seconds="$4"
 max_jobs="$5"
-current_jobs=0
+max_num_accesses=100000000
+current_jobs=1

 ml_tmp_result_dir="$result_dir/ml"
 rm -rf "$ml_tmp_result_dir"
 mkdir -p "$result_dir"
 mkdir -p "$ml_tmp_result_dir"

-for cache_type in "ts" "linucb" "ts_hybrid" "linucb_hybrid"
+# Report miss ratio in the trace.
+current_jobs=$(ps aux | grep pysim | grep python | grep -cv grep)
+for cf_name in "all"
+do
+for cache_size in "1G" "2G" "4G" "8G" "16G" #"12G" "16G" "1T"
 do
-for cache_size in "16M" "256M" "1G" "2G" "4G" "8G" "12G" "16G"
+for cache_type in "opt" "lru" "pylru" "pycctbbt" "pyhb" "ts" "trace" "lru_hybrid"  #"pycctblevelbt" #"lru_hybridn" "opt" #"pylru" "pylru_hybrid" "pycctbbt" "pycccfbt" "trace"
 do
+    if [[ $cache_type == "trace" && $cache_size != "16G" ]]; then
+      # We only need to collect miss ratios observed in the trace once.
+      continue
+    fi
    while [ "$current_jobs" -ge "$max_jobs" ]
    do
      sleep 10
@ -38,12 +51,13 @@ do
      current_jobs=$(ps aux | grep pysim | grep python | grep -cv grep)
      echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
    done
-    output="log-ml-$cache_type-$cache_size"
-    echo "Running simulation for $cache_type and cache size $cache_size. Number of running jobs: $current_jobs. "
-    nohup python block_cache_pysim.py "$cache_type" "$cache_size" "$downsample_size" "$warmup_seconds" "$trace_file" "$ml_tmp_result_dir" >& $ml_tmp_result_dir/$output &
+    output="log-ml-$cache_type-$cache_size-$cf_name"
+    echo "Running simulation for $cache_type, cache size $cache_size, and cf_name $cf_name. Number of running jobs: $current_jobs. "
+    nohup python block_cache_pysim.py "$cache_type" "$cache_size" "$downsample_size" "$warmup_seconds" "$trace_file" "$ml_tmp_result_dir" "$max_num_accesses" "$cf_name" >& "$ml_tmp_result_dir/$output" &
    current_jobs=$((current_jobs+1))
 done
 done
+done

 # Wait for all jobs to complete.
 while [ $current_jobs -gt 0 ]
@ -57,14 +71,14 @@ done
 echo "Combine individual pysim output files"

 rm -rf "$result_dir/ml_*"
-mrc_file="$result_dir/ml_mrc"
 for header in "header-" "data-"
 do
-for fn in $ml_tmp_result_dir/*
+for fn in "$ml_tmp_result_dir"/*
 do
  sum_file=""
  time_unit=""
  capacity=""
+  target_cf_name=""
  if [[ $fn == *"timeline"* ]]; then
    tmpfn="$fn"
    IFS='-' read -ra elements <<< "$tmpfn"
@ -79,24 +93,43 @@ do
    done
    time_unit_index=$((time_unit_index+1))
    capacity_index=$((time_unit_index+2))
+    target_cf_name_index=$((time_unit_index+3))
    time_unit="${elements[$time_unit_index]}_"
    capacity="${elements[$capacity_index]}_"
+    target_cf_name="${elements[$target_cf_name_index]}_"
  fi

-  if [[ $fn == "${header}ml-policy-timeline"* ]]; then
-    sum_file="$result_dir/ml_${capacity}${time_unit}policy_timeline"
+  if [[ $fn == *"${header}ml-policy-timeline"* ]]; then
+    sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}policy_timeline"
+  fi
+  if [[ $fn == *"${header}ml-policy-ratio-timeline"* ]]; then
+    sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}policy_ratio_timeline"
  fi
-  if [[ $fn == "${header}ml-policy-ratio-timeline"* ]]; then
-    sum_file="$result_dir/ml_${capacity}${time_unit}policy_ratio_timeline"
+  if [[ $fn == *"${header}ml-miss-timeline"* ]]; then
+    sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}miss_timeline"
  fi
-  if [[ $fn == "${header}ml-miss-timeline"* ]]; then
-    sum_file="$result_dir/ml_${capacity}${time_unit}miss_timeline"
+  if [[ $fn == *"${header}ml-miss-ratio-timeline"* ]]; then
+    sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}miss_ratio_timeline"
+  fi
+  if [[ $fn == *"${header}ml-mrc"* ]]; then
+    tmpfn="$fn"
+    IFS='-' read -ra elements <<< "$tmpfn"
+    target_cf_name=${elements[-1]}
+    sum_file="${result_dir}/ml_${target_cf_name}_mrc"
  fi
-  if [[ $fn == "${header}ml-miss-ratio-timeline"* ]]; then
-    sum_file="$result_dir/ml_${capacity}${time_unit}miss_ratio_timeline"
+  if [[ $fn == *"${header}ml-avgmb"* ]]; then
+    tmpfn="$fn"
+    IFS='-' read -ra elements <<< "$tmpfn"
+    time_unit=${elements[3]}
+    target_cf_name=${elements[-1]}
+    sum_file="${result_dir}/ml_${time_unit}_${target_cf_name}_avgmb"
  fi
-  if [[ $fn == "${header}ml-mrc"* ]]; then
-    sum_file="$mrc_file"
+  if [[ $fn == *"${header}ml-p95mb"* ]]; then
+    tmpfn="$fn"
+    IFS='-' read -ra elements <<< "$tmpfn"
+    time_unit=${elements[3]}
+    target_cf_name=${elements[-1]}
+    sum_file="${result_dir}/ml_${time_unit}_${target_cf_name}_p95mb"
  fi
  if [[ $sum_file == "" ]]; then
    continue
@ -106,13 +139,18 @@ do
      continue
    fi
  fi
-  cat "$ml_tmp_result_dir/$fn" >> "$sum_file"
+  cat "$fn" >> "$sum_file"
 done
 done

 echo "Done"
-# Sort MRC file by cache_type and cache_size.
-tmp_file="$result_dir/tmp_mrc"
-cat "$mrc_file" | sort -t ',' -k1,1 -k4,4n > "$tmp_file"
-cat "$tmp_file" > "$mrc_file"
-rm -rf "$tmp_file"
+for fn in $result_dir/*
+do
+  if [[ $fn == *"_mrc" || $fn == *"_avgmb" || $fn == *"_p95mb" ]]; then
+    # Sort MRC file by cache_type and cache_size.
+    tmp_file="$result_dir/tmp_mrc"
+    cat "$fn" | sort -t ',' -k1,1 -k4,4n > "$tmp_file"
+    cat "$tmp_file" > "$fn"
+    rm -rf "$tmp_file"
+  fi
+done
--- a/tools/block_cache_analyzer/block_cache_pysim_test.py
+++ b/tools/block_cache_analyzer/block_cache_pysim_test.py
@ -1,17 +1,30 @@
 #!/usr/bin/env python3
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

+import os
 import random
+import sys

 from block_cache_pysim import (
+    ARCCache,
+    CacheEntry,
+    GDSizeCache,
    HashTable,
+    HyperbolicPolicy,
    LFUPolicy,
    LinUCBCache,
+    LRUCache,
    LRUPolicy,
    MRUPolicy,
+    OPTCache,
+    OPTCacheEntry,
    ThompsonSamplingCache,
+    TraceCache,
    TraceRecord,
+    create_cache,
+    kMicrosInSecond,
    kSampleSize,
+    run,
 )


@ -33,30 +46,44 @@ def test_hash_table():
    records = 100
    for i in range(n):
        key_id = random.randint(0, records)
+        v = random.randint(0, records)
        key = "k{}".format(key_id)
-        value = "v{}".format(key_id)
-        action = random.randint(0, 2)
-        # print "{}:{}:{}".format(action, key, value)
+        value = CacheEntry(v, v, v, v, v, v, v)
+        action = random.randint(0, 10)
        assert len(truth_map) == table.elements, "{} {} {}".format(
            len(truth_map), table.elements, i
        )
-        if action == 0:
-            table.insert(key, key_id, value)
-            truth_map[key] = value
-        elif action == 1:
+        if action <= 8:
            if key in truth_map:
                assert table.lookup(key, key_id) is not None
-                assert truth_map[key] == table.lookup(key, key_id)
+                assert truth_map[key].value_size == table.lookup(key, key_id).value_size
            else:
                assert table.lookup(key, key_id) is None
+            table.insert(key, key_id, value)
+            truth_map[key] = value
        else:
-            table.delete(key, key_id)
+            deleted = table.delete(key, key_id)
+            if deleted:
+                assert key in truth_map
            if key in truth_map:
                del truth_map[key]
+
+    # Check all keys are unique in the sample set.
+    for _i in range(10):
+        samples = table.random_sample(kSampleSize)
+        unique_keys = {}
+        for sample in samples:
+            unique_keys[sample.key] = True
+        assert len(samples) == len(unique_keys)
+
+    assert len(table) == len(truth_map)
+    for key in truth_map:
+        assert table.lookup(key, int(key[1:])) is not None
+        assert truth_map[key].value_size == table.lookup(key, int(key[1:])).value_size
    print("Test hash table: Success")


-def assert_metrics(cache, expected_value):
+def assert_metrics(cache, expected_value, expected_value_size=1, custom_hashtable=True):
    assert cache.used_size == expected_value[0], "Expected {}, Actual {}".format(
        expected_value[0], cache.used_size
    )
@ -70,24 +97,35 @@ def assert_metrics(cache, expected_value):
    ), "Expected {}, Actual {}".format(
        expected_value[2], cache.miss_ratio_stats.num_misses
    )
-    assert cache.table.elements == len(expected_value[3]) + len(
+    assert len(cache.table) == len(expected_value[3]) + len(
        expected_value[4]
    ), "Expected {}, Actual {}".format(
        len(expected_value[3]) + len(expected_value[4]), cache.table.elements
    )
    for expeceted_k in expected_value[3]:
-        val = cache.table.lookup("b{}".format(expeceted_k), expeceted_k)
-        assert val is not None
-        assert val.value_size == 1
+        if custom_hashtable:
+            val = cache.table.lookup("b{}".format(expeceted_k), expeceted_k)
+        else:
+            val = cache.table["b{}".format(expeceted_k)]
+        assert val is not None, "Expected {} Actual: Not Exist {}, Table: {}".format(
+            expeceted_k, expected_value, cache.table
+        )
+        assert val.value_size == expected_value_size
    for expeceted_k in expected_value[4]:
-        val = cache.table.lookup("g{}".format(expeceted_k), expeceted_k)
+        if custom_hashtable:
+            val = cache.table.lookup("g0-{}".format(expeceted_k), expeceted_k)
+        else:
+            val = cache.table["g0-{}".format(expeceted_k)]
        assert val is not None
-        assert val.value_size == 1
+        assert val.value_size == expected_value_size


 # Access k1, k1, k2, k3, k3, k3, k4
-def test_cache(policies, expected_value):
-    cache = ThompsonSamplingCache(3, False, policies)
+# When k4 is inserted,
+#   LRU should evict k1.
+#   LFU should evict k2.
+#   MRU should evict k3.
+def test_cache(cache, expected_value, custom_hashtable=True):
    k1 = TraceRecord(
        access_time=0,
        block_id=1,
@ -103,6 +141,14 @@ def test_cache(policies, expected_value):
        key_id=1,
        kv_size=5,
        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=0,
    )
    k2 = TraceRecord(
        access_time=1,
@ -119,6 +165,14 @@ def test_cache(policies, expected_value):
        key_id=1,
        kv_size=5,
        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=0,
    )
    k3 = TraceRecord(
        access_time=2,
@ -135,6 +189,14 @@ def test_cache(policies, expected_value):
        key_id=1,
        kv_size=5,
        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=0,
    )
    k4 = TraceRecord(
        access_time=3,
@ -151,6 +213,14 @@ def test_cache(policies, expected_value):
        key_id=1,
        kv_size=5,
        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=0,
    )
    sequence = [k1, k1, k2, k3, k3, k3]
    index = 0
@ -167,20 +237,29 @@ def test_cache(policies, expected_value):
    expected_values.append([3, 5, 3, [1, 2, 3], []])
    # Access k3, hit.
    expected_values.append([3, 6, 3, [1, 2, 3], []])
+    access_time = 0
    for access in sequence:
+        access.access_time = access_time
        cache.access(access)
-        assert_metrics(cache, expected_values[index])
+        assert_metrics(
+            cache,
+            expected_values[index],
+            expected_value_size=1,
+            custom_hashtable=custom_hashtable,
+        )
+        access_time += 1
        index += 1
+    k4.access_time = access_time
    cache.access(k4)
-    assert_metrics(cache, expected_value)
+    assert_metrics(
+        cache, expected_value, expected_value_size=1, custom_hashtable=custom_hashtable
+    )


-def test_lru_cache():
+def test_lru_cache(cache, custom_hashtable):
    print("Test LRU cache")
-    policies = []
-    policies.append(LRUPolicy())
    # Access k4, miss. evict k1
-    test_cache(policies, [3, 7, 4, [2, 3, 4], []])
+    test_cache(cache, [3, 7, 4, [2, 3, 4], []], custom_hashtable)
    print("Test LRU cache: Success")


@ -189,7 +268,10 @@ def test_mru_cache():
    policies = []
    policies.append(MRUPolicy())
    # Access k4, miss. evict k3
-    test_cache(policies, [3, 7, 4, [1, 2, 4], []])
+    test_cache(
+        ThompsonSamplingCache(3, False, policies, cost_class_label=None),
+        [3, 7, 4, [1, 2, 4], []],
+    )
    print("Test MRU cache: Success")


@ -198,22 +280,36 @@ def test_lfu_cache():
    policies = []
    policies.append(LFUPolicy())
    # Access k4, miss. evict k2
-    test_cache(policies, [3, 7, 4, [1, 3, 4], []])
+    test_cache(
+        ThompsonSamplingCache(3, False, policies, cost_class_label=None),
+        [3, 7, 4, [1, 3, 4], []],
+    )
    print("Test LFU cache: Success")


 def test_mix(cache):
    print("Test Mix {} cache".format(cache.cache_name()))
    n = 100000
-    records = 199
+    records = 100
+    block_size_table = {}
+    trace_num_misses = 0
    for i in range(n):
        key_id = random.randint(0, records)
        vs = random.randint(0, 10)
+        now = i * kMicrosInSecond
+        block_size = vs
+        if key_id in block_size_table:
+            block_size = block_size_table[key_id]
+        else:
+            block_size_table[key_id] = block_size
+        is_hit = key_id % 2
+        if is_hit == 0:
+            trace_num_misses += 1
        k = TraceRecord(
-            access_time=i,
+            access_time=now,
            block_id=key_id,
            block_type=1,
-            block_size=vs,
+            block_size=block_size,
            cf_id=0,
            cf_name="",
            level=0,
@ -223,13 +319,117 @@ def test_mix(cache):
            get_id=key_id,
            key_id=key_id,
            kv_size=5,
-            is_hit=1,
+            is_hit=is_hit,
+            referenced_key_exist_in_block=1,
+            num_keys_in_block=0,
+            table_id=0,
+            seq_number=0,
+            block_key_size=0,
+            key_size=0,
+            block_offset_in_file=0,
+            next_access_seq_no=vs,
        )
        cache.access(k)
    assert cache.miss_ratio_stats.miss_ratio() > 0
+    if cache.cache_name() == "Trace":
+        assert cache.miss_ratio_stats.num_accesses == n
+        assert cache.miss_ratio_stats.num_misses == trace_num_misses
+    else:
+        assert cache.used_size <= cache.cache_size
+        all_values = cache.table.values()
+        cached_size = 0
+        for value in all_values:
+            cached_size += value.value_size
+        assert cached_size == cache.used_size, "Expeced {} Actual {}".format(
+            cache.used_size, cached_size
+        )
    print("Test Mix {} cache: Success".format(cache.cache_name()))


+def test_end_to_end():
+    print("Test All caches")
+    n = 100000
+    nblocks = 1000
+    block_size = 16 * 1024
+    ncfs = 7
+    nlevels = 6
+    nfds = 100000
+    trace_file_path = "test_trace"
+    # All blocks are of the same size so that OPT must achieve the lowest miss
+    # ratio.
+    with open(trace_file_path, "w+") as trace_file:
+        access_records = ""
+        for i in range(n):
+            key_id = random.randint(0, nblocks)
+            cf_id = random.randint(0, ncfs)
+            level = random.randint(0, nlevels)
+            fd = random.randint(0, nfds)
+            now = i * kMicrosInSecond
+            access_record = ""
+            access_record += "{},".format(now)
+            access_record += "{},".format(key_id)
+            access_record += "{},".format(9)  # block type
+            access_record += "{},".format(block_size)  # block size
+            access_record += "{},".format(cf_id)
+            access_record += "cf_{},".format(cf_id)
+            access_record += "{},".format(level)
+            access_record += "{},".format(fd)
+            access_record += "{},".format(key_id % 3)  # caller
+            access_record += "{},".format(0)  # no insert
+            access_record += "{},".format(i)  # get_id
+            access_record += "{},".format(i)  # key_id
+            access_record += "{},".format(100)  # kv_size
+            access_record += "{},".format(1)  # is_hit
+            access_record += "{},".format(1)  # referenced_key_exist_in_block
+            access_record += "{},".format(10)  # num_keys_in_block
+            access_record += "{},".format(1)  # table_id
+            access_record += "{},".format(0)  # seq_number
+            access_record += "{},".format(10)  # block key size
+            access_record += "{},".format(20)  # key size
+            access_record += "{},".format(0)  # block offset
+            access_record = access_record[:-1]
+            access_records += access_record + "\n"
+        trace_file.write(access_records)
+
+    print("Test All caches: Start testing caches")
+    cache_size = block_size * nblocks / 10
+    downsample_size = 1
+    cache_ms = {}
+    for cache_type in [
+        "ts",
+        "opt",
+        "lru",
+        "pylru",
+        "linucb",
+        "gdsize",
+        "pyccbt",
+        "pycctbbt",
+    ]:
+        cache = create_cache(cache_type, cache_size, downsample_size)
+        run(trace_file_path, cache_type, cache, 0, -1, "all")
+        cache_ms[cache_type] = cache
+        assert cache.miss_ratio_stats.num_accesses == n
+
+    for cache_type in cache_ms:
+        cache = cache_ms[cache_type]
+        ms = cache.miss_ratio_stats.miss_ratio()
+        assert ms <= 100.0 and ms >= 0.0
+        # OPT should perform the best.
+        assert cache_ms["opt"].miss_ratio_stats.miss_ratio() <= ms
+        assert cache.used_size <= cache.cache_size
+        all_values = cache.table.values()
+        cached_size = 0
+        for value in all_values:
+            cached_size += value.value_size
+        assert cached_size == cache.used_size, "Expeced {} Actual {}".format(
+            cache.used_size, cached_size
+        )
+        print("Test All {}: Success".format(cache.cache_name()))
+
+    os.remove(trace_file_path)
+    print("Test All: Success")
+
+
 def test_hybrid(cache):
    print("Test {} cache".format(cache.cache_name()))
    k = TraceRecord(
@ -247,6 +447,14 @@ def test_hybrid(cache):
        key_id=1,
        kv_size=0,  # no size.
        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=0,
    )
    cache.access(k)  # Expect a miss.
    # used size, num accesses, num misses, hash table size, blocks, get keys.
@ -319,22 +527,208 @@ def test_hybrid(cache):
    k.key_id = 4  # Same row key and should not be inserted again.
    k.kv_size = 1
    cache.access(k)
-    assert_metrics(cache, [16, 103, 99, [i for i in range(101 - kSampleSize, 101)], []])
+    assert_metrics(
+        cache, [kSampleSize, 103, 99, [i for i in range(101 - kSampleSize, 101)], []]
+    )
    print("Test {} cache: Success".format(cache.cache_name()))


+def test_opt_cache():
+    print("Test OPT cache")
+    cache = OPTCache(3)
+    # seq:         0,  1,  2,  3,  4,  5,  6,  7,  8
+    # key:         k1, k2, k3, k4, k5, k6, k7, k1, k8
+    # next_access: 7,  19, 18, M,  M,  17, 16, 25, M
+    k = TraceRecord(
+        access_time=0,
+        block_id=1,
+        block_type=1,
+        block_size=1,
+        cf_id=0,
+        cf_name="",
+        level=0,
+        fd=0,
+        caller=1,
+        no_insert=0,
+        get_id=1,  # the first get request.
+        key_id=1,
+        kv_size=0,  # no size.
+        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=7,
+    )
+    cache.access(k)
+    assert_metrics(
+        cache, [1, 1, 1, [1], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 2
+    k.next_access_seq_no = 19
+    cache.access(k)
+    assert_metrics(
+        cache, [2, 2, 2, [1, 2], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 3
+    k.next_access_seq_no = 18
+    cache.access(k)
+    assert_metrics(
+        cache, [3, 3, 3, [1, 2, 3], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 4
+    k.next_access_seq_no = sys.maxsize  # Never accessed again.
+    cache.access(k)
+    # Evict 2 since its next access 19 is the furthest in the future.
+    assert_metrics(
+        cache, [3, 4, 4, [1, 3, 4], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 5
+    k.next_access_seq_no = sys.maxsize  # Never accessed again.
+    cache.access(k)
+    # Evict 4 since its next access MAXINT is the furthest in the future.
+    assert_metrics(
+        cache, [3, 5, 5, [1, 3, 5], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 6
+    k.next_access_seq_no = 17
+    cache.access(k)
+    # Evict 5 since its next access MAXINT is the furthest in the future.
+    assert_metrics(
+        cache, [3, 6, 6, [1, 3, 6], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 7
+    k.next_access_seq_no = 16
+    cache.access(k)
+    # Evict 3 since its next access 18 is the furthest in the future.
+    assert_metrics(
+        cache, [3, 7, 7, [1, 6, 7], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 1
+    k.next_access_seq_no = 25
+    cache.access(k)
+    assert_metrics(
+        cache, [3, 8, 7, [1, 6, 7], []], expected_value_size=1, custom_hashtable=False
+    )
+    k.access_time += 1
+    k.block_id = 8
+    k.next_access_seq_no = sys.maxsize
+    cache.access(k)
+    # Evict 1 since its next access 25 is the furthest in the future.
+    assert_metrics(
+        cache, [3, 9, 8, [6, 7, 8], []], expected_value_size=1, custom_hashtable=False
+    )
+
+    # Insert a large kv pair to evict all keys.
+    k.access_time += 1
+    k.block_id = 10
+    k.block_size = 3
+    k.next_access_seq_no = sys.maxsize
+    cache.access(k)
+    assert_metrics(
+        cache, [3, 10, 9, [10], []], expected_value_size=3, custom_hashtable=False
+    )
+    print("Test OPT cache: Success")
+
+
+def test_trace_cache():
+    print("Test trace cache")
+    cache = TraceCache(0)
+    k = TraceRecord(
+        access_time=0,
+        block_id=1,
+        block_type=1,
+        block_size=1,
+        cf_id=0,
+        cf_name="",
+        level=0,
+        fd=0,
+        caller=1,
+        no_insert=0,
+        get_id=1,
+        key_id=1,
+        kv_size=0,
+        is_hit=1,
+        referenced_key_exist_in_block=1,
+        num_keys_in_block=0,
+        table_id=0,
+        seq_number=0,
+        block_key_size=0,
+        key_size=0,
+        block_offset_in_file=0,
+        next_access_seq_no=7,
+    )
+    cache.access(k)
+    assert cache.miss_ratio_stats.num_accesses == 1
+    assert cache.miss_ratio_stats.num_misses == 0
+    k.is_hit = 0
+    cache.access(k)
+    assert cache.miss_ratio_stats.num_accesses == 2
+    assert cache.miss_ratio_stats.num_misses == 1
+    print("Test trace cache: Success")
+
+
 if __name__ == "__main__":
-    policies = []
-    policies.append(MRUPolicy())
-    policies.append(LRUPolicy())
-    policies.append(LFUPolicy())
    test_hash_table()
-    test_lru_cache()
+    test_trace_cache()
+    test_opt_cache()
+    test_lru_cache(
+        ThompsonSamplingCache(
+            3, enable_cache_row_key=0, policies=[LRUPolicy()], cost_class_label=None
+        ),
+        custom_hashtable=True,
+    )
+    test_lru_cache(LRUCache(3, enable_cache_row_key=0), custom_hashtable=False)
    test_mru_cache()
    test_lfu_cache()
-    test_mix(ThompsonSamplingCache(100, False, policies))
-    test_mix(ThompsonSamplingCache(100, True, policies))
-    test_mix(LinUCBCache(100, False, policies))
-    test_mix(LinUCBCache(100, True, policies))
-    test_hybrid(ThompsonSamplingCache(kSampleSize, True, [LRUPolicy()]))
-    test_hybrid(LinUCBCache(kSampleSize, True, [LRUPolicy()]))
+    test_hybrid(
+        ThompsonSamplingCache(
+            kSampleSize,
+            enable_cache_row_key=1,
+            policies=[LRUPolicy()],
+            cost_class_label=None,
+        )
+    )
+    test_hybrid(
+        LinUCBCache(
+            kSampleSize,
+            enable_cache_row_key=1,
+            policies=[LRUPolicy()],
+            cost_class_label=None,
+        )
+    )
+    for cache_type in [
+        "ts",
+        "opt",
+        "arc",
+        "pylfu",
+        "pymru",
+        "trace",
+        "pyhb",
+        "lru",
+        "pylru",
+        "linucb",
+        "gdsize",
+        "pycctbbt",
+        "pycctb",
+        "pyccbt",
+    ]:
+        for enable_row_cache in [0, 1, 2]:
+            cache_type_str = cache_type
+            if cache_type != "opt" and cache_type != "trace":
+                if enable_row_cache == 1:
+                    cache_type_str += "_hybrid"
+                elif enable_row_cache == 2:
+                    cache_type_str += "_hybridn"
+            test_mix(create_cache(cache_type_str, cache_size=100, downsample_size=1))
+    test_end_to_end()
--- a/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
+++ b/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
@ -127,6 +127,9 @@ DEFINE_string(analyze_get_spatial_locality_labels, "",
              "Group data blocks using these labels.");
 DEFINE_string(analyze_get_spatial_locality_buckets, "",
              "Group data blocks by their statistics using these buckets.");
+DEFINE_string(skew_labels, "",
+              "Group the access count of a block using these labels.");
+DEFINE_string(skew_buckets, "", "Group the skew labels using these buckets.");
 DEFINE_bool(mrc_only, false,
            "Evaluate alternative cache policies only. When this flag is true, "
            "the analyzer does NOT maintain states of each block in memory for "
@ -147,6 +150,7 @@ namespace {

 const std::string kMissRatioCurveFileName = "mrc";
 const std::string kGroupbyBlock = "block";
+const std::string kGroupbyTable = "table";
 const std::string kGroupbyColumnFamily = "cf";
 const std::string kGroupbySSTFile = "sst";
 const std::string kGroupbyBlockType = "bt";
@ -164,6 +168,7 @@ const std::string kSupportedCacheNames =
 // The suffix for the generated csv files.
 const std::string kFileNameSuffixMissRatioTimeline = "miss_ratio_timeline";
 const std::string kFileNameSuffixMissTimeline = "miss_timeline";
+const std::string kFileNameSuffixSkew = "skewness";
 const std::string kFileNameSuffixAccessTimeline = "access_timeline";
 const std::string kFileNameSuffixCorrelation = "correlation_input";
 const std::string kFileNameSuffixAvgReuseIntervalNaccesses =
@ -540,6 +545,62 @@ void BlockCacheTraceAnalyzer::WriteMissTimeline(uint64_t time_unit) const {
  }
 }

+// Groups blocks by the labels parsed from label_str, sums the access count of
+// each group, ranks the groups from most to least accessed, and for every
+// entry of percent_buckets accumulates the accesses of the ranked groups that
+// fall between the previous bucket boundary and this one. The per-bucket sums
+// and the total access count are handed to WriteStatsToFile.
+//
+// label_str: labels used to group blocks; also keys the output map.
+// percent_buckets: bucket boundaries as percentages of the number of groups.
+//   Any value of 100 or more (including port::kMaxUint64) means "all remaining
+//   groups". NOTE(review): boundaries are presumably expected in ascending
+//   order; confirm against the caller.
+// target_block_type: only blocks of this type are counted, unless it is
+//   TraceType::kTraceMax, in which case all block types are counted.
+void BlockCacheTraceAnalyzer::WriteSkewness(
+    const std::string& label_str, const std::vector<uint64_t>& percent_buckets,
+    TraceType target_block_type) const {
+  std::set<std::string> labels = ParseLabelStr(label_str);
+  std::map<std::string, uint64_t> label_naccesses;
+  uint64_t total_naccesses = 0;
+  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
+                            uint32_t level, TraceType type,
+                            const std::string& /*block_key*/, uint64_t block_id,
+                            const BlockAccessInfo& block) {
+    if (target_block_type != TraceType::kTraceMax &&
+        target_block_type != type) {
+      return;
+    }
+    const std::string label = BuildLabel(
+        labels, cf_name, fd, level, type,
+        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
+    label_naccesses[label] += block.num_accesses;
+    total_naccesses += block.num_accesses;
+  };
+  TraverseBlocks(block_callback, &labels);
+  std::map<std::string, std::map<uint64_t, uint64_t>> label_bucket_naccesses;
+  std::vector<std::pair<std::string, uint64_t>> pairs(label_naccesses.begin(),
+                                                      label_naccesses.end());
+  // Sort in descending order of access count.
+  std::sort(pairs.begin(), pairs.end(),
+            [](const std::pair<std::string, uint64_t>& a,
+               const std::pair<std::string, uint64_t>& b) {
+              return b.second < a.second;
+            });
+
+  size_t prev_start_index = 0;
+  for (auto const& percent : percent_buckets) {
+    label_bucket_naccesses[label_str][percent] = 0;
+    // Never index past the ranked groups: a boundary of 100% or more covers
+    // everything that is left.
+    size_t end_index = pairs.size();
+    if (percent < 100) {
+      end_index = static_cast<size_t>(percent * pairs.size() / 100);
+    }
+    for (size_t i = prev_start_index; i < end_index; i++) {
+      label_bucket_naccesses[label_str][percent] += pairs[i].second;
+    }
+    prev_start_index = end_index;
+  }
+  std::string filename_suffix;
+  if (target_block_type != TraceType::kTraceMax) {
+    filename_suffix = block_type_to_string(target_block_type);
+    filename_suffix += "_";
+  }
+  filename_suffix += kFileNameSuffixSkew;
+  WriteStatsToFile(label_str, percent_buckets, filename_suffix,
+                   label_bucket_naccesses, total_naccesses);
+}
+
 void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
    const std::string& label_str, uint32_t max_number_of_values) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
@ -549,12 +610,16 @@ void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
      [&](const std::string& cf_name, uint64_t fd, uint32_t level,
          TraceType block_type, const std::string& /*block_key*/,
          uint64_t /*block_key_id*/, const BlockAccessInfo& block) {
+        if (block.table_id == 0 && labels.find(kGroupbyTable) != labels.end()) {
+          // We only know table id information for get requests.
+          return;
+        }
        if (labels.find(kGroupbyCaller) != labels.end()) {
          // Group by caller.
          for (auto const& caller_map : block.caller_access_timeline) {
            const std::string label =
                BuildLabel(labels, cf_name, fd, level, block_type,
-                           caller_map.first, /*block_id=*/0);
+                           caller_map.first, /*block_id=*/0, block);
            auto it = block.caller_access_sequence__number_timeline.find(
                caller_map.first);
            assert(it != block.caller_access_sequence__number_timeline.end());
@ -563,14 +628,15 @@ void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
          }
          return;
        }
-        const std::string label = BuildLabel(
-            labels, cf_name, fd, level, block_type,
-            TableReaderCaller::kMaxBlockCacheLookupCaller, /*block_id=*/0);
+        const std::string label =
+            BuildLabel(labels, cf_name, fd, level, block_type,
+                       TableReaderCaller::kMaxBlockCacheLookupCaller,
+                       /*block_id=*/0, block);
        UpdateFeatureVectors(block.access_sequence_number_timeline,
                             block.access_timeline, label, &label_features,
                             &label_predictions);
      };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);
  WriteCorrelationFeaturesToFile(label_str, label_features, label_predictions,
                                 max_number_of_values);
 }
@ -656,7 +722,7 @@ std::set<std::string> BlockCacheTraceAnalyzer::ParseLabelStr(
 std::string BlockCacheTraceAnalyzer::BuildLabel(
    const std::set<std::string>& labels, const std::string& cf_name,
    uint64_t fd, uint32_t level, TraceType type, TableReaderCaller caller,
-    uint64_t block_key) const {
+    uint64_t block_key, const BlockAccessInfo& block) const {
  std::map<std::string, std::string> label_value_map;
  label_value_map[kGroupbyAll] = kGroupbyAll;
  label_value_map[kGroupbyLevel] = std::to_string(level);
@ -665,6 +731,7 @@ std::string BlockCacheTraceAnalyzer::BuildLabel(
  label_value_map[kGroupbyBlockType] = block_type_to_string(type);
  label_value_map[kGroupbyColumnFamily] = cf_name;
  label_value_map[kGroupbyBlock] = std::to_string(block_key);
+  label_value_map[kGroupbyTable] = std::to_string(block.table_id);
  // Concatenate the label values.
  std::string label;
  for (auto const& l : labels) {
@ -683,7 +750,8 @@ void BlockCacheTraceAnalyzer::TraverseBlocks(
                       const std::string& /*block_key*/,
                       uint64_t /*block_key_id*/,
                       const BlockAccessInfo& /*block_access_info*/)>
-        block_callback) const {
+        block_callback,
+    std::set<std::string>* labels) const {
  for (auto const& cf_aggregates : cf_aggregates_map_) {
    // Stats per column family.
    const std::string& cf_name = cf_aggregates.first;
@ -698,6 +766,11 @@ void BlockCacheTraceAnalyzer::TraverseBlocks(
        for (auto const& block_access_info :
             block_type_aggregates.second.block_access_info_map) {
          // Stats per block.
+          if (labels && block_access_info.second.table_id == 0 &&
+              labels->find(kGroupbyTable) != labels->end()) {
+            // We only know table id information for get requests. Skip just
+            // this block; returning here would end the whole traversal and
+            // drop every block that has not been visited yet.
+            continue;
+          }
          block_callback(cf_name, fd, level, type, block_access_info.first,
                         block_access_info.second.block_id,
                         block_access_info.second);
@ -733,7 +806,7 @@ void BlockCacheTraceAnalyzer::WriteGetSpatialLocality(
    }
    const std::string label =
        BuildLabel(labels, cf_name, fd, level, TraceType::kBlockTraceDataBlock,
-                   TableReaderCaller::kUserGet, /*block_id=*/0);
+                   TableReaderCaller::kUserGet, /*block_id=*/0, block);

    const uint64_t percent_referenced_for_existing_keys =
        static_cast<uint64_t>(std::max(
@ -761,7 +834,7 @@ void BlockCacheTraceAnalyzer::WriteGetSpatialLocality(
        ->second += 1;
    nblocks += 1;
  };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);
  WriteStatsToFile(label_str, percent_buckets, kFileNameSuffixPercentRefKeys,
                   label_pnrefkeys_nblocks, nblocks);
  WriteStatsToFile(label_str, percent_buckets,
@ -792,7 +865,7 @@ void BlockCacheTraceAnalyzer::WriteAccessTimeline(const std::string& label_str,
        continue;
      }
      const std::string label =
-          BuildLabel(labels, cf_name, fd, level, type, caller, block_id);
+          BuildLabel(labels, cf_name, fd, level, type, caller, block_id, block);
      for (auto const& naccess : timeline.second) {
        const uint64_t timestamp = naccess.first / time_unit;
        const uint64_t num = naccess.second;
@ -806,7 +879,7 @@ void BlockCacheTraceAnalyzer::WriteAccessTimeline(const std::string& label_str,
      access_count_block_id_map[naccesses].push_back(std::to_string(block_id));
    }
  };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);

  // We have label_access_timeline now. Write them into a file.
  const std::string user_access_prefix =
@ -877,9 +950,9 @@ void BlockCacheTraceAnalyzer::WriteReuseDistance(
                            uint32_t level, TraceType type,
                            const std::string& /*block_key*/, uint64_t block_id,
                            const BlockAccessInfo& block) {
-    const std::string label =
-        BuildLabel(labels, cf_name, fd, level, type,
-                   TableReaderCaller::kMaxBlockCacheLookupCaller, block_id);
+    const std::string label = BuildLabel(
+        labels, cf_name, fd, level, type,
+        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
    if (label_distance_num_reuses.find(label) ==
        label_distance_num_reuses.end()) {
      // The first time we encounter this label.
@ -894,7 +967,7 @@ void BlockCacheTraceAnalyzer::WriteReuseDistance(
      total_num_reuses += reuse_distance.second;
    }
  };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);
  // We have label_naccesses and label_distance_num_reuses now. Write them into
  // a file.
  const std::string output_path =
@ -1016,17 +1089,17 @@ void BlockCacheTraceAnalyzer::WriteReuseInterval(
    if (labels.find(kGroupbyCaller) != labels.end()) {
      for (auto const& timeline : block.caller_num_accesses_timeline) {
        const TableReaderCaller caller = timeline.first;
-        const std::string label =
-            BuildLabel(labels, cf_name, fd, level, type, caller, block_id);
+        const std::string label = BuildLabel(labels, cf_name, fd, level, type,
+                                             caller, block_id, block);
        UpdateReuseIntervalStats(label, time_buckets, timeline.second,
                                 &label_time_num_reuses, &total_num_reuses);
      }
      return;
    }
    // Does not group by caller so we need to flatten the access timeline.
-    const std::string label =
-        BuildLabel(labels, cf_name, fd, level, type,
-                   TableReaderCaller::kMaxBlockCacheLookupCaller, block_id);
+    const std::string label = BuildLabel(
+        labels, cf_name, fd, level, type,
+        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
    std::map<uint64_t, uint64_t> timeline;
    for (auto const& caller_timeline : block.caller_num_accesses_timeline) {
      for (auto const& time_naccess : caller_timeline.second) {
@ -1045,7 +1118,7 @@ void BlockCacheTraceAnalyzer::WriteReuseInterval(
    label_avg_reuse_naccesses[label].upper_bound(avg_reuse_interval)->second +=
        block.num_accesses;
  };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);

  // Write the stats into files.
  WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseInterval,
@ -1074,9 +1147,9 @@ void BlockCacheTraceAnalyzer::WriteReuseLifetime(
    } else {
      lifetime = port::kMaxUint64 - 1;
    }
-    const std::string label =
-        BuildLabel(labels, cf_name, fd, level, type,
-                   TableReaderCaller::kMaxBlockCacheLookupCaller, block_id);
+    const std::string label = BuildLabel(
+        labels, cf_name, fd, level, type,
+        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);

    if (label_lifetime_nblocks.find(label) == label_lifetime_nblocks.end()) {
      // The first time we encounter this label.
@ -1087,7 +1160,7 @@ void BlockCacheTraceAnalyzer::WriteReuseLifetime(
    label_lifetime_nblocks[label].upper_bound(lifetime)->second += 1;
    total_nblocks += 1;
  };
-  TraverseBlocks(block_callback);
+  TraverseBlocks(block_callback, &labels);
  WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseLifetime,
                   label_lifetime_nblocks, total_nblocks);
 }
@ -1396,11 +1469,17 @@ Status BlockCacheTraceAnalyzer::WriteHumanReadableTraceRecord(
  int ret = snprintf(
      trace_record_buffer_, sizeof(trace_record_buffer_),
      "%" PRIu64 ",%" PRIu64 ",%u,%" PRIu64 ",%" PRIu64 ",%s,%" PRIu32
-      ",%" PRIu64 ",%u,%u,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%u\n",
+      ",%" PRIu64 ",%u,%u,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%u,%u,%" PRIu64
+      ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
      access.access_timestamp, block_id, access.block_type, access.block_size,
      access.cf_id, access.cf_name.c_str(), access.level, access.sst_fd_number,
      access.caller, access.no_insert, access.get_id, get_key_id,
-      access.referenced_data_size, access.is_cache_hit);
+      access.referenced_data_size, access.is_cache_hit,
+      access.referenced_key_exist_in_block, access.num_keys_in_block,
+      BlockCacheTraceHelper::GetTableId(access),
+      BlockCacheTraceHelper::GetSequenceNumber(access), access.block_key.size(),
+      access.referenced_key.size(),
+      BlockCacheTraceHelper::GetBlockOffsetInFile(access));
  if (ret < 0) {
    return Status::IOError("failed to format the output");
  }
@ -1432,13 +1511,13 @@ Status BlockCacheTraceAnalyzer::RecordAccess(
  uint64_t get_key_id = 0;
  if (access.caller == TableReaderCaller::kUserGet &&
      access.get_id != BlockCacheTraceHelper::kReservedGetId) {
-    std::string row_key = BlockCacheTraceHelper::ComputeRowKey(access);
-    if (get_key_info_map_.find(row_key) == get_key_info_map_.end()) {
-      get_key_info_map_[row_key].key_id = unique_get_key_id_;
-      get_key_id = unique_get_key_id_;
+    std::string user_key = ExtractUserKey(access.referenced_key).ToString();
+    if (get_key_info_map_.find(user_key) == get_key_info_map_.end()) {
+      get_key_info_map_[user_key].key_id = unique_get_key_id_;
      unique_get_key_id_++;
    }
-    get_key_info_map_[row_key].AddAccess(access, access_sequence_number_);
+    get_key_id = get_key_info_map_[user_key].key_id;
+    get_key_info_map_[user_key].AddAccess(access, access_sequence_number_);
  }

  if (compute_reuse_distance_) {
@ -2224,6 +2303,25 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) {
    analyzer.WriteCorrelationFeaturesForGet(
        FLAGS_analyze_correlation_coefficients_max_number_of_values);
  }
+
+  // Skewness reports are written only when both the labels and the buckets
+  // flags are set.
+  if (!FLAGS_skew_labels.empty() && !FLAGS_skew_buckets.empty()) {
+    const std::vector<uint64_t> skew_buckets =
+        parse_buckets(FLAGS_skew_buckets);
+    std::stringstream label_stream(FLAGS_skew_labels);
+    while (label_stream.good()) {
+      std::string skew_label;
+      getline(label_stream, skew_label, ',');
+      // A label that mentions "block" gets one report per block type first;
+      // every label then gets the report that is not restricted to a block
+      // type (TraceType::kTraceMax).
+      std::vector<TraceType> block_types;
+      if (skew_label.find("block") != std::string::npos) {
+        block_types = {TraceType::kBlockTraceIndexBlock,
+                       TraceType::kBlockTraceFilterBlock,
+                       TraceType::kBlockTraceDataBlock};
+      }
+      block_types.push_back(TraceType::kTraceMax);
+      for (const TraceType block_type : block_types) {
+        analyzer.WriteSkewness(skew_label, skew_buckets, block_type);
+      }
+    }
+  }
  return 0;
 }

--- a/tools/block_cache_analyzer/block_cache_trace_analyzer.h
+++ b/tools/block_cache_analyzer/block_cache_trace_analyzer.h
@ -33,6 +33,8 @@ struct GetKeyInfo {
 // Statistics of a block.
 struct BlockAccessInfo {
  uint64_t block_id = 0;
+  uint64_t table_id = 0;
+  uint64_t block_offset = 0;
  uint64_t num_accesses = 0;
  uint64_t block_size = 0;
  uint64_t first_access_time = 0;
@ -73,6 +75,8 @@ struct BlockAccessInfo {
    if (first_access_time == 0) {
      first_access_time = access.access_timestamp;
    }
+    table_id = BlockCacheTraceHelper::GetTableId(access);
+    block_offset = BlockCacheTraceHelper::GetBlockOffsetInFile(access);
    last_access_time = access.access_timestamp;
    block_size = access.block_size;
    caller_num_access_map[access.caller]++;
@ -301,6 +305,10 @@ class BlockCacheTraceAnalyzer {

  void WriteCorrelationFeaturesForGet(uint32_t max_number_of_values) const;

+  void WriteSkewness(const std::string& label_str,
+                     const std::vector<uint64_t>& percent_buckets,
+                     TraceType target_block_type) const;
+
  const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
  TEST_cf_aggregates_map() const {
    return cf_aggregates_map_;
@ -312,7 +320,8 @@ class BlockCacheTraceAnalyzer {
  std::string BuildLabel(const std::set<std::string>& labels,
                         const std::string& cf_name, uint64_t fd,
                         uint32_t level, TraceType type,
-                         TableReaderCaller caller, uint64_t block_key) const;
+                         TableReaderCaller caller, uint64_t block_key,
+                         const BlockAccessInfo& block) const;

  void ComputeReuseDistance(BlockAccessInfo* info) const;

@ -341,7 +350,8 @@ class BlockCacheTraceAnalyzer {
                         const std::string& /*block_key*/,
                         uint64_t /*block_key_id*/,
                         const BlockAccessInfo& /*block_access_info*/)>
-          block_callback) const;
+          block_callback,
+      std::set<std::string>* labels = nullptr) const;

  void UpdateFeatureVectors(
      const std::vector<uint64_t>& access_sequence_number_timeline,
--- a/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc
+++ b/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc
@ -181,7 +181,9 @@ class BlockCacheTracerTest : public testing::Test {
            analyze_get_spatial_locality_labels_,
        "-analyze_get_spatial_locality_buckets=" +
            analyze_get_spatial_locality_buckets_,
-        "-analyze_correlation_coefficients_labels=all"};
+        "-analyze_correlation_coefficients_labels=all",
+        "-skew_labels=all",
+        "-skew_buckets=10,50,100"};
    char arg_buffer[kArgBufferSize];
    char* argv[kMaxArgCount];
    int argc = 0;
@ -331,6 +333,33 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
      }
    }
  }
+  {
+    // Validate the skewness csv file: the values after the label column,
+    // summed over all data rows, must add up to 100.
+    const std::string skewness_file_path = test_path_ + "/all_skewness";
+    std::ifstream skew_file(skewness_file_path);
+    // Read (and discard) the header.
+    std::string line;
+    ASSERT_TRUE(getline(skew_file, line));
+    double sum_percent = 0;
+    while (getline(skew_file, line)) {
+      std::stringstream ss_naccess(line);
+      std::string substr;
+      // The first column of a row is skipped as the label; the remaining
+      // columns are summed.
+      bool read_label = false;
+      while (ss_naccess.good()) {
+        ASSERT_TRUE(getline(ss_naccess, substr, ','));
+        if (!read_label) {
+          read_label = true;
+          continue;
+        }
+        sum_percent += ParseDouble(substr);
+      }
+    }
+    // The sum is accumulated from parsed doubles, so compare with the
+    // floating-point assertion instead of bit-exact equality.
+    ASSERT_DOUBLE_EQ(100.0, sum_percent);
+    ASSERT_FALSE(getline(skew_file, line));
+    skew_file.close();
+    ASSERT_OK(env_->DeleteFile(skewness_file_path));
+  }
  {
    // Validate the timeline csv files.
    const std::vector<std::string> time_units{"_60", "_3600"};
--- a/trace_replay/block_cache_tracer.cc
+++ b/trace_replay/block_cache_tracer.cc
@ -61,11 +61,40 @@ std::string BlockCacheTraceHelper::ComputeRowKey(
    return "";
  }
  Slice key = ExtractUserKey(access.referenced_key);
-  uint64_t seq_no = access.get_from_user_specified_snapshot == Boolean::kFalse
-                        ? 0
-                        : 1 + GetInternalKeySeqno(access.referenced_key);
-  return std::to_string(access.sst_fd_number) + "_" + key.ToString() + "_" +
-         std::to_string(seq_no);
+  return std::to_string(access.sst_fd_number) + "_" + key.ToString();
+}
+
+// Returns the table id of a Get/MultiGet access: the fixed 32-bit value
+// decoded from the start of the referenced key, plus one. Returns 0 when the
+// caller is not a Get/MultiGet or the referenced key is shorter than four
+// bytes, so 0 never collides with a decoded id.
+uint64_t BlockCacheTraceHelper::GetTableId(
+    const BlockCacheTraceRecord& access) {
+  const bool has_table_prefix =
+      IsGetOrMultiGet(access.caller) && access.referenced_key.size() >= 4;
+  if (!has_table_prefix) {
+    return 0;
+  }
+  const uint32_t encoded_id = DecodeFixed32(access.referenced_key.data());
+  return static_cast<uint64_t>(encoded_id) + 1;
+}
+
+// Returns 1 + the sequence number taken from the referenced key when a
+// Get/MultiGet access reads from a user-specified snapshot. Returns 0 for any
+// other caller, when no user-specified snapshot is used, or when the
+// referenced key is too short to carry a sequence number.
+uint64_t BlockCacheTraceHelper::GetSequenceNumber(
+    const BlockCacheTraceRecord& access) {
+  if (!IsGetOrMultiGet(access.caller)) {
+    return 0;
+  }
+  if (access.get_from_user_specified_snapshot == Boolean::kFalse) {
+    return 0;
+  }
+  // Mirror the length guard in GetTableId so a truncated key is never handed
+  // to GetInternalKeySeqno.
+  // NOTE(review): 8 is assumed to be the size of the internal-key footer
+  // that holds the sequence number -- confirm against dbformat.h.
+  if (access.referenced_key.size() < 8) {
+    return 0;
+  }
+  return 1 + GetInternalKeySeqno(access.referenced_key);
+}
+
+// Returns the block's offset in its file, taken from the block key: the key
+// is consumed as a run of varint64 values and the last one that decodes is
+// returned. Returns 0 when no varint64 can be decoded.
+uint64_t BlockCacheTraceHelper::GetBlockOffsetInFile(
+    const BlockCacheTraceRecord& access) {
+  Slice remaining(access.block_key);
+  uint64_t last_decoded = 0;
+  // Every successful decode overwrites the previous result, so the value
+  // decoded last is the one that is kept.
+  for (uint64_t value = 0; GetVarint64(&remaining, &value); value = 0) {
+    last_decoded = value;
+  }
+  return last_decoded;
+}

 BlockCacheTraceWriter::BlockCacheTraceWriter(
--- a/trace_replay/block_cache_tracer.h
+++ b/trace_replay/block_cache_tracer.h
@ -31,6 +31,15 @@ class BlockCacheTraceHelper {
  // Row key is a concatenation of the access's fd_number and the referenced
  // user key.
  static std::string ComputeRowKey(const BlockCacheTraceRecord& access);
+  // The table id is decoded from the first four bytes of the referenced key
+  // of a Get/MultiGet request and incremented by one; 0 means it is unknown.
+  static uint64_t GetTableId(const BlockCacheTraceRecord& access);
+  // Returns 1 + the sequence number stored in the last part of the referenced
+  // key when a Get/MultiGet reads from a user-specified snapshot; 0 otherwise.
+  static uint64_t GetSequenceNumber(const BlockCacheTraceRecord& access);
+  // Block offset in a file is the last varint64 in the block key.
+  static uint64_t GetBlockOffsetInFile(const BlockCacheTraceRecord& access);
+
  static const std::string kUnknownColumnFamilyName;
  static const uint64_t kReservedGetId;
 };
--- a/utilities/simulator_cache/cache_simulator_test.cc
+++ b/utilities/simulator_cache/cache_simulator_test.cc
@ -84,7 +84,7 @@ class CacheSimulatorTest : public testing::Test {
    for (auto const& key : keys) {
      std::string row_key = kRefKeyPrefix + key + kRefKeySequenceNumber;
      auto handle =
-          sim_cache->Lookup("0_" + ExtractUserKey(row_key).ToString() + "_0");
+          sim_cache->Lookup("0_" + ExtractUserKey(row_key).ToString());
      EXPECT_NE(nullptr, handle);
      sim_cache->Release(handle);
    }
@ -229,10 +229,9 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulator) {
  ASSERT_EQ(100, cache_simulator->miss_ratio_stats().miss_ratio());
  ASSERT_EQ(10, cache_simulator->miss_ratio_stats().user_accesses());
  ASSERT_EQ(100, cache_simulator->miss_ratio_stats().user_miss_ratio());
-  auto handle = sim_cache->Lookup(
-      std::to_string(first_get.sst_fd_number) + "_" +
-      ExtractUserKey(first_get.referenced_key).ToString() + "_" +
-      std::to_string(1 + GetInternalKeySeqno(first_get.referenced_key)));
+  auto handle =
+      sim_cache->Lookup(std::to_string(first_get.sst_fd_number) + "_" +
+                        ExtractUserKey(first_get.referenced_key).ToString());
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  for (uint32_t i = 100; i < block_id; i++) {
@ -256,10 +255,9 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulator) {
  ASSERT_EQ(15, cache_simulator->miss_ratio_stats().user_accesses());
  ASSERT_EQ(66, static_cast<uint64_t>(
                    cache_simulator->miss_ratio_stats().user_miss_ratio()));
-  handle = sim_cache->Lookup(
-      std::to_string(second_get.sst_fd_number) + "_" +
-      ExtractUserKey(second_get.referenced_key).ToString() + "_" +
-      std::to_string(1 + GetInternalKeySeqno(second_get.referenced_key)));
+  handle =
+      sim_cache->Lookup(std::to_string(second_get.sst_fd_number) + "_" +
+                        ExtractUserKey(second_get.referenced_key).ToString());
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  for (uint32_t i = 100; i < block_id; i++) {
@ -394,7 +392,7 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulatorGetTest) {
  AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 7, 8, 4,
              {"1", "2", "3", "5"}, {"1", "2", "4"});
  for (auto const& key : {"1", "2", "4"}) {
-    auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key + "_0");
+    auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key);
    ASSERT_NE(nullptr, handle);
    sim_cache->Release(handle);
  }
@ -417,7 +415,7 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulatorGetTest) {
  AssertCache(sim_cache, cache_simulator->miss_ratio_stats(), 16, 103, 99, {},
              {});
  for (auto const& key : {"1", "2", "4"}) {
-    auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key + "_0");
+    auto handle = sim_cache->Lookup("0_" + kRefKeyPrefix + key);
    ASSERT_EQ(nullptr, handle);
  }
 }
@ -437,9 +435,9 @@ TEST_F(CacheSimulatorTest, HybridRowBlockNoInsertCacheSimulator) {
    cache_simulator->Access(first_get);
    block_id++;
  }
-  auto handle = sim_cache->Lookup(
-      std::to_string(first_get.sst_fd_number) + "_" +
-      ExtractUserKey(first_get.referenced_key).ToString() + "_0");
+  auto handle =
+      sim_cache->Lookup(std::to_string(first_get.sst_fd_number) + "_" +
+                        ExtractUserKey(first_get.referenced_key).ToString());
  ASSERT_NE(nullptr, handle);
  sim_cache->Release(handle);
  // All blocks are missing from the cache since insert_blocks_row_kvpair_misses