Skip to content

Commit 0a9ef95

Browse files
authored
chore: support huffman compression for string values (#6025)
* chore: support huffman compression for string values

  Now we can pass both domains: `--huffman_table=STRINGS:....,KEYS:....`

  Moreover, fix bugs around huffman and tiering so that at least a trivial manual test passes when both settings are enabled:

  ```
  ./dragonfly --dbfilename= --noversion_check --maxmemory=8G --logtostderr --huffman_table=STRINGS:ChD4bAf/D/bPSwY= --tiered_prefix /tmp/tiered
  > debug populate 1000000 key 128
  > get key:42
  ```

* chore: comments
1 parent 4903469 commit 0a9ef95

File tree

5 files changed

+47
-11
lines changed

5 files changed

+47
-11
lines changed

src/core/compact_object.cc

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,7 @@ size_t CompactObj::Size() const {
813813
}
814814
case EXTERNAL_TAG:
815815
raw_size = u_.ext_ptr.serialized_size;
816-
CHECK(mask_bits_.encoding != HUFFMAN_ENC);
816+
first_byte = GetFirstByte();
817817
break;
818818
case ROBJ_TAG:
819819
raw_size = u_.r_obj.Size();
@@ -1202,10 +1202,16 @@ void CompactObj::GetString(char* dest) const {
12021202
}
12031203

12041204
void CompactObj::SetExternal(size_t offset, uint32_t sz, ExternalRep rep) {
1205+
uint8_t first_byte = 0;
1206+
if (mask_bits_.encoding == HUFFMAN_ENC) {
1207+
CHECK(rep == ExternalRep::STRING);
1208+
first_byte = GetFirstByte();
1209+
}
12051210
SetMeta(EXTERNAL_TAG, mask_);
12061211

12071212
u_.ext_ptr.is_cool = 0;
12081213
u_.ext_ptr.representation = static_cast<uint8_t>(rep);
1214+
u_.ext_ptr.first_byte = first_byte;
12091215
u_.ext_ptr.page_offset = offset % 4096;
12101216
u_.ext_ptr.serialized_size = sz;
12111217
u_.ext_ptr.offload.page_index = offset / 4096;
@@ -1271,6 +1277,35 @@ void CompactObj::Reset() {
12711277
mask_ = 0;
12721278
}
12731279

1280+
uint8_t CompactObj::GetFirstByte() const {
1281+
DCHECK_EQ(ObjType(), OBJ_STRING);
1282+
1283+
if (IsInline()) {
1284+
return u_.inline_str[0];
1285+
}
1286+
1287+
if (taglen_ == ROBJ_TAG) {
1288+
CHECK_EQ(OBJ_STRING, u_.r_obj.type());
1289+
DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());
1290+
return *(uint8_t*)u_.r_obj.inner_obj();
1291+
}
1292+
1293+
if (taglen_ == SMALL_TAG) {
1294+
return u_.small_str.first_byte();
1295+
}
1296+
1297+
if (taglen_ == EXTERNAL_TAG) {
1298+
if (u_.ext_ptr.is_cool) {
1299+
const CompactObj& cooled_obj = u_.ext_ptr.cool_record->value;
1300+
return cooled_obj.GetFirstByte();
1301+
}
1302+
return u_.ext_ptr.first_byte;
1303+
}
1304+
1305+
LOG(DFATAL) << "Bad tag " << int(taglen_);
1306+
return 0;
1307+
}
1308+
12741309
// Frees all resources if owns.
12751310
void CompactObj::Free() {
12761311
DCHECK(HasAllocated());

src/core/compact_object.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,8 @@ class CompactObj {
395395
return taglen_ <= kInlineLen;
396396
}
397397

398+
uint8_t GetFirstByte() const;
399+
398400
static constexpr unsigned InlineLen() {
399401
return kInlineLen;
400402
}
@@ -430,8 +432,7 @@ class CompactObj {
430432
memory_resource()->deallocate(ptr, sizeof(T), alignof(T));
431433
}
432434

433-
// returns raw (non-decoded) string together with the encoding mask.
434-
// Used to bypass decoding layer.
435+
// returns raw (non-decoded) string. Used to bypass decoding layer.
435436
// Precondition: the object is a non-inline string.
436437
StringOrView GetRawString() const;
437438

@@ -467,13 +468,13 @@ class CompactObj {
467468
mask_ = mask;
468469
}
469470

470-
// Must be 16 bytes.
471471
struct ExternalPtr {
472472
uint32_t serialized_size;
473473
uint16_t page_offset; // 0 for multi-page blobs. != 0 for small blobs.
474474
uint8_t is_cool : 1;
475475
uint8_t representation : 2; // See ExternalRep
476-
uint16_t is_reserved : 13;
476+
uint8_t is_reserved : 5;
477+
uint8_t first_byte;
477478

478479
// We do not have enough space in the common area to store page_index together with
479480
// cool_record pointer. Therefore, we moved this field into TieredColdRecord itself.
@@ -487,7 +488,7 @@ class CompactObj {
487488
detail::TieredColdRecord* cool_record;
488489
};
489490
} __attribute__((packed));
490-
491+
static_assert(sizeof(ExternalPtr) == 16);
491492
struct JsonConsT {
492493
JsonType* json_ptr;
493494
size_t bytes_used;
@@ -535,7 +536,7 @@ class CompactObj {
535536
uint8_t expire : 1;
536537
uint8_t mc_flag : 1; // Marks keys that have memcache flags assigned.
537538

538-
// See the Encoding enum for the meaning of these bits.
539+
// See the EncodingEnum for the meaning of these bits.
539540
uint8_t encoding : 2;
540541

541542
// IO_PENDING is set when the tiered storage has issued an i/o request to save the value.

src/server/debugcmd.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ void DoComputeHist(CompactObjType type, EngineShard* shard, ConnectionContext* c
307307
it->first.GetString(&scratch);
308308
}
309309
} else if (type == OBJ_STRING && it->second.ObjType() == OBJ_STRING) {
310-
if (it->first.MallocUsed() > 0) {
310+
if (it->second.MallocUsed() > 0) {
311311
it->second.GetString(&scratch);
312312
}
313313
} else if (type == OBJ_ZSET && it->second.ObjType() == OBJ_ZSET) {

src/server/main_service.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,9 @@ ABSL_FLAG(uint32_t, shard_thread_busy_polling_usec, 0,
125125

126126
ABSL_FLAG(string, huffman_table, "",
127127
"a comma separated map: domain1:code1,domain2:code2,... where "
128-
"domain can currently be only KEYS, code is base64 encoded huffman table exported via "
129-
"DEBUG COMPRESSION EXPORT. if empty no huffman compression is appplied.");
128+
"domain can currently be only KEYS or STRINGS, code is a base64-encoded huffman table"
129+
" exported via "
130+
"DEBUG COMPRESSION EXPORT. if the flag is empty no huffman compression is applied.");
130131

131132
ABSL_FLAG(bool, jsonpathv2, true,
132133
"If true uses Dragonfly jsonpath implementation, "

src/server/tiered_storage.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,6 @@ void TieredStorage::CoolDown(DbIndex db_ind, std::string_view str,
613613
record->value = std::move(*pv);
614614

615615
pv->SetCool(segment.offset, segment.length, record);
616-
DCHECK_EQ(pv->Size(), record->value.Size());
617616
}
618617

619618
PrimeValue TieredStorage::Warmup(DbIndex dbid, PrimeValue::CoolItem item) {

0 commit comments

Comments
 (0)