Skip to content

Commit

Permalink
fdsdump: add support for strings of variable length
Browse files Browse the repository at this point in the history
  • Loading branch information
sedmicha committed Feb 22, 2024
1 parent 1586414 commit edb69c5
Show file tree
Hide file tree
Showing 16 changed files with 244 additions and 37 deletions.
21 changes: 16 additions & 5 deletions src/tools/fdsdump/src/aggregator/aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ namespace fdsdump {
namespace aggregator {

Aggregator::Aggregator(const View &view) :
m_table(view.key_size(), view.value_size()),
m_key_buffer(view.key_size()),
m_table(view),
m_key_buffer(65535),
m_view(view)
{
}
Expand Down Expand Up @@ -65,23 +65,34 @@ void
Aggregator::aggregate(FlowContext &ctx)
{
// build key
for (const auto &pair : m_view.iter_keys(m_key_buffer.data())) {
uint32_t size = 0;
uint8_t *ptr = m_key_buffer.data();
if (!m_view.is_fixed_size()) {
size += sizeof(uint32_t);
}
for (const auto &pair : m_view.iter_keys(ptr)) {
if (!pair.field.load(ctx, pair.value)) {
return;
}
if (!m_view.is_fixed_size()) {
size += pair.field.size(&pair.value);
}
}
if (!m_view.is_fixed_size()) {
*reinterpret_cast<uint32_t *>(m_key_buffer.data()) = size;
}

// find in hash table
uint8_t *rec;
if (!m_table.find_or_create(m_key_buffer.data(), rec)) {
// init fields
for (const auto &pair : m_view.iter_values(rec + m_view.key_size())) {
for (const auto &pair : m_view.iter_values(rec)) {
pair.field.init(pair.value);
}
}

// aggregate
for (const auto &pair : m_view.iter_values(rec + m_view.key_size())) {
for (const auto &pair : m_view.iter_values(rec)) {
pair.field.aggregate(ctx, pair.value);
}
}
Expand Down
16 changes: 9 additions & 7 deletions src/tools/fdsdump/src/aggregator/fastHashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ static constexpr double EXPAND_WHEN_THIS_FULL = 0.95;
static constexpr unsigned int EXPAND_WITH_FACTOR_OF = 2;
static constexpr uint8_t EMPTY_BIT = 0x80;

FastHashTable::FastHashTable(std::size_t key_size, std::size_t value_size) :
m_key_size(key_size), m_value_size(value_size)
FastHashTable::FastHashTable(const View &view) :
m_view(view)
{
init_blocks();
}
Expand All @@ -45,7 +45,8 @@ FastHashTable::init_blocks()
bool
FastHashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
{
uint64_t hash = XXH3_64bits(key, m_key_size); // The hash of the key
auto key_size = m_view.key_size(key);
uint64_t hash = XXH3_64bits(key, key_size); // The hash of the key
uint64_t index = (hash >> 7) & (m_block_count - 1); // The starting block index

for (;;) {
Expand All @@ -66,7 +67,7 @@ FastHashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
item_index += one_index;

uint8_t *record = block.items[item_index]; // The record whose item tag matched
if (memcmp(record, key, m_key_size) == 0) { // Does the key match as well or was it just a hash collision?
if (m_view.key_size(record) == key_size && memcmp(record, key, key_size) == 0) { // Does the key match as well or was it just a hash collision?
item = record;
return true; // We found the item
}
Expand All @@ -90,12 +91,12 @@ FastHashTable::lookup(uint8_t *key, uint8_t *&item, bool create_if_not_found)
auto empty_index = __builtin_ctz(empty_match);
block.tags[empty_index] = item_tag;

uint8_t *record = m_allocator.allocate(m_key_size + m_value_size);
uint8_t *record = m_allocator.allocate(key_size + m_view.value_size());
block.items[empty_index] = record;
m_items.push_back(record);
m_record_count++;

memcpy(record, key, m_key_size); // Copy the key, leave the value part uninitialized
memcpy(record, key, key_size); // Copy the key, leave the value part uninitialized
item = record;

// If the hash table has reached a specified percentage of fullness, expand the hash table
Expand All @@ -121,7 +122,8 @@ FastHashTable::expand()

// Reassign all the items to the newly initialized blocks
for (uint8_t *item : m_items) {
uint64_t hash = XXH3_64bits(item, m_key_size);
auto key_size = m_view.key_size(item);
uint64_t hash = XXH3_64bits(item, key_size);
uint64_t index = (hash >> 7) & (m_block_count - 1);
uint8_t item_tag = (hash & 0xFF) & ~EMPTY_BIT;

Expand Down
5 changes: 2 additions & 3 deletions src/tools/fdsdump/src/aggregator/fastHashTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class FastHashTable {
* @param[in] view View of the aggregation record; the table asks it for the key size of each record and for the value size
* @note The view is stored by reference, so it must outlive the hash table
*/
FastHashTable(std::size_t key_size, std::size_t value_size);
FastHashTable(const View &view);

/**
* @brief Find a record corresponding to the provided key
Expand Down Expand Up @@ -70,8 +70,7 @@ class FastHashTable {
private:
std::size_t m_block_count = 4096;
std::size_t m_record_count = 0;
std::size_t m_key_size;
std::size_t m_value_size;
const View &m_view;

std::vector<HashTableBlock> m_blocks;
std::vector<uint8_t *> m_items;
Expand Down
5 changes: 5 additions & 0 deletions src/tools/fdsdump/src/aggregator/field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ Field::set_data_type(DataType data_type)
case DataType::Octets128B:
m_size = sizeof(Value::str);
break;
case DataType::VarString:
m_size = 0;
break;
case DataType::Unassigned:
throw std::logic_error("unexpected field data type");
}
Expand Down Expand Up @@ -174,6 +177,8 @@ Field::compare(const Value &a, const Value &b) const
case DataType::String128B:
case DataType::Octets128B:
return cmp(reinterpret_cast<const uint8_t *>(a.str), reinterpret_cast<const uint8_t *>(b.str), sizeof(a.str));
case DataType::VarString:
return cmp(a.varstr, b.varstr);
case DataType::Unassigned:
throw std::logic_error("cannot compare fields with unassigned data type");
}
Expand Down
2 changes: 1 addition & 1 deletion src/tools/fdsdump/src/aggregator/field.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class Field {
/**
* @brief Get the size of the field in bytes
*/
size_t size() const { return m_size; }
virtual size_t size(const Value *value = nullptr) const { (void) value; return m_size; }

/**
* @brief Get the offset of the field from the beginning of the aggregation record
Expand Down
10 changes: 10 additions & 0 deletions src/tools/fdsdump/src/aggregator/ipfixField.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,5 +190,15 @@ IpfixField::operator==(const Field &other) const
return false;
}

/**
 * @brief Get the number of bytes the value of this field occupies
 *
 * For a VarString field with a value supplied, the result is the size of the
 * length prefix plus the number of text bytes stored in the value. In every
 * other case the answer is delegated to Field::size().
 *
 * @param[in] value Value to measure, or nullptr when no concrete value is available
 * @return Size in bytes
 */
size_t
IpfixField::size(const Value* value) const
{
    const bool is_variable = (data_type() == DataType::VarString);

    // Without a concrete value, or for any other data type, defer to the base class
    if (!is_variable || value == nullptr) {
        return Field::size(value);
    }

    const auto &str = value->varstr;
    return sizeof(str.len) + str.len;
}

} // aggregator
} // fdsdump
5 changes: 5 additions & 0 deletions src/tools/fdsdump/src/aggregator/ipfixField.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ class IpfixField : public Field {
*/
bool operator==(const Field &other) const override;

/**
* @brief Get the size the value of this field occupies in the aggregation record
*/
size_t size(const Value* value = nullptr) const override;

private:
uint32_t m_pen;
uint16_t m_id;
Expand Down
49 changes: 49 additions & 0 deletions src/tools/fdsdump/src/aggregator/jsonPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ JSONPrinter::append_value(const Field &field, Value *value)
append_octet_value(value);
m_buffer.push_back('"');
return;
case DataType::VarString:
m_buffer.push_back('"');
append_varstring_value(value);
m_buffer.push_back('"');
return;
case DataType::Unassigned:
m_buffer.append("null");
return;
Expand Down Expand Up @@ -157,6 +162,50 @@ JSONPrinter::append_string_value(const Value *value)
}
}

/**
 * @brief Append a variable-length string value to the output buffer, escaped for use inside a JSON string
 *
 * Exactly `value->varstr.len` bytes of `value->varstr.text` are processed.
 * This function does not emit the surrounding double quotes.
 *
 * @param[in] value Value whose `varstr` member holds the text and its length
 */
void
JSONPrinter::append_varstring_value(const Value *value)
{
    for (uint32_t i = 0; i < value->varstr.len; ++i) {
        const char byte = value->varstr.text[i];

        // Characters with a dedicated short escape sequence
        switch (byte) {
        case '"':
            m_buffer.append("\\\"");
            continue;
        case '\\':
            // The backslash itself must be escaped; an apostrophe needs no
            // escaping in JSON and is emitted verbatim by the default path.
            m_buffer.append("\\\\");
            continue;
        case '/':
            m_buffer.append("\\/");
            continue;
        case '\b':
            m_buffer.append("\\b");
            continue;
        case '\f':
            m_buffer.append("\\f");
            continue;
        case '\n':
            m_buffer.append("\\n");
            continue;
        case '\r':
            m_buffer.append("\\r");
            continue;
        case '\t':
            m_buffer.append("\\t");
            continue;
        default:
            break;
        }

        if (byte >= '\x00' && byte <= '\x1F') {
            // Remaining control characters.
            // NOTE(review): JSON requires the form \u00XX for these; confirm
            // that char2hex() produces the complete escape sequence.
            m_buffer.append(char2hex(byte));
        } else {
            m_buffer.append(1, byte);
        }
    }
}

void
JSONPrinter::append_octet_value(const Value *value)
{
Expand Down
1 change: 1 addition & 0 deletions src/tools/fdsdump/src/aggregator/jsonPrinter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class JSONPrinter : public Printer
void append_value(const Field &field, Value *value);
void append_string_value(const Value *value);
void append_octet_value(const Value *value);
void append_varstring_value(const Value *value);

std::shared_ptr<View> m_view;
std::string m_buffer;
Expand Down
25 changes: 25 additions & 0 deletions src/tools/fdsdump/src/aggregator/print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ get_width(const Field &field)
return 30;
case DataType::MacAddress:
return 17;
case DataType::VarString:
return 40;
default:
assert(0);
}
Expand Down Expand Up @@ -171,6 +173,26 @@ string_to_str(const char array[128])
return result;
}

/**
 * @brief Convert a variable-length string to its printable representation
 *
 * Printable bytes are copied as they are; every other byte is written as
 * "\x" followed by the output of char2hex() for that byte.
 *
 * @param[in] text Pointer to the string bytes (no terminating zero is expected)
 * @param[in] len  Number of bytes to convert
 * @return Converted string
 */
static std::string
varstring_to_str(const char *text, uint32_t len)
{
    std::string result;
    result.reserve(len);

    for (uint32_t i = 0; i < len; ++i) {
        const char byte = text[i];

        // std::isprint() has undefined behavior for arguments that are not
        // representable as unsigned char (other than EOF), which happens for
        // bytes >= 0x80 when plain char is signed. Convert first.
        if (std::isprint(static_cast<unsigned char>(byte))) {
            result.append(1, byte);
        } else {
            result.append("\\x");
            result.append(char2hex(byte));
        }
    }

    return result;
}

void
print_value(const Field &field, Value &value, std::string &buffer)
{
Expand Down Expand Up @@ -237,6 +259,9 @@ print_value(const Field &field, Value &value, std::string &buffer)
case DataType::DateTime:
buffer.append(datetime_to_str(value.ts_millisecs));
break;
case DataType::VarString:
buffer.append(varstring_to_str(value.varstr.text, value.varstr.len));
break;
default: assert(0);
}
}
Expand Down
21 changes: 12 additions & 9 deletions src/tools/fdsdump/src/aggregator/stdHashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@
namespace fdsdump {
namespace aggregator {

StdHashTable::StdHashTable(std::size_t key_size, std::size_t value_size) :
m_key_size(key_size),
m_value_size(value_size)
StdHashTable::StdHashTable(const View& view) :
m_view(view)
{
auto hash = [=](const uint8_t *key) {
return XXH3_64bits(key, m_key_size);
auto hash = [this](const uint8_t *key) {
auto key_size = m_view.key_size(key);
return XXH3_64bits(key, key_size);
};
auto equals = [=](const uint8_t *a, const uint8_t *b) {
return std::memcmp(a, b, m_key_size) == 0;
auto equals = [this](const uint8_t *a, const uint8_t *b) {
auto key_size = m_view.key_size(a);
auto key_size2 = m_view.key_size(b);
return key_size == key_size2 && std::memcmp(a, b, key_size) == 0;
};
m_map = Map(1, hash, equals);
}
Expand All @@ -46,8 +48,9 @@ StdHashTable::find_or_create(uint8_t *key, uint8_t *&item)
item = it->second;
return true;
} else {
uint8_t *data = m_allocator.allocate(m_key_size + m_value_size);
std::memcpy(data, key, m_key_size);
auto key_size = m_view.key_size(key);
uint8_t *data = m_allocator.allocate(key_size + m_view.value_size());
std::memcpy(data, key, key_size);
m_map.insert({data, data});
m_items.push_back(data);
item = data;
Expand Down
8 changes: 4 additions & 4 deletions src/tools/fdsdump/src/aggregator/stdHashTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
*/
#pragma once

#include "allocator.hpp"
#include <aggregator/allocator.hpp>
#include <aggregator/view.hpp>

#include <cstdint>
#include <functional>
Expand All @@ -27,7 +28,7 @@ class StdHashTable {
* @param[in] view View of the aggregation record; the table asks it for the key size of each record and for the value size
* @note The view is stored by reference, so it must outlive the hash table
*/
StdHashTable(std::size_t key_size, std::size_t value_size);
StdHashTable(const View &view);

/**
* @brief Find a record corresponding to the provided key
Expand Down Expand Up @@ -63,8 +64,7 @@ class StdHashTable {
std::function<std::size_t(const uint8_t *)>,
std::function<bool(const uint8_t *, const uint8_t *)>>;

std::size_t m_key_size;
std::size_t m_value_size;
const View& m_view;
std::vector<uint8_t *> m_items;
Allocator m_allocator;
Map m_map;
Expand Down
Loading

0 comments on commit edb69c5

Please sign in to comment.