Kuzu C++ API
Loading...
Searching...
No Matches
value_vector.h
Go to the documentation of this file.
1#pragma once
2
3#include <optional>
4#include <utility>
5
6#include "assert.h"
7#include "cast.h"
8#include "copy_constructors.h"
9#include "data_chunk_state.h"
10#include "null_mask.h"
11#include "ku_string.h"
12#include "auxiliary_buffer.h"
13
14namespace kuzu {
15namespace common {
16
17class Value;
18
22 friend class ListVector;
23 friend class ListAuxiliaryBuffer;
24 friend class StructVector;
25 friend class StringVector;
26 friend class ArrowColumnVector;
27
28public:
// Primary constructor: takes the logical type of the stored values, plus an optional memory manager
// and an optional shared DataChunkState (both default to nullptr). Only declared in this header.
29 explicit ValueVector(LogicalType dataType, storage::MemoryManager* memoryManager = nullptr,
30 std::shared_ptr<DataChunkState> dataChunkState = nullptr);
// Convenience constructor: wraps `dataTypeID` in a LogicalType and delegates to the
// LogicalType-taking constructor, leaving the DataChunkState at its default (nullptr).
31 explicit ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager* memoryManager = nullptr)
32 : ValueVector(LogicalType(dataTypeID), memoryManager) {
// LIST is rejected here. NOTE(review): presumably because a bare type ID cannot describe the
// list's child type — confirm.
33 KU_ASSERT(dataTypeID != LogicalTypeID::LIST);
34 }
35
37 ~ValueVector() = default;
38
// Returns the value (read as T) at the first selected position that is not null.
// Returns std::nullopt when the selection is empty or every selected position is null.
39 template<typename T>
40 std::optional<T> firstNonNull() const {
41 sel_t selectedSize = state->getSelSize();
42 if (selectedSize == 0) {
43 return std::nullopt;
44 }
// Fast path: with a no-nulls guarantee the first selected position is the answer.
45 if (hasNoNullsGuarantee()) {
46 return getValue<T>(state->getSelVector()[0]);
47 } else {
// Otherwise walk the selected positions in order and stop at the first non-null one.
48 for (size_t i = 0; i < selectedSize; i++) {
49 auto pos = state->getSelVector()[i];
50 if (!isNull(pos)) {
51 return std::make_optional(getValue<T>(pos));
52 }
53 }
54 }
// Every selected position was null.
55 return std::nullopt;
56 }
57
// Runs `func` over the selection vector, skipping positions whose value is null.
// `func` is called with the position that the selection vector's forEach supplies.
58 template<class Func>
59 void forEachNonNull(Func&& func) const {
// With a no-nulls guarantee `func` is handed to forEach directly, avoiding a per-position check.
60 if (hasNoNullsGuarantee()) {
61 state->getSelVector().forEach(func);
62 } else {
// Otherwise wrap `func` in a lambda that filters out null positions first.
63 state->getSelVector().forEach([&](auto i) {
64 if (!isNull(i)) {
65 func(i);
66 }
67 });
68 }
69 }
70
71 uint32_t countNonNull() const;
72
73 void setState(const std::shared_ptr<DataChunkState>& state_);
74
// Null-mask accessors. The inline ones below forward directly to the `nullMask` member.
75 void setAllNull() { nullMask.setAllNull(); }
76 void setAllNonNull() { nullMask.setAllNonNull(); }
77 // Returns true only when there are guaranteed to be no nulls. A false return is inconclusive: there may or may not be nulls.
78 bool hasNoNullsGuarantee() const { return nullMask.hasNoNullsGuarantee(); }
// Forwards (startPos, len, value) to NullMask::setNullFromRange.
79 void setNullRange(uint32_t startPos, uint32_t len, bool value) {
80 nullMask.setNullFromRange(startPos, len, value);
81 }
82 const NullMask& getNullMask() const { return nullMask; }
// Only declared here; unlike its neighbours it is not an inline forwarder in this header.
83 void setNull(uint32_t pos, bool isNull);
// Note the return type is uint8_t rather than bool; the value comes straight from NullMask::isNull.
84 uint8_t isNull(uint32_t pos) const { return nullMask.isNull(pos); }
86 state->getSelVectorUnsafe().setSelSize(1);
87 setNull(state->getSelVector()[0], true);
88 }
89
90 bool setNullFromBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
91 uint64_t numBitsToCopy, bool invert = false);
92
// Returns the `numBytesPerValue` member.
93 uint32_t getNumBytesPerValue() const { return numBytesPerValue; }
94
95 // TODO(Guodong): Rename this to getValueRef
// Returns a reference to element `pos` of the value buffer, treating the buffer as an array of T.
// Neither overload checks `pos` against any bound or checks that T matches the vector's data type.
96 template<typename T>
97 const T& getValue(uint32_t pos) const {
98 return ((T*)valueBuffer.get())[pos];
99 }
// Non-const overload: same lookup, but the returned reference allows writing in place.
100 template<typename T>
101 T& getValue(uint32_t pos) {
102 return ((T*)valueBuffer.get())[pos];
103 }
104 template<typename T>
105 void setValue(uint32_t pos, T val);
106 // copyFromRowData assumes rowData is non-NULL.
107 void copyFromRowData(uint32_t pos, const uint8_t* rowData);
108 // copyToRowData assumes srcVectorData is non-NULL.
109 void copyToRowData(uint32_t pos, uint8_t* rowData,
110 InMemOverflowBuffer* rowOverflowBuffer) const;
111 // copyFromVectorData assumes srcVectorData is non-NULL.
112 void copyFromVectorData(uint8_t* dstData, const ValueVector* srcVector,
113 const uint8_t* srcVectorData);
114 void copyFromVectorData(uint64_t dstPos, const ValueVector* srcVector, uint64_t srcPos);
115 void copyFromValue(uint64_t pos, const Value& value);
116
117 std::unique_ptr<Value> getAsValue(uint64_t pos) const;
118
// Returns the raw pointer held by `valueBuffer`; ownership stays with the vector.
119 uint8_t* getData() const { return valueBuffer.get(); }
120
// Reads element `pos` as a nodeID_t and returns its `offset` field.
// Asserts that the vector's logical type is INTERNAL_ID.
121 offset_t readNodeOffset(uint32_t pos) const {
122 KU_ASSERT(dataType.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID);
123 return getValue<nodeID_t>(pos).offset;
124 }
125
127
128 // If there is still non-null values after discarding, return true. Otherwise, return false.
129 // For an unflat vector, its selection vector is also updated to the resultSelVector.
130 static bool discardNull(ValueVector& vector);
131
132 void serialize(Serializer& ser) const;
133 static std::unique_ptr<ValueVector> deSerialize(Deserializer& deSer, storage::MemoryManager* mm,
134 std::shared_ptr<DataChunkState> dataChunkState);
135
137 return state ? &state->getSelVectorUnsafe() : nullptr;
138 }
139
140private:
141 uint32_t getDataTypeSize(const LogicalType& type);
142 void initializeValueBuffer();
143
144public:
146 std::shared_ptr<DataChunkState> state;
147
148private:
149 std::unique_ptr<uint8_t[]> valueBuffer;
150 NullMask nullMask;
151 uint32_t numBytesPerValue;
152 std::unique_ptr<AuxiliaryBuffer> auxiliaryBuffer;
153};
154
156public:
159 return ku_dynamic_cast<StringAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
160 ->getOverflowBuffer();
161 }
162
163 static void addString(ValueVector* vector, uint32_t vectorPos, ku_string_t& srcStr);
164 static void addString(ValueVector* vector, uint32_t vectorPos, const char* srcStr,
165 uint64_t length);
166 static void addString(ValueVector* vector, uint32_t vectorPos, const std::string& srcStr);
167 // Add empty string with space reserved for the provided size
168 // Returned value can be modified to set the string contents
169 static ku_string_t& reserveString(ValueVector* vector, uint32_t vectorPos, uint64_t length);
170 static void reserveString(ValueVector* vector, ku_string_t& dstStr, uint64_t length);
171 static void addString(ValueVector* vector, ku_string_t& dstStr, ku_string_t& srcStr);
172 static void addString(ValueVector* vector, ku_string_t& dstStr, const char* srcStr,
173 uint64_t length);
174 static void addString(kuzu::common::ValueVector* vector, ku_string_t& dstStr,
175 const std::string& srcStr);
176 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
177 InMemOverflowBuffer* rowOverflowBuffer);
178};
179
// Forwards (vector, pos, data, length) unchanged to StringVector::addString.
181 static void addBlob(ValueVector* vector, uint32_t pos, const char* data, uint32_t length) {
182 StringVector::addString(vector, pos, data, length);
183 }
// Overload for uint8_t buffers: reinterprets `data` as const char* and forwards to
// StringVector::addString. Note that `length` is uint64_t here but uint32_t in the overload above.
184 static void addBlob(ValueVector* vector, uint32_t pos, const uint8_t* data, uint64_t length) {
185 StringVector::addString(vector, pos, reinterpret_cast<const char*>(data), length);
186 }
187};
188
189// ListVector is used for both LIST and ARRAY physical type
191public:
// Returns the vector's auxiliary buffer as a const ListAuxiliaryBuffer reference (via constCast).
// This accessor does not itself call validateType.
192 static const ListAuxiliaryBuffer& getAuxBuffer(const ValueVector& vector) {
193 return vector.auxiliaryBuffer->constCast<ListAuxiliaryBuffer>();
194 }
196 return vector.auxiliaryBuffer->cast<ListAuxiliaryBuffer>();
197 }
198 // If you call setDataVector during initialization, it must be followed by a call to
199 // copyListEntryAndBufferMetaData at runtime.
200 // TODO(Xiyang): try to merge setDataVector & copyListEntryAndBufferMetaData
// Moves `dataVector` into the list auxiliary buffer of `vector`. `vector` is taken as a const
// pointer, but the buffer is reached through getAuxBufferUnsafe, which yields a non-const reference.
201 static void setDataVector(const ValueVector* vector, std::shared_ptr<ValueVector> dataVector) {
202 KU_ASSERT(validateType(*vector));
203 auto& listBuffer = getAuxBufferUnsafe(*vector);
204 listBuffer.setDataVector(std::move(dataVector));
205 }
207 const SelectionVector& selVector, const ValueVector& other,
208 const SelectionVector& otherSelVector);
// Returns the raw (non-owning) pointer to the data vector held by the list auxiliary buffer.
// Asserts that `vector` passes validateType.
209 static ValueVector* getDataVector(const ValueVector* vector) {
210 KU_ASSERT(validateType(*vector));
211 return getAuxBuffer(*vector).getDataVector();
212 }
// Same lookup as getDataVector, but returns a shared_ptr to the data vector instead of a raw pointer.
213 static std::shared_ptr<ValueVector> getSharedDataVector(const ValueVector* vector) {
214 KU_ASSERT(validateType(*vector));
215 return getAuxBuffer(*vector).getSharedDataVector();
216 }
// Returns whatever the list auxiliary buffer reports from getSize().
// NOTE(review): presumably the number of values currently in the data vector — confirm the unit.
217 static uint64_t getDataVectorSize(const ValueVector* vector) {
218 KU_ASSERT(validateType(*vector));
219 return getAuxBuffer(*vector).getSize();
220 }
// Returns a pointer into the data vector's raw buffer at byte offset
// getNumBytesPerValue() * listEntry.offset. No bounds check is done in this function.
221 static uint8_t* getListValues(const ValueVector* vector, const list_entry_t& listEntry) {
222 KU_ASSERT(validateType(*vector));
223 auto dataVector = getDataVector(vector);
224 return dataVector->getData() + dataVector->getNumBytesPerValue() * listEntry.offset;
225 }
// Returns the getListValues pointer for `listEntry`, advanced by a further
// `elementOffsetInList` values (each getNumBytesPerValue() bytes wide).
226 static uint8_t* getListValuesWithOffset(const ValueVector* vector,
227 const list_entry_t& listEntry, offset_t elementOffsetInList) {
228 KU_ASSERT(validateType(*vector));
229 return getListValues(vector, listEntry) +
230 elementOffsetInList * getDataVector(vector)->getNumBytesPerValue();
231 }
// Forwards `listSize` to the list auxiliary buffer's addList and returns the list_entry_t it produces.
232 static list_entry_t addList(ValueVector* vector, uint64_t listSize) {
233 KU_ASSERT(validateType(*vector));
234 return getAuxBufferUnsafe(*vector).addList(listSize);
235 }
// Forwards `numValues` to the list auxiliary buffer's resize().
236 static void resizeDataVector(ValueVector* vector, uint64_t numValues) {
237 KU_ASSERT(validateType(*vector));
238 getAuxBufferUnsafe(*vector).resize(numValues);
239 }
240
241 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
242 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
243 InMemOverflowBuffer* rowOverflowBuffer);
244 static void copyFromVectorData(ValueVector* dstVector, uint8_t* dstData,
245 const ValueVector* srcVector, const uint8_t* srcData);
246 static void appendDataVector(ValueVector* dstVector, ValueVector* srcDataVector,
247 uint64_t numValuesToAppend);
248 static void sliceDataVector(ValueVector* vectorToSlice, uint64_t offset, uint64_t numValues);
249
250private:
251 static bool validateType(const ValueVector& vector) {
252 switch (vector.dataType.getPhysicalType()) {
255 return true;
256 default:
257 return false;
258 }
259 }
260};
261
263public:
// Returns a const reference to the field-vector list of the struct auxiliary buffer.
// The auxiliary buffer is downcast to StructAuxiliaryBuffer with ku_dynamic_cast.
264 static const std::vector<std::shared_ptr<ValueVector>>& getFieldVectors(
265 const ValueVector* vector) {
266 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
267 ->getFieldVectors();
268 }
269
// Returns the field vector at index `idx` as a shared_ptr (by value), obtained from the
// struct auxiliary buffer's getFieldVectorShared.
270 static std::shared_ptr<ValueVector> getFieldVector(const ValueVector* vector,
271 struct_field_idx_t idx) {
272 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
273 ->getFieldVectorShared(idx);
274 }
275
// Looks up a field by name and returns a raw pointer to its vector.
276 static ValueVector* getFieldVectorRaw(const ValueVector& vector, const std::string& fieldName) {
// Resolve the name to an index using the vector's own data type.
277 auto idx = StructType::getFieldIdx(vector.dataType, fieldName);
278 return ku_dynamic_cast<StructAuxiliaryBuffer*>(vector.auxiliaryBuffer.get())
279 ->getFieldVectorPtr(idx);
280 }
281
283 std::shared_ptr<ValueVector> vectorToReference) {
284 ku_dynamic_cast<StructAuxiliaryBuffer*>(vector->auxiliaryBuffer.get())
285 ->referenceChildVector(idx, std::move(vectorToReference));
286 }
287
288 static void copyFromRowData(ValueVector* vector, uint32_t pos, const uint8_t* rowData);
289 static void copyToRowData(const ValueVector* vector, uint32_t pos, uint8_t* rowData,
290 InMemOverflowBuffer* rowOverflowBuffer);
291 static void copyFromVectorData(ValueVector* dstVector, const uint8_t* dstData,
292 const ValueVector* srcVector, const uint8_t* srcData);
293};
294
296public:
301
302 static inline ValueVector* getValVector(const ValueVector* vector, union_field_idx_t fieldIdx) {
304 return StructVector::getFieldVector(vector, UnionType::getInternalFieldIdx(fieldIdx)).get();
305 }
306
307 static inline void referenceVector(ValueVector* vector, union_field_idx_t fieldIdx,
308 std::shared_ptr<ValueVector> vectorToReference) {
310 std::move(vectorToReference));
311 }
312
313 static inline void setTagField(ValueVector& vector, SelectionVector& sel,
314 union_field_idx_t tag) {
316 for (auto i = 0u; i < sel.getSelSize(); i++) {
317 vector.setValue<struct_field_idx_t>(sel[i], tag);
318 }
319 }
320};
321
323public:
// Returns field 0 of the list's data vector as a raw pointer (the key field, per the inline comment).
// The shared_ptr returned by getFieldVector is a temporary, so the returned pointer is non-owning.
324 static inline ValueVector* getKeyVector(const ValueVector* vector) {
325 return StructVector::getFieldVector(ListVector::getDataVector(vector), 0 /* keyVectorPos */)
326 .get();
327 }
328
// Returns field 1 of the list's data vector as a raw pointer (the value field, per the inline comment).
// The shared_ptr returned by getFieldVector is a temporary, so the returned pointer is non-owning.
329 static inline ValueVector* getValueVector(const ValueVector* vector) {
330 return StructVector::getFieldVector(ListVector::getDataVector(vector), 1 /* valVectorPos */)
331 .get();
332 }
333
// Returns a pointer into the key vector's raw buffer at byte offset
// getNumBytesPerValue() * listEntry.offset. No bounds check is done in this function.
334 static inline uint8_t* getMapKeys(const ValueVector* vector, const list_entry_t& listEntry) {
335 auto keyVector = getKeyVector(vector);
336 return keyVector->getData() + keyVector->getNumBytesPerValue() * listEntry.offset;
337 }
338
// Returns a pointer into the value vector's raw buffer at byte offset
// getNumBytesPerValue() * listEntry.offset. No bounds check is done in this function.
339 static inline uint8_t* getMapValues(const ValueVector* vector, const list_entry_t& listEntry) {
340 auto valueVector = getValueVector(vector);
341 return valueVector->getData() + valueVector->getNumBytesPerValue() * listEntry.offset;
342 }
343};
344
345} // namespace common
346} // namespace kuzu
#define KUZU_API
Definition api.h:25
#define KU_ASSERT(condition)
Definition assert.h:19
Definition in_mem_overflow_buffer.h:33
Definition auxiliary_buffer.h:79
Definition value_vector.h:190
static void setDataVector(const ValueVector *vector, std::shared_ptr< ValueVector > dataVector)
Definition value_vector.h:201
static void sliceDataVector(ValueVector *vectorToSlice, uint64_t offset, uint64_t numValues)
static void copyFromVectorData(ValueVector *dstVector, uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void appendDataVector(ValueVector *dstVector, ValueVector *srcDataVector, uint64_t numValuesToAppend)
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static list_entry_t addList(ValueVector *vector, uint64_t listSize)
Definition value_vector.h:232
static const ListAuxiliaryBuffer & getAuxBuffer(const ValueVector &vector)
Definition value_vector.h:192
static void resizeDataVector(ValueVector *vector, uint64_t numValues)
Definition value_vector.h:236
static void copyListEntryAndBufferMetaData(ValueVector &vector, const SelectionVector &selVector, const ValueVector &other, const SelectionVector &otherSelVector)
static ListAuxiliaryBuffer & getAuxBufferUnsafe(const ValueVector &vector)
Definition value_vector.h:195
static uint8_t * getListValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:221
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static uint64_t getDataVectorSize(const ValueVector *vector)
Definition value_vector.h:217
static ValueVector * getDataVector(const ValueVector *vector)
Definition value_vector.h:209
static uint8_t * getListValuesWithOffset(const ValueVector *vector, const list_entry_t &listEntry, offset_t elementOffsetInList)
Definition value_vector.h:226
static std::shared_ptr< ValueVector > getSharedDataVector(const ValueVector *vector)
Definition value_vector.h:213
Definition types.h:256
KUZU_API LogicalTypeID getLogicalTypeID() const
Definition types.h:279
KUZU_API PhysicalTypeID getPhysicalType() const
Definition types.h:283
Definition value_vector.h:322
static ValueVector * getValueVector(const ValueVector *vector)
Definition value_vector.h:329
static uint8_t * getMapKeys(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:334
static uint8_t * getMapValues(const ValueVector *vector, const list_entry_t &listEntry)
Definition value_vector.h:339
static ValueVector * getKeyVector(const ValueVector *vector)
Definition value_vector.h:324
Definition null_mask.h:72
Definition sel_vector.h:98
sel_t getSelSize() const
Definition sel_vector.h:68
Definition value_vector.h:155
static void reserveString(ValueVector *vector, ku_string_t &dstStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, ku_string_t &dstStr, ku_string_t &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, const char *srcStr, uint64_t length)
static void addString(ValueVector *vector, uint32_t vectorPos, const std::string &srcStr)
static void addString(ValueVector *vector, uint32_t vectorPos, ku_string_t &srcStr)
static ku_string_t & reserveString(ValueVector *vector, uint32_t vectorPos, uint64_t length)
static InMemOverflowBuffer * getInMemOverflowBuffer(ValueVector *vector)
Definition value_vector.h:157
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static void addString(kuzu::common::ValueVector *vector, ku_string_t &dstStr, const std::string &srcStr)
Definition value_vector.h:262
static const std::vector< std::shared_ptr< ValueVector > > & getFieldVectors(const ValueVector *vector)
Definition value_vector.h:264
static void copyFromVectorData(ValueVector *dstVector, const uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcData)
static void copyFromRowData(ValueVector *vector, uint32_t pos, const uint8_t *rowData)
static void referenceVector(ValueVector *vector, struct_field_idx_t idx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:282
static void copyToRowData(const ValueVector *vector, uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer)
static ValueVector * getFieldVectorRaw(const ValueVector &vector, const std::string &fieldName)
Definition value_vector.h:276
static std::shared_ptr< ValueVector > getFieldVector(const ValueVector *vector, struct_field_idx_t idx)
Definition value_vector.h:270
Definition value_vector.h:295
static void setTagField(ValueVector &vector, SelectionVector &sel, union_field_idx_t tag)
Definition value_vector.h:313
static void referenceVector(ValueVector *vector, union_field_idx_t fieldIdx, std::shared_ptr< ValueVector > vectorToReference)
Definition value_vector.h:307
static ValueVector * getValVector(const ValueVector *vector, union_field_idx_t fieldIdx)
Definition value_vector.h:302
static ValueVector * getTagVector(const ValueVector *vector)
Definition value_vector.h:297
Definition value.h:26
Definition value_vector.h:21
friend class ArrowColumnVector
Definition value_vector.h:26
static bool discardNull(ValueVector &vector)
uint8_t * getData() const
Definition value_vector.h:119
ValueVector(LogicalType dataType, storage::MemoryManager *memoryManager=nullptr, std::shared_ptr< DataChunkState > dataChunkState=nullptr)
void copyFromRowData(uint32_t pos, const uint8_t *rowData)
void copyToRowData(uint32_t pos, uint8_t *rowData, InMemOverflowBuffer *rowOverflowBuffer) const
void forEachNonNull(Func &&func) const
Definition value_vector.h:59
void setState(const std::shared_ptr< DataChunkState > &state_)
void copyFromValue(uint64_t pos, const Value &value)
std::unique_ptr< Value > getAsValue(uint64_t pos) const
friend class StringVector
Definition value_vector.h:25
void setAllNonNull()
Definition value_vector.h:76
uint8_t isNull(uint32_t pos) const
Definition value_vector.h:84
void serialize(Serializer &ser) const
ValueVector(LogicalTypeID dataTypeID, storage::MemoryManager *memoryManager=nullptr)
Definition value_vector.h:31
const T & getValue(uint32_t pos) const
Definition value_vector.h:97
LogicalType dataType
Definition value_vector.h:145
friend class ListVector
Definition value_vector.h:22
static std::unique_ptr< ValueVector > deSerialize(Deserializer &deSer, storage::MemoryManager *mm, std::shared_ptr< DataChunkState > dataChunkState)
friend class ListAuxiliaryBuffer
Definition value_vector.h:23
void copyFromVectorData(uint64_t dstPos, const ValueVector *srcVector, uint64_t srcPos)
uint32_t countNonNull() const
DELETE_COPY_AND_MOVE(ValueVector)
const NullMask & getNullMask() const
Definition value_vector.h:82
friend class StructVector
Definition value_vector.h:24
bool hasNoNullsGuarantee() const
Definition value_vector.h:78
T & getValue(uint32_t pos)
Definition value_vector.h:101
void setAllNull()
Definition value_vector.h:75
uint32_t getNumBytesPerValue() const
Definition value_vector.h:93
void setAsSingleNullEntry()
Definition value_vector.h:85
void setValue(uint32_t pos, T val)
void copyFromVectorData(uint8_t *dstData, const ValueVector *srcVector, const uint8_t *srcVectorData)
offset_t readNodeOffset(uint32_t pos) const
Definition value_vector.h:121
bool setNullFromBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
void setNullRange(uint32_t startPos, uint32_t len, bool value)
Definition value_vector.h:79
void setNull(uint32_t pos, bool isNull)
SelectionVector * getSelVectorPtr() const
Definition value_vector.h:136
std::shared_ptr< DataChunkState > state
Definition value_vector.h:146
std::optional< T > firstNonNull() const
Definition value_vector.h:40
Definition array_utils.h:7
@ LIST
Definition types.h:244
@ STRING
Definition types.h:243
@ ARRAY
Definition types.h:245
struct_field_idx_t union_field_idx_t
Definition types.h:50
uint64_t sel_t
Definition types.h:30
LogicalTypeID
Definition types.h:177
@ LIST
Definition types.h:211
@ INTERNAL_ID
Definition types.h:206
@ UNION
Definition types.h:215
uint64_t offset_t
Definition types.h:79
uint8_t struct_field_idx_t
Definition types.h:49
TO ku_dynamic_cast(FROM *old)
Definition cast.h:11
Definition array_utils.h:7
Definition value_vector.h:180
static void addBlob(ValueVector *vector, uint32_t pos, const uint8_t *data, uint64_t length)
Definition value_vector.h:184
static void addBlob(ValueVector *vector, uint32_t pos, const char *data, uint32_t length)
Definition value_vector.h:181
static struct_field_idx_t getFieldIdx(const LogicalType &type, const std::string &key)
static union_field_idx_t getInternalFieldIdx(union_field_idx_t idx)
static constexpr union_field_idx_t TAG_FIELD_IDX
Definition types.h:597
Definition ku_string.h:12
Definition types.h:114
offset_t offset
Definition types.h:115