Kuzu C++ API
Loading...
Searching...
No Matches
null_mask.h
Go to the documentation of this file.
1#pragma once
2
3#include <cstdint>
4#include <memory>
5
6#include "assert.h"
7#include <span>
8
9namespace kuzu {
10namespace common {
11
12class ArrowNullMaskTree;
13class Serializer;
14class Deserializer;
15
// Single-bit selection masks: entry k has exactly bit k set (k in [0, 63]).
// AND-ing a null entry with entry k isolates the null flag stored at bit k.
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64] = {
    UINT64_C(1) << 0, UINT64_C(1) << 1, UINT64_C(1) << 2, UINT64_C(1) << 3,
    UINT64_C(1) << 4, UINT64_C(1) << 5, UINT64_C(1) << 6, UINT64_C(1) << 7,
    UINT64_C(1) << 8, UINT64_C(1) << 9, UINT64_C(1) << 10, UINT64_C(1) << 11,
    UINT64_C(1) << 12, UINT64_C(1) << 13, UINT64_C(1) << 14, UINT64_C(1) << 15,
    UINT64_C(1) << 16, UINT64_C(1) << 17, UINT64_C(1) << 18, UINT64_C(1) << 19,
    UINT64_C(1) << 20, UINT64_C(1) << 21, UINT64_C(1) << 22, UINT64_C(1) << 23,
    UINT64_C(1) << 24, UINT64_C(1) << 25, UINT64_C(1) << 26, UINT64_C(1) << 27,
    UINT64_C(1) << 28, UINT64_C(1) << 29, UINT64_C(1) << 30, UINT64_C(1) << 31,
    UINT64_C(1) << 32, UINT64_C(1) << 33, UINT64_C(1) << 34, UINT64_C(1) << 35,
    UINT64_C(1) << 36, UINT64_C(1) << 37, UINT64_C(1) << 38, UINT64_C(1) << 39,
    UINT64_C(1) << 40, UINT64_C(1) << 41, UINT64_C(1) << 42, UINT64_C(1) << 43,
    UINT64_C(1) << 44, UINT64_C(1) << 45, UINT64_C(1) << 46, UINT64_C(1) << 47,
    UINT64_C(1) << 48, UINT64_C(1) << 49, UINT64_C(1) << 50, UINT64_C(1) << 51,
    UINT64_C(1) << 52, UINT64_C(1) << 53, UINT64_C(1) << 54, UINT64_C(1) << 55,
    UINT64_C(1) << 56, UINT64_C(1) << 57, UINT64_C(1) << 58, UINT64_C(1) << 59,
    UINT64_C(1) << 60, UINT64_C(1) << 61, UINT64_C(1) << 62, UINT64_C(1) << 63};
// Single-bit clearing masks: entry k has every bit set except bit k (k in [0, 63]),
// i.e. the bitwise complement of NULL_BITMASKS_WITH_SINGLE_ONE[k].
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64] = {
    ~(UINT64_C(1) << 0), ~(UINT64_C(1) << 1), ~(UINT64_C(1) << 2), ~(UINT64_C(1) << 3),
    ~(UINT64_C(1) << 4), ~(UINT64_C(1) << 5), ~(UINT64_C(1) << 6), ~(UINT64_C(1) << 7),
    ~(UINT64_C(1) << 8), ~(UINT64_C(1) << 9), ~(UINT64_C(1) << 10), ~(UINT64_C(1) << 11),
    ~(UINT64_C(1) << 12), ~(UINT64_C(1) << 13), ~(UINT64_C(1) << 14), ~(UINT64_C(1) << 15),
    ~(UINT64_C(1) << 16), ~(UINT64_C(1) << 17), ~(UINT64_C(1) << 18), ~(UINT64_C(1) << 19),
    ~(UINT64_C(1) << 20), ~(UINT64_C(1) << 21), ~(UINT64_C(1) << 22), ~(UINT64_C(1) << 23),
    ~(UINT64_C(1) << 24), ~(UINT64_C(1) << 25), ~(UINT64_C(1) << 26), ~(UINT64_C(1) << 27),
    ~(UINT64_C(1) << 28), ~(UINT64_C(1) << 29), ~(UINT64_C(1) << 30), ~(UINT64_C(1) << 31),
    ~(UINT64_C(1) << 32), ~(UINT64_C(1) << 33), ~(UINT64_C(1) << 34), ~(UINT64_C(1) << 35),
    ~(UINT64_C(1) << 36), ~(UINT64_C(1) << 37), ~(UINT64_C(1) << 38), ~(UINT64_C(1) << 39),
    ~(UINT64_C(1) << 40), ~(UINT64_C(1) << 41), ~(UINT64_C(1) << 42), ~(UINT64_C(1) << 43),
    ~(UINT64_C(1) << 44), ~(UINT64_C(1) << 45), ~(UINT64_C(1) << 46), ~(UINT64_C(1) << 47),
    ~(UINT64_C(1) << 48), ~(UINT64_C(1) << 49), ~(UINT64_C(1) << 50), ~(UINT64_C(1) << 51),
    ~(UINT64_C(1) << 52), ~(UINT64_C(1) << 53), ~(UINT64_C(1) << 54), ~(UINT64_C(1) << 55),
    ~(UINT64_C(1) << 56), ~(UINT64_C(1) << 57), ~(UINT64_C(1) << 58), ~(UINT64_C(1) << 59),
    ~(UINT64_C(1) << 60), ~(UINT64_C(1) << 61), ~(UINT64_C(1) << 62), ~(UINT64_C(1) << 63)};
43
// Low-bit masks: entry n has the n least-significant bits set (n in [0, 64]), so
// entry 0 is empty and entry 64 is all ones.
// Declared constexpr (like the single-bit tables in this header) so the values can be used
// in constant expressions.
constexpr uint64_t NULL_LOWER_MASKS[65] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff,
    0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff,
    0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 0x1fffffff,
    0x3fffffff, 0x7fffffff, 0xffffffff, 0x1ffffffff, 0x3ffffffff, 0x7ffffffff, 0xfffffffff,
    0x1fffffffff, 0x3fffffffff, 0x7fffffffff, 0xffffffffff, 0x1ffffffffff, 0x3ffffffffff,
    0x7ffffffffff, 0xfffffffffff, 0x1fffffffffff, 0x3fffffffffff, 0x7fffffffffff, 0xffffffffffff,
    0x1ffffffffffff, 0x3ffffffffffff, 0x7ffffffffffff, 0xfffffffffffff, 0x1fffffffffffff,
    0x3fffffffffffff, 0x7fffffffffffff, 0xffffffffffffff, 0x1ffffffffffffff, 0x3ffffffffffffff,
    0x7ffffffffffffff, 0xfffffffffffffff, 0x1fffffffffffffff, 0x3fffffffffffffff,
    0x7fffffffffffffff, 0xffffffffffffffff};
// High-bit masks: entry n has the n most-significant bits set (n in [0, 64]), so
// entry 0 is empty and entry 64 is all ones.
// Declared constexpr (like the single-bit tables in this header) so the values can be used
// in constant expressions.
constexpr uint64_t NULL_HIGH_MASKS[65] = {0x0, 0x8000000000000000, 0xc000000000000000,
    0xe000000000000000, 0xf000000000000000, 0xf800000000000000, 0xfc00000000000000,
    0xfe00000000000000, 0xff00000000000000, 0xff80000000000000, 0xffc0000000000000,
    0xffe0000000000000, 0xfff0000000000000, 0xfff8000000000000, 0xfffc000000000000,
    0xfffe000000000000, 0xffff000000000000, 0xffff800000000000, 0xffffc00000000000,
    0xffffe00000000000, 0xfffff00000000000, 0xfffff80000000000, 0xfffffc0000000000,
    0xfffffe0000000000, 0xffffff0000000000, 0xffffff8000000000, 0xffffffc000000000,
    0xffffffe000000000, 0xfffffff000000000, 0xfffffff800000000, 0xfffffffc00000000,
    0xfffffffe00000000, 0xffffffff00000000, 0xffffffff80000000, 0xffffffffc0000000,
    0xffffffffe0000000, 0xfffffffff0000000, 0xfffffffff8000000, 0xfffffffffc000000,
    0xfffffffffe000000, 0xffffffffff000000, 0xffffffffff800000, 0xffffffffffc00000,
    0xffffffffffe00000, 0xfffffffffff00000, 0xfffffffffff80000, 0xfffffffffffc0000,
    0xfffffffffffe0000, 0xffffffffffff0000, 0xffffffffffff8000, 0xffffffffffffc000,
    0xffffffffffffe000, 0xfffffffffffff000, 0xfffffffffffff800, 0xfffffffffffffc00,
    0xfffffffffffffe00, 0xffffffffffffff00, 0xffffffffffffff80, 0xffffffffffffffc0,
    0xffffffffffffffe0, 0xfffffffffffffff0, 0xfffffffffffffff8, 0xfffffffffffffffc,
    0xfffffffffffffffe, 0xffffffffffffffff};
71
72class NullMask {
73public:
74 static constexpr uint64_t NO_NULL_ENTRY = 0;
75 static constexpr uint64_t ALL_NULL_ENTRY = ~uint64_t(NO_NULL_ENTRY);
76 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2 = 6;
77 static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY = (uint64_t)1 << NUM_BITS_PER_NULL_ENTRY_LOG2;
78 static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY = NUM_BITS_PER_NULL_ENTRY >> 3;
79
80 // For creating a managed null mask
81 explicit NullMask(uint64_t capacity) : mayContainNulls{false} {
82 auto numNullEntries = (capacity + NUM_BITS_PER_NULL_ENTRY - 1) / NUM_BITS_PER_NULL_ENTRY;
83 buffer = std::make_unique<uint64_t[]>(numNullEntries);
84 data = std::span(buffer.get(), numNullEntries);
85 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
86 }
87
88 // For creating a null mask using existing data
89 explicit NullMask(std::span<uint64_t> nullData, bool mayContainNulls)
90 : data{nullData}, buffer{}, mayContainNulls{mayContainNulls} {}
91
92 inline void setAllNonNull() {
93 if (!mayContainNulls) {
94 return;
95 }
96 std::fill(data.begin(), data.end(), NO_NULL_ENTRY);
97 mayContainNulls = false;
98 }
99 inline void setAllNull() {
100 std::fill(data.begin(), data.end(), ALL_NULL_ENTRY);
101 mayContainNulls = true;
102 }
103
104 inline bool hasNoNullsGuarantee() const { return !mayContainNulls; }
105 uint64_t countNulls() const;
106
107 static void setNull(uint64_t* nullEntries, uint32_t pos, bool isNull);
108 inline void setNull(uint32_t pos, bool isNull) {
109 KU_ASSERT(pos < getNumNullBits(data));
110 setNull(data.data(), pos, isNull);
111 if (isNull) {
112 mayContainNulls = true;
113 }
114 }
115
116 static inline bool isNull(const uint64_t* nullEntries, uint32_t pos) {
117 auto [entryPos, bitPosInEntry] = getNullEntryAndBitPos(pos);
118 return nullEntries[entryPos] & NULL_BITMASKS_WITH_SINGLE_ONE[bitPosInEntry];
119 }
120
121 static uint64_t getNumNullBits(std::span<uint64_t> data) {
122 return data.size() * NullMask::NUM_BITS_PER_NULL_ENTRY;
123 }
124
125 inline bool isNull(uint32_t pos) const {
126 KU_ASSERT(pos < getNumNullBits(data));
127 return isNull(data.data(), pos);
128 }
129
130 // const because updates to the data must set mayContainNulls if any value
131 // becomes non-null
132 // Modifying the underlying data should be done with setNull or copyFromNullData
133 inline const uint64_t* getData() const { return data.data(); }
134
135 static inline uint64_t getNumNullEntries(uint64_t numNullBits) {
136 return (numNullBits >> NUM_BITS_PER_NULL_ENTRY_LOG2) +
137 ((numNullBits - (numNullBits << NUM_BITS_PER_NULL_ENTRY_LOG2)) == 0 ? 0 : 1);
138 }
139
140 // Copies bitpacked null flags from one buffer to another, starting at an arbitrary bit
141 // offset and preserving adjacent bits.
142 //
143 // returns true if we have copied a nullBit with value 1 (indicates a null value) to
144 // dstNullEntries.
145 static bool copyNullMask(const uint64_t* srcNullEntries, uint64_t srcOffset,
146 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
147
148 inline bool copyFrom(const NullMask& nullMask, uint64_t srcOffset, uint64_t dstOffset,
149 uint64_t numBitsToCopy, bool invert = false) {
150 if (nullMask.hasNoNullsGuarantee()) {
151 setNullFromRange(dstOffset, numBitsToCopy, invert);
152 return invert;
153 } else {
154 return copyFromNullBits(nullMask.getData(), srcOffset, dstOffset, numBitsToCopy,
155 invert);
156 }
157 }
158 bool copyFromNullBits(const uint64_t* srcNullEntries, uint64_t srcOffset, uint64_t dstOffset,
159 uint64_t numBitsToCopy, bool invert = false);
160
161 // Sets the given number of bits to null (if isNull is true) or non-null (if isNull is false),
162 // starting at the offset
163 static void setNullRange(uint64_t* nullEntries, uint64_t offset, uint64_t numBitsToSet,
164 bool isNull);
165
166 void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull);
167
168 void resize(uint64_t capacity);
169
170 void operator|=(const NullMask& other);
171
172 // Fast calculation of the minimum and maximum null values
173 // (essentially just three states, all null, all non-null and some null)
174 static std::pair<bool, bool> getMinMax(const uint64_t* nullEntries, uint64_t offset,
175 uint64_t numValues);
176
177private:
178 static inline std::pair<uint64_t, uint64_t> getNullEntryAndBitPos(uint64_t pos) {
179 auto nullEntryPos = pos >> NUM_BITS_PER_NULL_ENTRY_LOG2;
180 return std::make_pair(nullEntryPos,
181 pos - (nullEntryPos << NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2));
182 }
183
184 static bool copyUnaligned(const uint64_t* srcNullEntries, uint64_t srcOffset,
185 uint64_t* dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert = false);
186
187private:
188 std::span<uint64_t> data;
189 std::unique_ptr<uint64_t[]> buffer;
190 bool mayContainNulls;
191};
192
193} // namespace common
194} // namespace kuzu
#define KU_ASSERT(condition)
Definition assert.h:19
void setAllNonNull()
Definition null_mask.h:92
bool copyFromNullBits(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
NullMask(std::span< uint64_t > nullData, bool mayContainNulls)
Definition null_mask.h:89
bool hasNoNullsGuarantee() const
Definition null_mask.h:104
static bool copyNullMask(const uint64_t *srcNullEntries, uint64_t srcOffset, uint64_t *dstNullEntries, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
void operator|=(const NullMask &other)
void setAllNull()
Definition null_mask.h:99
NullMask(uint64_t capacity)
Definition null_mask.h:81
const uint64_t * getData() const
Definition null_mask.h:133
static constexpr uint64_t NUM_BYTES_PER_NULL_ENTRY
Definition null_mask.h:78
static bool isNull(const uint64_t *nullEntries, uint32_t pos)
Definition null_mask.h:116
static void setNullRange(uint64_t *nullEntries, uint64_t offset, uint64_t numBitsToSet, bool isNull)
bool isNull(uint32_t pos) const
Definition null_mask.h:125
static uint64_t getNumNullEntries(uint64_t numNullBits)
Definition null_mask.h:135
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY
Definition null_mask.h:77
static void setNull(uint64_t *nullEntries, uint32_t pos, bool isNull)
void setNullFromRange(uint64_t offset, uint64_t numBitsToSet, bool isNull)
static std::pair< bool, bool > getMinMax(const uint64_t *nullEntries, uint64_t offset, uint64_t numValues)
bool copyFrom(const NullMask &nullMask, uint64_t srcOffset, uint64_t dstOffset, uint64_t numBitsToCopy, bool invert=false)
Definition null_mask.h:148
void setNull(uint32_t pos, bool isNull)
Definition null_mask.h:108
uint64_t countNulls() const
static constexpr uint64_t NUM_BITS_PER_NULL_ENTRY_LOG2
Definition null_mask.h:76
void resize(uint64_t capacity)
static uint64_t getNumNullBits(std::span< uint64_t > data)
Definition null_mask.h:121
static constexpr uint64_t ALL_NULL_ENTRY
Definition null_mask.h:75
static constexpr uint64_t NO_NULL_ENTRY
Definition null_mask.h:74
Definition array_utils.h:7
const uint64_t NULL_LOWER_MASKS[65]
Definition null_mask.h:44
const uint64_t NULL_HIGH_MASKS[65]
Definition null_mask.h:54
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ZERO[64]
Definition null_mask.h:26
constexpr uint64_t NULL_BITMASKS_WITH_SINGLE_ONE[64]
Definition null_mask.h:16
Definition array_utils.h:7