You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
666 lines
28 KiB
666 lines
28 KiB
#pragma once |
|
|
|
#include <array>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
|
|
|
#ifdef __GNUG__ |
|
#pragma GCC target("avx2") // GCC will only compile AVX2 if we tell it to. |
|
#endif |
|
|
|
#include <tmmintrin.h> |
|
#include <immintrin.h> |
|
|
|
#include "CpuFeatures.hpp" |
|
// Joins two 128-bit vectors into one 256-bit vector: v1 becomes the low lane, v0 the high lane.
// NOTE(review): presumably provided here for toolchains whose headers lack this helper - confirm.
#define _mm256_set_m128i(v0, v1) \
    _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
|
|
|
namespace base64 { |
|
// Selects which implementation the bulk encode/decode helpers use.
enum class Codepath {
    Auto  = 0,  // Resolved at runtime through detail::get_auto_codepath().
    Basic = 1,  // No bulk SIMD pass; the bulk helpers return 0 for this value.
    SSSE3 = 2,  // 128-bit SSSE3 bulk routines.
    AVX2  = 3   // 256-bit AVX2 bulk routines.
};
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
namespace detail { |
|
// Encoding alphabet: the character stored at index i represents the 6-bit value i.
// All 64 entries are valid.
constexpr std::string_view Base64LUT{ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" };

// Builds the reverse mapping of Base64LUT at compile time: entry c holds the 6-bit value of
// character c, and every byte that is not part of the alphabet keeps the value 0.
constexpr std::array<uint8_t, 256> make_base64_inverse_lut() {
    std::array<uint8_t, 256> table{};
    for (std::size_t value = 0; value < Base64LUT.size(); ++value) {
        table[static_cast<uint8_t>(Base64LUT[value])] = static_cast<uint8_t>(value);
    }
    return table;
}

// Decoding table from 8-bit base64 characters to 6-bit values. Invalid characters map to zero,
// i.e. the table performs no validation.
constexpr std::array<uint8_t, 256> Base64InverseLUT = make_base64_inverse_lut();
|
|
|
// Chooses a codepath from the flags reported by cpu_features::get_features():
// AVX2 is preferred over SSSE3, and Basic is returned when neither flag is set.
inline Codepath get_auto_codepath() {
    using namespace cpu_features;

    auto supported = get_features();
    if (supported & Features::AVX2) {
        return Codepath::AVX2;
    }
    return (supported & Features::SSSE3) ? Codepath::SSSE3 : Codepath::Basic;
}
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
//---------------------------------------------------------------------------------------------------- |
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
// Bulk base64 encoder using SSSE3: every step turns 12 source bytes into 16 output characters.
//
// Parameters:
//   source_data         - bytes to encode.
//   source_data_length  - number of readable bytes at source_data.
//   dest_ptr            - output cursor, passed by reference; each step stores exactly 16 bytes
//                         through it and then advances it by 16.
// Returns the number of source bytes consumed (a multiple of 12, possibly 0). The caller is
// expected to encode whatever remains.
//
// Each step issues a full 16-byte load although only the first 12 bytes are used, so a step is
// taken only while at least 16 readable bytes remain at the current offset. Inputs shorter than
// 16 bytes are therefore left entirely to the caller.
//
// Code based on work by Wojciech Muła
// Ref: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html
inline size_t encode_bulk_ssse3(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr
) {
    constexpr size_t kBytesPerStep = 12;  // source bytes consumed per iteration
    constexpr size_t kBytesPerLoad = 16;  // source bytes touched by the unaligned load

    if (source_data_length < kBytesPerLoad) {
        return 0;
    }

    // Largest step count whose final load (at offset (count - 1) * 12) still ends inside the
    // buffer, i.e. count * 12 + 4 <= source_data_length.
    const size_t loop_count = (source_data_length - (kBytesPerLoad - kBytesPerStep)) / kBytesPerStep;
    const size_t loop_end = loop_count * kBytesPerStep;

    const __m128i preshuffle = _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);
    const __m128i t0_mask = _mm_set1_epi32(0x0fc0fc00);
    const __m128i t1_values = _mm_set1_epi32(0x04000040);
    const __m128i t2_mask = _mm_set1_epi32(0x003f03f0);
    const __m128i t3_values = _mm_set1_epi32(0x01000010);
    const __m128i all_51 = _mm_set1_epi8(51);
    const __m128i all_26 = _mm_set1_epi8(26);
    const __m128i all_13 = _mm_set1_epi8(13);
    const __m128i shift_lut = _mm_setr_epi8(
        'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
        '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62,
        '/' - 63, 'A', 0, 0
    );

    for (size_t i = 0; i < loop_end; i += kBytesPerStep, dest_ptr += 16) {
        // Load four 3-byte groups at once (the top 4 bytes of the load are ignored).
        // [????|dddc|ccbb|baaa]
        const __m128i loaded = _mm_loadu_si128(reinterpret_cast<const __m128i*>(source_data + i));

        // Spread each 3-byte group over its own 32-bit lane.
        // [?ddd|?ccc|?bbb|?aaa]
        const __m128i grouped = _mm_shuffle_epi8(loaded, preshuffle);

        // t0       = [0000cccc|CC000000|aaaaaa00|00000000]
        // t1       = [00000000|00cccccc|00000000|00aaaaaa]
        // t2       = [00000000|00dddddd|000000bb|bbbb0000]
        // t3       = [00dddddd|00000000|00bbbbbb|00000000]
        // unpacked = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
        const __m128i t0 = _mm_and_si128(grouped, t0_mask);
        const __m128i t2 = _mm_and_si128(grouped, t2_mask);
        const __m128i t1 = _mm_mulhi_epu16(t0, t1_values);
        const __m128i t3 = _mm_mullo_epi16(t2, t3_values);
        const __m128i unpacked = _mm_or_si128(t1, t3);

        // Map every 6-bit value to an index into shift_lut, then add the selected offset to
        // obtain the base64 character without a 64-entry lookup table.
        const __m128i below_26 = _mm_and_si128(_mm_cmpgt_epi8(all_26, unpacked), all_13);
        const __m128i reduced = _mm_or_si128(_mm_subs_epu8(unpacked, all_51), below_26);
        const __m128i result = _mm_add_epi8(_mm_shuffle_epi8(shift_lut, reduced), unpacked);

        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_ptr), result);
    }

    return loop_end;
}
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
// Bulk base64 encoder using AVX2: every step turns 24 source bytes into 32 output characters.
//
// Parameters:
//   source_data         - bytes to encode.
//   source_data_length  - number of readable bytes at source_data.
//   dest_ptr            - output cursor, passed by reference; each step stores exactly 32 bytes
//                         through it and then advances it by 32.
// Returns the number of source bytes consumed (a multiple of 24, possibly 0). The caller is
// expected to encode whatever remains.
//
// Each step issues two 16-byte loads, at offsets i and i + 12, so it touches 28 source bytes
// although only 24 are used. A step is therefore taken only while at least 28 readable bytes
// remain at the current offset; shorter inputs are left entirely to the caller.
//
// Code based on work by Wojciech Muła
// Ref: http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html
inline size_t encode_bulk_avx2(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr
) {
    constexpr size_t kBytesPerStep = 24;   // source bytes consumed per iteration
    constexpr size_t kBytesTouched = 28;   // source bytes covered by the two 16-byte loads

    if (source_data_length < kBytesTouched) {
        return 0;
    }

    // Largest step count whose final high-half load still ends inside the buffer,
    // i.e. count * 24 + 4 <= source_data_length.
    const size_t loop_count = (source_data_length - (kBytesTouched - kBytesPerStep)) / kBytesPerStep;
    const size_t loop_end = loop_count * kBytesPerStep;

    // Both 128-bit lanes use the same per-lane constants.
    const __m256i preshuffle = _mm256_broadcastsi128_si256(
        _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1)
    );
    const __m256i t0_mask = _mm256_set1_epi32(0x0fc0fc00);
    const __m256i t1_values = _mm256_set1_epi32(0x04000040);
    const __m256i t2_mask = _mm256_set1_epi32(0x003f03f0);
    const __m256i t3_values = _mm256_set1_epi32(0x01000010);
    const __m256i all_51 = _mm256_set1_epi8(51);
    const __m256i all_26 = _mm256_set1_epi8(26);
    const __m256i all_13 = _mm256_set1_epi8(13);
    const __m256i shift_lut = _mm256_broadcastsi128_si256(
        _mm_setr_epi8(
            'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
            '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62,
            '/' - 63, 'A', 0, 0
        )
    );

    for (size_t i = 0; i < loop_end; i += kBytesPerStep, dest_ptr += 32) {
        // Load eight 3-byte groups as two overlapping 16-byte halves.
        // b_low  = [????|dddc|ccbb|baaa]
        // b_high = [????|hhhg|ggff|feee]
        const __m128i b_low = _mm_loadu_si128(reinterpret_cast<const __m128i*>(source_data + i));
        const __m128i b_high = _mm_loadu_si128(reinterpret_cast<const __m128i*>(source_data + i + 12));

        // b_low goes to the low lane, b_high to the high lane; then each 3-byte group is spread
        // over its own 32-bit lane.
        // grouped = [?hhh|?ggg|?fff|?eee|?ddd|?ccc|?bbb|?aaa]
        const __m256i joined = _mm256_inserti128_si256(_mm256_castsi128_si256(b_low), b_high, 1);
        const __m256i grouped = _mm256_shuffle_epi8(joined, preshuffle);

        // t0       = [0000cccc|CC000000|aaaaaa00|00000000]
        // t1       = [00000000|00cccccc|00000000|00aaaaaa]
        // t2       = [00000000|00dddddd|000000bb|bbbb0000]
        // t3       = [00dddddd|00000000|00bbbbbb|00000000]
        // unpacked = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
        const __m256i t0 = _mm256_and_si256(grouped, t0_mask);
        const __m256i t2 = _mm256_and_si256(grouped, t2_mask);
        const __m256i t1 = _mm256_mulhi_epu16(t0, t1_values);
        const __m256i t3 = _mm256_mullo_epi16(t2, t3_values);
        const __m256i unpacked = _mm256_or_si256(t1, t3);

        // Map every 6-bit value to an index into shift_lut, then add the selected offset to
        // obtain the base64 character without a 64-entry lookup table.
        const __m256i below_26 = _mm256_and_si256(_mm256_cmpgt_epi8(all_26, unpacked), all_13);
        const __m256i reduced = _mm256_or_si256(_mm256_subs_epu8(unpacked, all_51), below_26);
        const __m256i result = _mm256_add_epi8(_mm256_shuffle_epi8(shift_lut, reduced), unpacked);

        _mm256_storeu_si256(reinterpret_cast<__m256i*>(dest_ptr), result);
    }

    return loop_end;
}
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
inline size_t encode_bulk( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
uint8_t*& dest_ptr, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
if (codepath == Codepath::Auto) { |
|
static auto auto_codepath = get_auto_codepath(); |
|
codepath = auto_codepath; |
|
} |
|
|
|
switch (codepath) { |
|
case Codepath::SSSE3: return encode_bulk_ssse3(source_data, source_data_length, dest_ptr); |
|
case Codepath::AVX2: return encode_bulk_avx2(source_data, source_data_length, dest_ptr); |
|
default: |
|
case Codepath::Basic: return 0; |
|
} |
|
} |
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
//---------------------------------------------------------------------------------------------------- |
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
// Bulk base64 decoder using SSSE3: every step turns 16 base64 characters into 12 output bytes.
//
// Parameters:
//   source_data         - base64 characters; they are not validated.
//   source_data_length  - number of readable characters at source_data.
//   dest_ptr            - output cursor, passed by reference; each step stores 16 bytes through
//                         it (only the first 12 are decoded data) and advances it by 12.
// Returns the number of source characters consumed (a multiple of 16, possibly 0).
//
// The last complete 16-character chunk is never processed here, so inputs shorter than 32
// characters are left entirely to the caller.
// NOTE(review): presumably this keeps the 16-byte store inside the destination and keeps '='
// padding out of the SIMD path - confirm.
//
// Code based on work by Wojciech Muła
// Ref: http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html
inline size_t decode_bulk_ssse3(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr
) {
    const size_t chunk_count = source_data_length / 16;
    if (chunk_count < 2) {
        return 0;
    }

    // Stop one full chunk early.
    const size_t loop_end = (chunk_count - 1) * 16;

    const __m128i nibble_mask = _mm_set1_epi8(0x0f);
    const __m128i slash_char = _mm_set1_epi8(0x2f);
    const __m128i slash_fixup = _mm_set1_epi8(-3);
    // Offset to add to a character, indexed by the character's high nibble.
    const __m128i shift_lut = _mm_setr_epi8(
        /* 0 */ 0x00,        /* 1 */ 0x00,        /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
        /* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
        /* 8 */ 0x00,        /* 9 */ 0x00,        /* a */ 0x00,        /* b */ 0x00,
        /* c */ 0x00,        /* d */ 0x00,        /* e */ 0x00,        /* f */ 0x00
    );
    const __m128i pack_pairs = _mm_set1_epi32(0x01400140);
    const __m128i pack_quads = _mm_set1_epi32(0x00011000);
    const __m128i unshuffle = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);

    for (size_t offset = 0; offset < loop_end; offset += 16) {
        // Four groups of four characters.
        const __m128i chars = _mm_loadu_si128(reinterpret_cast<const __m128i*>(source_data + offset));

        // Base64 characters -> 6-bit values: add a per-high-nibble offset, plus an extra -3 for '/'.
        const __m128i high_nibbles = _mm_and_si128(_mm_srli_epi32(chars, 4), nibble_mask);
        const __m128i is_slash = _mm_cmpeq_epi8(chars, slash_char);
        const __m128i shifted = _mm_add_epi8(chars, _mm_shuffle_epi8(shift_lut, high_nibbles));
        const __m128i sextets = _mm_add_epi8(shifted, _mm_and_si128(is_slash, slash_fixup));

        // 6-bit values -> packed 24-bit groups, one per 32-bit lane.
        const __m128i packed = _mm_madd_epi16(_mm_maddubs_epi16(sextets, pack_pairs), pack_quads);

        // Restore byte order and compact the four 3-byte groups to the front of the vector.
        const __m128i decoded = _mm_shuffle_epi8(packed, unshuffle);

        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_ptr), decoded);
        dest_ptr += 12;
    }

    return loop_end;
}
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
// Bulk base64 decoder using AVX2: every step turns 32 base64 characters into 24 output bytes.
//
// Parameters:
//   source_data         - base64 characters; they are not validated.
//   source_data_length  - number of readable characters at source_data.
//   dest_ptr            - output cursor, passed by reference; each step issues two 16-byte
//                         stores (at dest_ptr and dest_ptr + 12) and advances it by 24.
// Returns the number of source characters consumed (a multiple of 32, possibly 0).
//
// The last complete 32-character chunk is never processed here, so inputs shorter than 64
// characters are left entirely to the caller.
// NOTE(review): presumably this keeps the overhanging store inside the destination and keeps
// '=' padding out of the SIMD path - confirm.
//
// Code based on work by Wojciech Muła
// Ref: http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html
inline size_t decode_bulk_avx2(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr
) {
    const size_t chunk_count = source_data_length / 32;
    if (chunk_count < 2) {
        return 0;
    }

    // Stop one full chunk early.
    const size_t loop_end = (chunk_count - 1) * 32;

    const __m256i nibble_mask = _mm256_set1_epi8(0x0f);
    const __m256i slash_char = _mm256_set1_epi8(0x2f);
    const __m256i slash_fixup = _mm256_set1_epi8(-3);
    // Offset to add to a character, indexed by the character's high nibble (same in both lanes).
    const __m256i shift_lut = _mm256_setr_epi8(
        /* 0 */ 0x00,        /* 1 */ 0x00,        /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
        /* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
        /* 8 */ 0x00,        /* 9 */ 0x00,        /* a */ 0x00,        /* b */ 0x00,
        /* c */ 0x00,        /* d */ 0x00,        /* e */ 0x00,        /* f */ 0x00,
        /* 0 */ 0x00,        /* 1 */ 0x00,        /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
        /* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
        /* 8 */ 0x00,        /* 9 */ 0x00,        /* a */ 0x00,        /* b */ 0x00,
        /* c */ 0x00,        /* d */ 0x00,        /* e */ 0x00,        /* f */ 0x00
    );
    const __m256i pack_pairs = _mm256_set1_epi32(0x01400140);
    const __m256i pack_quads = _mm256_set1_epi32(0x00011000);
    const __m256i unshuffle = _mm256_setr_epi8(
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1
    );

    for (size_t offset = 0; offset < loop_end; offset += 32) {
        // Eight groups of four characters.
        const __m256i chars = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(source_data + offset));

        // Base64 characters -> 6-bit values: add a per-high-nibble offset, plus an extra -3 for '/'.
        const __m256i high_nibbles = _mm256_and_si256(_mm256_srli_epi32(chars, 4), nibble_mask);
        const __m256i is_slash = _mm256_cmpeq_epi8(chars, slash_char);
        const __m256i shifted = _mm256_add_epi8(chars, _mm256_shuffle_epi8(shift_lut, high_nibbles));
        const __m256i sextets = _mm256_add_epi8(shifted, _mm256_and_si256(is_slash, slash_fixup));

        // 6-bit values -> packed 24-bit groups, one per 32-bit lane.
        const __m256i packed = _mm256_madd_epi16(_mm256_maddubs_epi16(sextets, pack_pairs), pack_quads);

        // Restore byte order; each 128-bit lane now holds 12 decoded bytes followed by 4 zeros.
        const __m256i decoded = _mm256_shuffle_epi8(packed, unshuffle);

        // The low lane must be stored first: the high-lane store starts at +12 and overwrites
        // the 4 filler bytes written by the low-lane store.
        const __m128i low_lane = _mm256_extracti128_si256(decoded, 0);
        const __m128i high_lane = _mm256_extracti128_si256(decoded, 1);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_ptr), low_lane);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dest_ptr + 12), high_lane);
        dest_ptr += 24;
    }

    return loop_end;
}
|
|
|
//---------------------------------------------------------------------------------------------------- |
|
|
|
inline size_t decode_bulk( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
uint8_t*& dest_ptr, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
if (codepath == Codepath::Auto) { |
|
static auto auto_codepath = get_auto_codepath(); |
|
codepath = auto_codepath; |
|
} |
|
|
|
switch (codepath) { |
|
case Codepath::SSSE3: return decode_bulk_ssse3(source_data, source_data_length, dest_ptr); |
|
case Codepath::AVX2: return decode_bulk_avx2(source_data, source_data_length, dest_ptr); |
|
default: |
|
case Codepath::Basic: return 0; |
|
} |
|
} |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Returns the number of base64 characters needed to encode `binary_length` bytes.
// With padding, every started 3-byte group takes 4 characters. Without padding, a trailing
// group of 1 or 2 bytes takes 2 or 3 characters respectively.
inline size_t get_encoded_length(size_t binary_length, bool padded = true) {
    if (padded) {
        return (binary_length + 2) / 3 * 4;
    }

    const size_t full_groups = binary_length / 3;
    const size_t tail_bytes = binary_length % 3;
    const size_t tail_chars = (tail_bytes == 0) ? 0 : tail_bytes + 1;
    return full_groups * 4 + tail_chars;
}
|
|
|
// Returns the number of bytes produced by decoding the `data_length` base64 characters at `data`.
// A length that is not a multiple of 4 is treated as unpadded input: the trailing 1-3
// characters contribute (count - 1) bytes. Otherwise up to two trailing '=' characters are
// subtracted from the full size. Only the last two characters are inspected.
inline size_t get_decoded_length(const uint8_t* data, const size_t data_length) {
    if (data_length == 0) {
        return 0;
    }

    const size_t full_quads = data_length / 4;
    const size_t tail_chars = data_length % 4;
    if (tail_chars != 0) {
        // Unpadded data.
        return full_quads * 3 + (tail_chars - 1);
    }

    // Length is a multiple of 4: either the binary size is a multiple of 3 or the data is padded.
    size_t padding = 0;
    if (data[data_length - 2] == '=') {
        padding = 2;
    } else if (data[data_length - 1] == '=') {
        padding = 1;
    }
    return full_quads * 3 - padding;
}
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Primary base64 encoding method. Asserts that the destination buffer is _exactly_ the required size. |
|
inline void encode( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
uint8_t* dest_data, |
|
const size_t dest_data_length, |
|
bool padded = true, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
if (get_encoded_length(source_data_length, padded) != dest_data_length) { |
|
throw std::logic_error("Dest buffer is incorrect size"); |
|
} |
|
|
|
// Use bulk vectorized encoding for as much data as possible. |
|
auto dest_ptr = dest_data; |
|
size_t loop_end = detail::encode_bulk(source_data, source_data_length, dest_ptr, codepath); |
|
|
|
size_t remainder = source_data_length - loop_end; |
|
size_t octet_count = (remainder / 3); |
|
size_t octet_end = loop_end + (octet_count * 3); |
|
|
|
// Process three source values at a time. |
|
for (size_t i = loop_end; i < octet_end; i += 3, dest_ptr += 4) { |
|
uint8_t b0 = source_data[i ]; |
|
uint8_t b1 = source_data[i+1]; |
|
uint8_t b2 = source_data[i+2]; |
|
|
|
dest_ptr[0] = detail::Base64LUT[b0 >> 2]; |
|
dest_ptr[1] = detail::Base64LUT[(b0 & 0x03) << 4 | b1 >> 4]; |
|
dest_ptr[2] = detail::Base64LUT[(b1 & 0x0F) << 2 | b2 >> 6]; |
|
dest_ptr[3] = detail::Base64LUT[b2 & 0x3F]; |
|
} |
|
|
|
// Handle the remaining values separately to avoid branches the main loop. |
|
remainder = source_data_length - octet_end; |
|
if (remainder == 2) { |
|
uint8_t b0 = source_data[octet_end ]; |
|
uint8_t b1 = source_data[octet_end+1]; |
|
|
|
dest_ptr[0] = detail::Base64LUT[b0 >> 2]; |
|
dest_ptr[1] = detail::Base64LUT[(b0 & 0x03) << 4 | b1 >> 4]; |
|
dest_ptr[2] = detail::Base64LUT[(b1 & 0x0F) << 2]; |
|
if (padded) { |
|
dest_ptr[3] = '='; |
|
} |
|
|
|
} else if (remainder == 1) { |
|
uint8_t b0 = source_data[octet_end]; |
|
|
|
dest_ptr[0] = detail::Base64LUT[b0 >> 2]; |
|
dest_ptr[1] = detail::Base64LUT[(b0 & 0x03) << 4]; |
|
|
|
if (padded) { |
|
dest_ptr[2] = '='; |
|
dest_ptr[3] = '='; |
|
} |
|
} |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Helper to encode directly to a std::string. |
|
inline std::string encode_to_string( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
bool padded = true, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
std::string str( |
|
get_encoded_length(source_data_length, padded), |
|
'=' |
|
); |
|
|
|
encode( |
|
source_data, |
|
source_data_length, |
|
reinterpret_cast<uint8_t*>(str.data()), |
|
str.size(), |
|
padded, |
|
codepath |
|
); |
|
|
|
return str; |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Helper to encode directly to a std::vector. This is slightly faster than std::string as it doesn't |
|
// need to initialize the buffer before encoding. |
|
inline std::vector<uint8_t> encode_to_byte_vector( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
bool padded = true, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
std::vector<uint8_t> buf; |
|
buf.resize(get_encoded_length(source_data_length, padded)); |
|
|
|
encode( |
|
source_data, |
|
source_data_length, |
|
buf.data(), |
|
buf.size(), |
|
padded, |
|
codepath |
|
); |
|
|
|
return buf; |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Primary base64 decoding method. Asserts that the destination buffer is _exactly_ the required size. |
|
inline void decode( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
uint8_t* dest_data, |
|
const size_t dest_data_length, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
size_t binary_length = get_decoded_length(source_data, source_data_length); |
|
if (binary_length != dest_data_length) { |
|
throw std::logic_error("Dest buffer is incorrect size"); |
|
} |
|
|
|
auto dest_ptr = dest_data; |
|
size_t loop_end = detail::decode_bulk(source_data, source_data_length, dest_ptr, codepath); |
|
|
|
size_t binary_remainder = dest_data_length - std::distance(dest_data, dest_ptr); |
|
size_t octet_count = binary_remainder / 3; |
|
size_t octet_end = loop_end + (octet_count * 4); |
|
|
|
// Process four source values at a time. |
|
for (size_t i = loop_end; i < octet_end; i += 4, dest_ptr += 3) { |
|
uint8_t b0 = detail::Base64InverseLUT[source_data[i ]]; |
|
uint8_t b1 = detail::Base64InverseLUT[source_data[i+1]]; |
|
uint8_t b2 = detail::Base64InverseLUT[source_data[i+2]]; |
|
uint8_t b3 = detail::Base64InverseLUT[source_data[i+3]]; |
|
|
|
dest_ptr[0] = b0 << 2 | b1 >> 4; |
|
dest_ptr[1] = b1 << 4 | b2 >> 2; |
|
dest_ptr[2] = b2 << 6 | b3; |
|
} |
|
|
|
// Handle the remaining values separately to avoid branches the main loop. |
|
binary_remainder -= (octet_count * 3); |
|
if (binary_remainder == 2) { |
|
uint8_t b0 = detail::Base64InverseLUT[source_data[octet_end ]]; |
|
uint8_t b1 = detail::Base64InverseLUT[source_data[octet_end+1]]; |
|
uint8_t b2 = detail::Base64InverseLUT[source_data[octet_end+2]]; |
|
|
|
dest_ptr[0] = b0 << 2 | b1 >> 4; |
|
dest_ptr[1] = b1 << 4 | b2 >> 2; |
|
|
|
} else if (binary_remainder == 1) { |
|
uint8_t b0 = detail::Base64InverseLUT[source_data[octet_end ]]; |
|
uint8_t b1 = detail::Base64InverseLUT[source_data[octet_end+1]]; |
|
|
|
dest_ptr[0] = b0 << 2 | b1 >> 4; |
|
} |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Helper to decode directly to a std::string. |
|
inline std::string decode_to_string( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
std::string str(get_decoded_length(source_data, source_data_length), '\0'); |
|
|
|
decode( |
|
source_data, |
|
source_data_length, |
|
reinterpret_cast<uint8_t*>(str.data()), |
|
str.size(), |
|
codepath |
|
); |
|
|
|
return str; |
|
} |
|
|
|
//-------------------------------------------------------------------------------------------------------- |
|
|
|
// Helper to decode directly to a std::vector. This is slightly faster than std::string as it doesn't |
|
// need to initialize the buffer before decoding. |
|
inline std::vector<uint8_t> decode_to_vector( |
|
const uint8_t* source_data, |
|
const size_t source_data_length, |
|
Codepath codepath = Codepath::Auto |
|
) { |
|
std::vector<uint8_t> buf; |
|
buf.resize(get_decoded_length(source_data, source_data_length)); |
|
|
|
decode( |
|
source_data, |
|
source_data_length, |
|
buf.data(), |
|
buf.size(), |
|
codepath |
|
); |
|
|
|
return buf; |
|
} |
|
|
|
}
|
|
|