Browse Source

move external code into lib/

Joseph Surin 3 years ago
  1. 10
  2. 666
  3. 86
  4. 1
  5. 231756
  6. 12237
  7. 0

.gitmodules vendored

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
[submodule "tgbot-cpp"]
path = tgbot-cpp
url =
[submodule "spdlog"]
path = spdlog
[submodule "lib/tgbot-cpp"]
path = lib/tgbot-cpp
url =
[submodule "lib/spdlog"]
path = lib/spdlog
url =


@ -0,0 +1,666 @@ @@ -0,0 +1,666 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#ifdef __GNUG__
#pragma GCC target("avx2") // GCC will only compile AVX2 if we tell it to.
#include <tmmintrin.h>
#include <immintrin.h>
#include "CpuFeatures.hpp"
#define _mm256_set_m128i(v0, v1) _mm256_insertf128_si256(_mm256_castsi128_si256(v1), (v0), 1)
namespace base64 {
// Selects which implementation the bulk encode/decode helpers dispatch to.
enum class Codepath {
    Auto = 0,   // resolved at run time from the detected CPU features
    Basic = 1,  // no vectorized bulk pass
    SSSE3 = 2,  // 128-bit vectorized bulk pass
    AVX2 = 3    // 256-bit vectorized bulk pass
};
namespace detail {
// Static look-up table for 6-bit values to 8-bit base64 characters. All values are valid.
inline constexpr std::string_view Base64LUT{ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" };
static_assert(Base64LUT.size() == 64, "the base64 alphabet must contain exactly 64 characters");

// Static look-up table for 8-bit base64 characters to 6-bit values. Invalid values are forced to
// zero (i.e. no validation). Note that 'A' and every non-alphabet byte (including '=') both map to 0.
inline constexpr std::array<uint8_t, 256> Base64InverseLUT = {
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x00 - 0x0F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x10 - 0x1F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,  0, 63, // 0x20 - 0x2F
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0, // 0x30 - 0x3F
     0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 0x40 - 0x4F
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0,  0, // 0x50 - 0x5F
     0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 0x60 - 0x6F
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0, // 0x70 - 0x7F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x80 - 0x8F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x90 - 0x9F
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0xA0 - 0xAF
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0xB0 - 0xBF
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0xC0 - 0xCF
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0xD0 - 0xDF
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0xE0 - 0xEF
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0  // 0xF0 - 0xFF
};
// Chooses a code path from the CPU feature flags reported by cpu_features::get_features():
// AVX2 when that flag is set, otherwise SSSE3 when that flag is set, otherwise Basic.
inline Codepath get_auto_codepath() {
    using namespace cpu_features;
    const auto features = get_features();
    if (features & Features::AVX2) {
        return Codepath::AVX2;
    }
    if (features & Features::SSSE3) {
        return Codepath::SSSE3;
    }
    return Codepath::Basic;
}
// Vectorized (128-bit) bulk base64 encoder.
// Walks source_data in whole 12-byte groups and advances dest_ptr by 16 bytes per group.
// Returns the number of source bytes covered by whole groups (0 when fewer than 12 bytes are
// supplied), so the caller can continue with the remaining tail.
// NOTE(review): this view of the function looks truncated -- closing braces/parentheses are
// missing, _13_128 is declared but never referenced, and no store of `result` through dest_ptr
// is visible. Confirm against the upstream file before relying on this text.
// NOTE(review): every iteration loads 16 bytes although only 12 are consumed, so the final
// iteration can read up to 4 bytes beyond source_data_length -- confirm callers tolerate this.
inline size_t encode_bulk_ssse3(
const uint8_t* source_data,
const size_t source_data_length,
uint8_t*& dest_ptr
) {
// Number of complete 12-byte input groups.
size_t loop_count = (source_data_length / 12);
if (loop_count == 0) {
return 0;
// Count of source bytes that the vector loop covers.
size_t loop_end = (loop_count * 12);
// Code based on work by Wojciech Muła
// Ref:
const __m128i preshuffle_128 = _mm_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);
const __m128i t0Mask = _mm_set_epi32(0x0fc0fc00, 0x0fc0fc00, 0x0fc0fc00, 0x0fc0fc00);
const __m128i t1Values = _mm_set_epi32(0x04000040, 0x04000040, 0x04000040, 0x04000040);
const __m128i t2Mask = _mm_set_epi32(0x003f03f0, 0x003f03f0, 0x003f03f0, 0x003f03f0);
const __m128i t3Values = _mm_set_epi32(0x01000010, 0x01000010, 0x01000010, 0x01000010);
// Byte-wise constants 51, 26 and 13 used by the character-range reduction below.
const __m128i _51_128 = _mm_set_epi32(0x33333333, 0x33333333, 0x33333333, 0x33333333);
const __m128i _26_128 = _mm_set_epi32(0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a);
const __m128i _13_128 = _mm_set_epi32(0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d);
// Per-range offsets that turn a 6-bit value into its ASCII base64 character.
const __m128i shiftLUT = _mm_setr_epi8(
'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
'0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62,
'/' - 63, 'A', 0, 0
for (size_t i = 0; i < loop_end; i += 12, dest_ptr += 16) {
// Load four sets of octets at once.
// [????|dddc|ccbb|baaa]
__m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source_data[i]));
// [?ddd|?ccc|?bbb|?aaa]
b = _mm_shuffle_epi8(b, preshuffle_128);
// t0 = [0000cccc|CC000000|aaaaaa00|00000000]
// t1 = [00000000|00cccccc|00000000|00aaaaaa]
// t2 = [00000000|00dddddd|000000bb|bbbb0000]
// t3 = [00dddddd|00000000|00bbbbbb|00000000]
// unpacked = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
const __m128i t0 = _mm_and_si128(b, t0Mask);
const __m128i t2 = _mm_and_si128(b, t2Mask);
const __m128i t1 = _mm_mulhi_epu16(t0, t1Values);
const __m128i t3 = _mm_mullo_epi16(t2, t3Values);
const __m128i unpacked = _mm_or_si128(t1, t3);
// Convert to base64 characters without lookup tables
const __m128i reduced = _mm_or_si128(
_mm_subs_epu8(unpacked, _51_128),
_mm_cmpgt_epi8(_26_128, unpacked),
const __m128i result = _mm_add_epi8(
_mm_shuffle_epi8(shiftLUT, reduced),
// Output
return loop_end;
// Vectorized (256-bit) bulk base64 encoder.
// Walks source_data in whole 24-byte groups and advances dest_ptr by 32 bytes per group.
// Returns the number of source bytes covered by whole groups (0 when fewer than 24 bytes are
// supplied), so the caller can continue with the remaining tail.
// NOTE(review): this view of the function looks truncated -- closing braces/parentheses are
// missing, preshuffle_256 and _13_256 are declared but never referenced, and no store of
// `result` through dest_ptr is visible. Confirm against the upstream file.
// NOTE(review): the second 16-byte load starts at i+12, so each iteration touches bytes up to
// i+28 while consuming only 24; the final iteration can read up to 4 bytes beyond
// source_data_length -- confirm callers tolerate this.
inline size_t encode_bulk_avx2(
const uint8_t* source_data,
const size_t source_data_length,
uint8_t*& dest_ptr
) {
// Number of complete 24-byte input groups.
size_t loop_count = (source_data_length / 24);
if (loop_count == 0) {
return 0;
// Count of source bytes that the vector loop covers.
size_t loop_end = (loop_count * 24);
// Code based on work by Wojciech Muła
// Ref:
// Same byte pattern repeated for both 128-bit halves.
const __m256i preshuffle_256 = _mm256_set_epi8(
10, 11, 9, 10,
7, 8, 6, 7,
4, 5, 3, 4,
1, 2, 0, 1,
10, 11, 9, 10,
7, 8, 6, 7,
4, 5, 3, 4,
1, 2, 0, 1
const __m256i t0Mask = _mm256_set1_epi32(0x0fc0fc00);
const __m256i t1Values = _mm256_set1_epi32(0x04000040);
const __m256i t2Mask = _mm256_set1_epi32(0x003f03f0);
const __m256i t3Values = _mm256_set1_epi32(0x01000010);
// Byte-wise constants 51, 26 and 13 used by the character-range reduction below.
const __m256i _51_256 = _mm256_set1_epi32(0x33333333);
const __m256i _26_256 = _mm256_set1_epi32(0x1a1a1a1a);
const __m256i _13_256 = _mm256_set1_epi32(0x0d0d0d0d);
// Per-range offsets that turn a 6-bit value into its ASCII base64 character (one copy per half).
const __m256i shiftLUT = _mm256_setr_epi8(
'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
'0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62,
'/' - 63, 'A', 0, 0,
'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
'0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62,
'/' - 63, 'A', 0, 0
for (size_t i = 0; i < loop_end; i += 24, dest_ptr += 32) {
// Load eight sets of octets at once.
// b_low = [????|dddc|ccbb|baaa]
// b_high = [????|hhhg|ggff|feee]
__m128i b_low = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source_data[i]));
__m128i b_high = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source_data[i+12]));
// b = [?hhh|?ggg|?fff|?eee|?ddd|?ccc|?bbb|?aaa]
__m256i b = _mm256_shuffle_epi8(
_mm256_set_m128i(b_high, b_low),
// t0 = [0000cccc|CC000000|aaaaaa00|00000000]
// t1 = [00000000|00cccccc|00000000|00aaaaaa]
// t2 = [00000000|00dddddd|000000bb|bbbb0000]
// t3 = [00dddddd|00000000|00bbbbbb|00000000]
// unpacked = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
const __m256i t0 = _mm256_and_si256(b, t0Mask);
const __m256i t2 = _mm256_and_si256(b, t2Mask);
const __m256i t1 = _mm256_mulhi_epu16(t0, t1Values);
const __m256i t3 = _mm256_mullo_epi16(t2, t3Values);
const __m256i unpacked = _mm256_or_si256(t1, t3);
// Convert to base64 characters without lookup tables
const __m256i reduced = _mm256_or_si256(
_mm256_subs_epu8(unpacked, _51_256),
_mm256_cmpgt_epi8(_26_256, unpacked),
const __m256i result = _mm256_add_epi8(
_mm256_shuffle_epi8(shiftLUT, reduced),
// Output
return loop_end;
// Dispatches to the vectorized bulk encoder selected by `codepath`.
//
// source_data / source_data_length: input bytes.
// dest_ptr: output cursor, passed by reference so the chosen encoder can advance it.
// codepath: Auto is resolved once (cached in a function-local static) via get_auto_codepath().
// Returns the number of source bytes the bulk encoder reports as processed; 0 for the Basic
// path or any value without a vectorized implementation.
inline size_t encode_bulk(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr,
    Codepath codepath = Codepath::Auto
) {
    if (codepath == Codepath::Auto) {
        static const auto auto_codepath = get_auto_codepath();
        codepath = auto_codepath;
    }

    switch (codepath) {
        case Codepath::SSSE3: return encode_bulk_ssse3(source_data, source_data_length, dest_ptr);
        case Codepath::AVX2:  return encode_bulk_avx2(source_data, source_data_length, dest_ptr);
        case Codepath::Basic:
        default:
            break;
    }
    // No vectorized pass: nothing consumed. Also keeps control from running off the end of a
    // non-void function for values the switch does not handle.
    return 0;
}
// Vectorized (128-bit) bulk base64 decoder.
// Walks source_data in whole 16-character groups and advances dest_ptr by 12 bytes per group.
// Returns the number of source characters covered by whole groups, or 0 when there are fewer
// than two complete groups (loop_count <= 1).
// NOTE(review): this view of the function looks truncated -- closing braces/parentheses are
// missing and packValues2 is declared but never referenced (the second operand of
// _mm_madd_epi16 is not visible). Confirm against the upstream file.
// NOTE(review): each iteration stores 16 bytes at dest_ptr but advances by only 12, so the
// final store writes 4 bytes past the decoded data -- confirm the destination has that slack.
inline size_t decode_bulk_ssse3(
const uint8_t* source_data,
const size_t source_data_length,
uint8_t*& dest_ptr
) {
// Number of complete 16-character input groups.
size_t loop_count = (source_data_length / 16);
if (loop_count <= 1) {
return 0;
// Count of source characters that the vector loop covers.
size_t loop_end = (loop_count * 16);
// Code based on work by Wojciech Muła
// Ref:
const __m128i _0f_128 = _mm_set1_epi8(0x0f);
const __m128i _2f_128 = _mm_set1_epi8(0x2f);
const __m128i _n3_128 = _mm_set1_epi8(-3);
// Offset added to a character, indexed by its high nibble, to obtain its 6-bit value.
const __m128i shiftLUT = _mm_setr_epi8(
/* 0 */ 0x00, /* 1 */ 0x00, /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
/* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
/* 8 */ 0x00, /* 9 */ 0x00, /* a */ 0x00, /* b */ 0x00,
/* c */ 0x00, /* d */ 0x00, /* e */ 0x00, /* f */ 0x00
const __m128i packValues1 = _mm_set_epi32(0x01400140, 0x01400140, 0x01400140, 0x01400140);
const __m128i packValues2 = _mm_set_epi32(0x00011000, 0x00011000, 0x00011000, 0x00011000);
// The four -1 selectors make _mm_shuffle_epi8 zero the last four output bytes.
const __m128i unshuffle_128 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
for (size_t i = 0; i < loop_end; i += 16, dest_ptr += 12) {
// Load four sets of octets at once.
__m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&source_data[i]));
// Base64 characters -> 6-bit unpacked
const __m128i higher_nibble = _mm_and_si128(_mm_srli_epi32(b, 4), _0f_128);
// '/' (0x2f) shares its high nibble with '+', so it gets an extra -3 correction below.
const __m128i eq_2f = _mm_cmpeq_epi8(b, _2f_128);
const __m128i shift = _mm_shuffle_epi8(shiftLUT, higher_nibble);
const __m128i t0 = _mm_add_epi8(b, shift);
const __m128i unpacked = _mm_add_epi8(t0, _mm_and_si128(eq_2f, _n3_128));
// 6-bit unpacked -> 8-bit packed
const __m128i packed = _mm_madd_epi16(
_mm_maddubs_epi16(unpacked, packValues1),
// 8-bit packed -> original order
const __m128i unshuffled = _mm_shuffle_epi8(packed, unshuffle_128);
// Output
_mm_storeu_si128(reinterpret_cast<__m128i*>(dest_ptr), unshuffled);
return loop_end;
// Vectorized (256-bit) bulk base64 decoder.
// Walks source_data in whole 32-character groups and advances dest_ptr by 24 bytes per group.
// Returns the number of source characters covered by whole groups, or 0 when there are fewer
// than two complete groups (loop_count <= 1).
// NOTE(review): this view of the function looks truncated -- closing braces/parentheses are
// missing, packValues2 is declared but never referenced, and the two
// _mm256_extracti128_si256 expressions under "Output" are not wrapped in any visible store.
// Confirm against the upstream file before relying on this text.
inline size_t decode_bulk_avx2(
const uint8_t* source_data,
const size_t source_data_length,
uint8_t*& dest_ptr
) {
// Number of complete 32-character input groups.
size_t loop_count = (source_data_length / 32);
if (loop_count <= 1) {
return 0;
// Count of source characters that the vector loop covers.
size_t loop_end = (loop_count * 32);
// Code based on work by Wojciech Muła
// Ref:
const __m256i _0f_256 = _mm256_set1_epi8(0x0f);
const __m256i _2f_256 = _mm256_set1_epi8(0x2f);
const __m256i _n3_256 = _mm256_set1_epi8(-3);
// Offset added to a character, indexed by its high nibble, to obtain its 6-bit value
// (one copy per 128-bit half).
const __m256i shiftLUT = _mm256_setr_epi8(
/* 0 */ 0x00, /* 1 */ 0x00, /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
/* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
/* 8 */ 0x00, /* 9 */ 0x00, /* a */ 0x00, /* b */ 0x00,
/* c */ 0x00, /* d */ 0x00, /* e */ 0x00, /* f */ 0x00,
/* 0 */ 0x00, /* 1 */ 0x00, /* 2 */ 0x3e - 0x2b, /* 3 */ 0x34 - 0x30,
/* 4 */ 0x00 - 0x41, /* 5 */ 0x0f - 0x50, /* 6 */ 0x1a - 0x61, /* 7 */ 0x29 - 0x70,
/* 8 */ 0x00, /* 9 */ 0x00, /* a */ 0x00, /* b */ 0x00,
/* c */ 0x00, /* d */ 0x00, /* e */ 0x00, /* f */ 0x00
const __m256i packValues1 = _mm256_set1_epi32(0x01400140);
const __m256i packValues2 = _mm256_set1_epi32(0x00011000);
// Same selector pattern for both 128-bit halves; the -1 selectors zero the last four bytes
// of each half.
const __m256i unshuffle_256 = _mm256_setr_epi8(
2, 1, 0,
6, 5, 4,
10, 9, 8,
14, 13, 12,
-1, -1, -1, -1,
2, 1, 0,
6, 5, 4,
10, 9, 8,
14, 13, 12,
-1, -1, -1, -1
for (size_t i = 0; i < loop_end; i += 32, dest_ptr += 24) {
// Load eight sets of octets at once.
__m256i b = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(&source_data[i]));
// Base64 characters -> 6-bit unpacked
const __m256i higher_nibble = _mm256_and_si256(_mm256_srli_epi32(b, 4), _0f_256);
// '/' (0x2f) shares its high nibble with '+', so it gets an extra -3 correction below.
const __m256i eq_2f = _mm256_cmpeq_epi8(b, _2f_256);
const __m256i shift = _mm256_shuffle_epi8(shiftLUT, higher_nibble);
const __m256i t0 = _mm256_add_epi8(b, shift);
const __m256i unpacked = _mm256_add_epi8(t0, _mm256_and_si256(eq_2f, _n3_256));
// 6-bit unpacked -> 8-bit packed
const __m256i packed = _mm256_madd_epi16(
_mm256_maddubs_epi16(unpacked, packValues1),
// 8-bit packed -> original order
const __m256i unshuffled = _mm256_shuffle_epi8(packed, unshuffle_256);
// Output
_mm256_extracti128_si256(unshuffled, 0)
_mm256_extracti128_si256(unshuffled, 1)
return loop_end;
// Dispatches to the vectorized bulk decoder selected by `codepath`.
//
// source_data / source_data_length: base64 input characters.
// dest_ptr: output cursor, passed by reference so the chosen decoder can advance it.
// codepath: Auto is resolved once (cached in a function-local static) via get_auto_codepath().
// Returns the number of source characters the bulk decoder reports as processed; 0 for the
// Basic path or any value without a vectorized implementation.
inline size_t decode_bulk(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t*& dest_ptr,
    Codepath codepath = Codepath::Auto
) {
    if (codepath == Codepath::Auto) {
        static const auto auto_codepath = get_auto_codepath();
        codepath = auto_codepath;
    }

    switch (codepath) {
        case Codepath::SSSE3: return decode_bulk_ssse3(source_data, source_data_length, dest_ptr);
        case Codepath::AVX2:  return decode_bulk_avx2(source_data, source_data_length, dest_ptr);
        case Codepath::Basic:
        default:
            break;
    }
    // No vectorized pass: nothing consumed. Also keeps control from running off the end of a
    // non-void function for values the switch does not handle.
    return 0;
}
// Helper to determine the size of an encoded base64 buffer.
//
// binary_length: number of input bytes.
// padded: when true the result is rounded up to a whole 4-character group; when false a
//         trailing partial group of 1 or 2 bytes contributes 2 or 3 characters respectively.
// Returns the number of base64 characters needed.
inline size_t get_encoded_length(size_t binary_length, bool padded = true) {
    const size_t full_groups = binary_length / 3;
    const size_t remainder = binary_length % 3;

    if (padded) {
        // Equivalent to (binary_length + 2) / 3 * 4 without the "+ 2" wrap-around near SIZE_MAX.
        return (full_groups + (remainder != 0 ? 1 : 0)) * 4;
    }

    size_t length = full_groups * 4;
    if (remainder != 0) {
        length += remainder + 1;
    }
    return length;
}
// Helper to determine the size of a decoded binary buffer, given the source base64 data.
//
// data / data_length: base64 characters; `data` is only dereferenced when data_length is a
//                     non-zero multiple of 4 (to look for trailing '=' padding).
// Returns the number of bytes the input decodes to. No validation of the characters is done.
inline size_t get_decoded_length(const uint8_t* data, const size_t data_length) {
    if (data_length == 0) {
        return 0;
    }

    const size_t octet_count = data_length / 4;
    const size_t remainder = data_length % 4;
    if (remainder != 0) {
        // Unpadded data: a partial group of k characters carries k - 1 bytes.
        return (octet_count * 3) + (remainder - 1);
    }

    // Either binary % 3 == 0 || padded
    const size_t full_length = octet_count * 3;
    if (data[data_length - 2] == '=') {
        return full_length - 2;
    }
    if (data[data_length - 1] == '=') {
        return full_length - 1;
    }
    return full_length;
}
// Primary base64 encoding method. Asserts that the destination buffer is _exactly_ the required size.
//
// source_data / source_data_length: bytes to encode.
// dest_data / dest_data_length: output buffer; the length must equal
//     get_encoded_length(source_data_length, padded).
// padded: when true a short final group is completed with '=' characters.
// codepath: forwarded to detail::encode_bulk to choose the vectorized implementation.
// Throws std::logic_error when dest_data_length is not the exact encoded size.
inline void encode(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t* dest_data,
    const size_t dest_data_length,
    bool padded = true,
    Codepath codepath = Codepath::Auto
) {
    if (get_encoded_length(source_data_length, padded) != dest_data_length) {
        throw std::logic_error("Dest buffer is incorrect size");
    }

    // Use bulk vectorized encoding for as much data as possible.
    uint8_t* dest_ptr = dest_data;
    const size_t loop_end = detail::encode_bulk(source_data, source_data_length, dest_ptr, codepath);

    const size_t octet_count = (source_data_length - loop_end) / 3;
    const size_t octet_end = loop_end + (octet_count * 3);

    // Process three source values at a time.
    for (size_t i = loop_end; i < octet_end; i += 3, dest_ptr += 4) {
        const uint8_t b0 = source_data[i];
        const uint8_t b1 = source_data[i + 1];
        const uint8_t b2 = source_data[i + 2];
        dest_ptr[0] = detail::Base64LUT[b0 >> 2];
        dest_ptr[1] = detail::Base64LUT[((b0 & 0x03) << 4) | (b1 >> 4)];
        dest_ptr[2] = detail::Base64LUT[((b1 & 0x0F) << 2) | (b2 >> 6)];
        dest_ptr[3] = detail::Base64LUT[b2 & 0x3F];
    }

    // Handle the remaining values separately to avoid branches in the main loop.
    const size_t remainder = source_data_length - octet_end;
    if (remainder == 2) {
        const uint8_t b0 = source_data[octet_end];
        const uint8_t b1 = source_data[octet_end + 1];
        dest_ptr[0] = detail::Base64LUT[b0 >> 2];
        dest_ptr[1] = detail::Base64LUT[((b0 & 0x03) << 4) | (b1 >> 4)];
        dest_ptr[2] = detail::Base64LUT[(b1 & 0x0F) << 2];
        if (padded) {
            dest_ptr[3] = '=';
        }
    } else if (remainder == 1) {
        const uint8_t b0 = source_data[octet_end];
        dest_ptr[0] = detail::Base64LUT[b0 >> 2];
        dest_ptr[1] = detail::Base64LUT[(b0 & 0x03) << 4];
        if (padded) {
            dest_ptr[2] = '=';
            dest_ptr[3] = '=';
        }
    }
}
// Helper to encode directly to a std::string.
// The result string is sized with get_encoded_length(source_data_length, padded).
// NOTE(review): only the sizing of the result and the return are visible in this view; the
// remaining std::string constructor argument and the call that fills the buffer (which would
// presumably consume `codepath`) appear to be missing -- confirm against the upstream file.
inline std::string encode_to_string(
const uint8_t* source_data,
const size_t source_data_length,
bool padded = true,
Codepath codepath = Codepath::Auto
) {
std::string str(
get_encoded_length(source_data_length, padded),
return str;
// Helper to encode directly to a std::vector. This is slightly faster than std::string as it doesn't
// need to initialize the buffer before encoding.
// NOTE(review): std::vector::resize value-initializes the new elements, so the "doesn't need
// to initialize" claim above looks doubtful -- confirm with a measurement.
// NOTE(review): only the sizing of the buffer and the return are visible in this view; the
// call that fills the buffer (which would presumably consume `codepath`) appears to be
// missing -- confirm against the upstream file.
inline std::vector<uint8_t> encode_to_byte_vector(
const uint8_t* source_data,
const size_t source_data_length,
bool padded = true,
Codepath codepath = Codepath::Auto
) {
std::vector<uint8_t> buf;
// Size the buffer to exactly the encoded length.
buf.resize(get_encoded_length(source_data_length, padded));
return buf;
// Primary base64 decoding method. Asserts that the destination buffer is _exactly_ the required size.
//
// source_data / source_data_length: base64 characters; padded and unpadded input are both
//     accepted, and characters outside the alphabet decode as zero bits (no validation).
// dest_data / dest_data_length: output buffer; the length must equal
//     get_decoded_length(source_data, source_data_length).
// codepath: forwarded to detail::decode_bulk to choose the vectorized implementation.
// Throws std::logic_error when dest_data_length is not the exact decoded size.
inline void decode(
    const uint8_t* source_data,
    const size_t source_data_length,
    uint8_t* dest_data,
    const size_t dest_data_length,
    Codepath codepath = Codepath::Auto
) {
    const size_t binary_length = get_decoded_length(source_data, source_data_length);
    if (binary_length != dest_data_length) {
        throw std::logic_error("Dest buffer is incorrect size");
    }

    uint8_t* dest_ptr = dest_data;
    const size_t loop_end = detail::decode_bulk(source_data, source_data_length, dest_ptr, codepath);

    // Bytes still to be produced after the bulk pass.
    // NOTE(review): if decode_bulk ever advances dest_ptr past dest_data_length (for example
    // when '=' padding falls inside the bulk region) this subtraction wraps -- confirm that
    // decode_bulk always leaves the padded tail to the scalar code below.
    size_t binary_remainder = dest_data_length - static_cast<size_t>(dest_ptr - dest_data);
    const size_t octet_count = binary_remainder / 3;
    const size_t octet_end = loop_end + (octet_count * 4);

    // Process four source values at a time.
    for (size_t i = loop_end; i < octet_end; i += 4, dest_ptr += 3) {
        const uint8_t b0 = detail::Base64InverseLUT[source_data[i]];
        const uint8_t b1 = detail::Base64InverseLUT[source_data[i + 1]];
        const uint8_t b2 = detail::Base64InverseLUT[source_data[i + 2]];
        const uint8_t b3 = detail::Base64InverseLUT[source_data[i + 3]];
        dest_ptr[0] = b0 << 2 | b1 >> 4;
        dest_ptr[1] = b1 << 4 | b2 >> 2;
        dest_ptr[2] = b2 << 6 | b3;
    }

    // Handle the remaining values separately to avoid branches in the main loop.
    binary_remainder -= (octet_count * 3);
    if (binary_remainder == 2) {
        const uint8_t b0 = detail::Base64InverseLUT[source_data[octet_end]];
        const uint8_t b1 = detail::Base64InverseLUT[source_data[octet_end + 1]];
        const uint8_t b2 = detail::Base64InverseLUT[source_data[octet_end + 2]];
        dest_ptr[0] = b0 << 2 | b1 >> 4;
        dest_ptr[1] = b1 << 4 | b2 >> 2;
    } else if (binary_remainder == 1) {
        const uint8_t b0 = detail::Base64InverseLUT[source_data[octet_end]];
        const uint8_t b1 = detail::Base64InverseLUT[source_data[octet_end + 1]];
        dest_ptr[0] = b0 << 2 | b1 >> 4;
    }
}
// Helper to decode directly to a std::string.
// The result string is sized with get_decoded_length(source_data, source_data_length) and
// zero-filled.
// NOTE(review): only the sizing of the result and the return are visible in this view; the
// call that fills the buffer (which would presumably consume `codepath`) appears to be
// missing -- confirm against the upstream file.
inline std::string decode_to_string(
const uint8_t* source_data,
const size_t source_data_length,
Codepath codepath = Codepath::Auto
) {
std::string str(get_decoded_length(source_data, source_data_length), '\0');
return str;
// Helper to decode directly to a std::vector. This is slightly faster than std::string as it doesn't
// need to initialize the buffer before decoding.
// NOTE(review): std::vector::resize value-initializes the new elements, so the "doesn't need
// to initialize" claim above looks doubtful -- confirm with a measurement.
// NOTE(review): only the sizing of the buffer and the return are visible in this view; the
// call that fills the buffer (which would presumably consume `codepath`) appears to be
// missing -- confirm against the upstream file.
inline std::vector<uint8_t> decode_to_vector(
const uint8_t* source_data,
const size_t source_data_length,
Codepath codepath = Codepath::Auto
) {
std::vector<uint8_t> buf;
// Size the buffer to exactly the decoded length.
buf.resize(get_decoded_length(source_data, source_data_length));
return buf;


@ -0,0 +1,86 @@ @@ -0,0 +1,86 @@
#pragma once
#include <array>
#ifdef _MSC_VER
#include <intrin.h>
#ifdef __GNUG__
#include <cpuid.h>
namespace cpu_features {
// Bit flags describing the instruction-set extensions reported by CPUID.
// Values are single bits so they can be OR-ed together and tested with `&`.
enum Features : uint64_t {
    None     = 0,
    SSE      = 1 << 0,
    SSE2     = 1 << 1,
    SSE3     = 1 << 2,
    SSSE3    = 1 << 3,
    SSE4_1   = 1 << 4,
    SSE4_2   = 1 << 5,
    AVX      = 1 << 6,
    AVX2     = 1 << 7,
    AVX512F  = 1 << 8,
    AVX512PF = 1 << 9,
    AVX512ER = 1 << 10,
    AVX512CD = 1 << 11,
};
namespace detail {
// Compiler-specific wrappers that run the CPUID instruction for `level`; the results are
// presumably written into the four slots of `info` -- TODO confirm.
// NOTE(review): this view looks truncated -- the matching #endif lines and closing braces
// are missing, and the first argument of __cpuid as well as the operand of the
// reinterpret_cast appear to have been stripped. Confirm against the upstream file.
#ifdef _MSC_VER
inline void cpuid(std::array<int, 4>& info, int level) {
__cpuid(, level);
#ifdef __GNUG__
inline void cpuid(std::array<int, 4>& info, int level) {
auto ptr = reinterpret_cast<unsigned int*>(;
// Level 1 goes through __get_cpuid; every other level is queried with sub-leaf 0.
if (level == 1) {
__get_cpuid(level, &ptr[0], &ptr[1], &ptr[2], &ptr[3]);
} else {
__cpuid_count(level, 0, ptr[0], ptr[1], ptr[2], ptr[3]);
// Queries CPUID and translates the reported capability bits into a Features bit set.
// Leaf 0 supplies the highest supported leaf number; leaf 1 supplies the SSE..AVX bits and
// leaf 7 (when available) supplies the AVX2 / AVX-512 bits.
// NOTE(review): only the CPUID bits are inspected here; whether the operating system has
// enabled the wider register state for AVX/AVX2 is not checked in this function -- confirm
// that is acceptable for the callers.
inline Features get_features_impl() {
    std::array<int, 4> info = {0};
    cpuid(info, 0);
    const int feature_levels = info[0];

    // Feature level 1 always exists
    cpuid(info, 1);
    std::underlying_type_t<Features> features = Features::None;
    if (info[3] & (1 << 25)) { features |= Features::SSE; }
    if (info[3] & (1 << 26)) { features |= Features::SSE2; }
    if (info[2] & (1 << 0))  { features |= Features::SSE3; }
    if (info[2] & (1 << 9))  { features |= Features::SSSE3; }
    if (info[2] & (1 << 19)) { features |= Features::SSE4_1; }
    if (info[2] & (1 << 20)) { features |= Features::SSE4_2; }
    if (info[2] & (1 << 28)) { features |= Features::AVX; }

    // Feature level 7
    if (feature_levels >= 7) {
        std::array<int, 4> info7 = {0};
        cpuid(info7, 7);
        if (info7[1] & (1 << 5))  { features |= Features::AVX2; }
        if (info7[1] & (1 << 16)) { features |= Features::AVX512F; }
        if (info7[1] & (1 << 26)) { features |= Features::AVX512PF; }
        if (info7[1] & (1 << 27)) { features |= Features::AVX512ER; }
        if (info7[1] & (1 << 28)) { features |= Features::AVX512CD; }
    }

    return static_cast<Features>(features);
}
// Returns the detected CPU feature flags. Detection runs once, on the first call, and the
// result is cached in a function-local static for all later calls.
inline Features get_features() {
    static const Features s_features = detail::get_features_impl();
    return s_features;
}


@ -0,0 +1 @@ @@ -0,0 +1 @@
Subproject commit ff6e3c95f2dc038c19e220afce1d045137b1cb24


File diff suppressed because it is too large Load Diff


File diff suppressed because it is too large Load Diff

tgbot-cpp → lib/tgbot-cpp
