Files
2026-06-01 12:46:52 +02:00

424 lines
18 KiB
C++

#ifndef BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
#define BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <algorithm>
#include <cstring>
#include <iterator>
#include <string>
namespace boost {
namespace network {
namespace utils {
// Implements a BASE64 converter working on an iterator range.
// If the input sequence does not end at the three-byte boundary, the last
// encoded value part is remembered in an encoding state to be able to
// continue with the next chunk; the BASE64 encoding processes the input
// by byte-triplets.
//
// Summarized interface:
//
// struct state<Value> {
// bool empty () const;
// void clear();
// }
//
// OutputIterator encode(InputIterator begin, InputIterator end,
// OutputIterator output, State & rest)
// OutputIterator encode_rest(OutputIterator output, State & rest)
// OutputIterator encode(InputRange const & input, OutputIterator output,
// State & rest)
// OutputIterator encode(char const * value, OutputIterator output,
// state<char> & rest)
// std::basic_string<Char> encode(InputRange const & value, State & rest)
// std::basic_string<Char> encode(char const * value, state<char> & rest)
//
// OutputIterator encode(InputIterator begin, InputIterator end,
// OutputIterator output)
// OutputIterator encode(InputRange const & input, OutputIterator output)
// OutputIterator encode(char const * value, OutputIterator output)
// std::basic_string<Char> encode(InputRange const & value)
// std::basic_string<Char> encode(char const * value)
//
// See also http://libb64.sourceforge.net, which served as inspiration.
// See also http://tools.ietf.org/html/rfc4648 for the specification.
namespace base64 {
namespace detail {
// Maps one 6-bit group taken from the input onto its character in the
// BASE64 output alphabet: 'A'-'Z', 'a'-'z', '0'-'9', '+' and '/'.
//
// value - index into the alphabet; it is converted to unsigned int and
//         used as is, no range check is performed, so the caller has to
//         pass a number from 0 to 63
//
// Returns the alphabet character found at that index.
template <typename Value>
char encode_value(Value value) {
    static char const alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789+/";
    unsigned int const index = static_cast<unsigned int>(value);
    return alphabet[index];
}
} // namespace detail
// Remembers where the encoder stopped after the last chunk. When the
// byte-length of a chunk is not divisible by three, its trailing
// (incomplete) quantum cannot be encoded right away; the bits which are
// left over wait here until the next chunk arrives and is processed as if
// the leftover had been at its beginning.
template <typename Value>
struct state {
    // Creates a state positioned at a quantum boundary with no bits
    // waiting to be encoded.
    state() : triplet_index(0), last_encoded_value(0) {}

    // Copies both the position within the quantum and the waiting bits.
    state(state<Value> const & source)
        : triplet_index(source.triplet_index),
          last_encoded_value(source.last_encoded_value) {}

    // Tells whether the previously processed input ended on a quantum
    // boundary, so that nothing waits for a continuation or for padding.
    bool empty() const { return 0 == triplet_index; }

    // Returns the state to the condition of a freshly constructed one.
    void clear() {
        // forget the leftover bits; the encoder or-s the upcoming input
        // into this value, so it has to be zero before a new encoding
        // begins
        last_encoded_value = 0;
        // mark the position as a quantum boundary - neither a leftover
        // nor padding is pending
        triplet_index = 0;
    }

protected:
    // how many octets of the incomplete quantum have been consumed so
    // far: 0 when the last quantum was complete, 1 when only its first
    // octet was available and 2 when its first two octets were available
    unsigned char triplet_index;
    // the bits of the last consumed octet which have not been emitted
    // yet, already shifted to the position they take in the next 6-bit
    // code
    Value last_encoded_value;

    // the chunk encoder reads and updates the state
    template <
        typename InputIterator,
        typename OutputIterator,
        typename State
        >
    friend OutputIterator encode(InputIterator begin,
                                 InputIterator end,
                                 OutputIterator output,
                                 State & rest);

    // the finishing step reads and clears the state
    template <
        typename OutputIterator,
        typename State
        >
    friend OutputIterator encode_rest(OutputIterator output,
                                      State & rest);
};
// Encodes an input sequence to BASE64 writing it to the output iterator
// and stopping if the last input three-octet quantum was not complete, in
// which case it stores the state for the later continuation, when another
// input chunk is ready for the encoding. The encoding must be finished
// by calling the encode_rest after processing the last chunk.
//
// begin, end - boundaries of the input chunk; only the lowest eight bits
//              of every input value take part in the encoding
// output     - iterator receiving the encoded characters
// rest       - encoding state; it is read on entry and written only
//              right before returning
//
// Returns the output iterator advanced past the last written character.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64::state<unsigned char> rest;
// base64::encode(buffer.begin(), buffer.end(), appender, rest);
// ...
// base64::encode_rest(appender, rest);
template <
typename InputIterator,
typename OutputIterator,
typename State
>
OutputIterator encode(InputIterator begin,
InputIterator end,
OutputIterator output,
State & rest) {
typedef typename iterator_value<InputIterator>::type value_type;
// continue with the rest of the last chunk - 2 or 4 bits which
// are already shifted to the left and need to be or-ed with the
// continuing data up to the target 6 bits
value_type encoded_value = rest.last_encoded_value;
// if the previous chunk stopped at encoding the first (1) or the second
// (2) octet of the three-byte quantum, jump to the right place,
// otherwise start the loop with an empty encoded value buffer; the
// case labels are placed inside the body of the for loop on purpose,
// so that the switch enters the loop in the middle of a quantum
switch (rest.triplet_index) {
// this loop processes the input sequence octet by octet; one pass
// through its body consumes a whole three-octet quantum and emits
// four characters; current_value holds the octet which has just been
// read and encoded_value collects the bits of the next 6-bit code;
// current_value is declared without an initializer and is assigned
// in every case before it is used
for (value_type current_value;;) {
case 0:
// if the input sequence is empty or reached its end at the
// 3-byte boundary, finish with an empty encoding state
if (begin == end) {
rest.triplet_index = 0;
// the last encoded value is not interesting - it would not
// be used, because processing of the next chunk will start
// at the 3-byte boundary
rest.last_encoded_value = 0;
return output;
}
// read the first octet from the current triplet
current_value = *begin++;
// use just the upper 6 bits to encode it to the target alphabet
encoded_value = (current_value & 0xfc) >> 2;
*output++ = detail::encode_value(encoded_value);
// shift the remaining two bits up to make place for the upcoming
// part of the next octet
encoded_value = (current_value & 0x03) << 4;
// intentional fall-through to the second octet of the triplet
case 1:
// if the input sequence reached its end after the first octet
// from the quantum triplet, store the encoding state and finish
if (begin == end) {
rest.triplet_index = 1;
rest.last_encoded_value = encoded_value;
return output;
}
// read the second octet from the current triplet
current_value = *begin++;
// combine the upper four bits (as the lower part) with the
// previous two bits to encode it to the target alphabet
encoded_value |= (current_value & 0xf0) >> 4;
*output++ = detail::encode_value(encoded_value);
// shift the remaining four bits up to make place for the upcoming
// part of the next octet
encoded_value = (current_value & 0x0f) << 2;
// intentional fall-through to the third octet of the triplet
case 2:
// if the input sequence reached its end after the second octet
// from the quantum triplet, store the encoding state and finish
if (begin == end) {
rest.triplet_index = 2;
rest.last_encoded_value = encoded_value;
return output;
}
// read the third octet from the current triplet
current_value = *begin++;
// combine the upper two bits (as the lower part) with the
// previous four bits to encode it to the target alphabet
encoded_value |= (current_value & 0xc0) >> 6;
*output++ = detail::encode_value(encoded_value);
// encode the remaining 6 bits to the target alphabet
encoded_value = current_value & 0x3f;
*output++ = detail::encode_value(encoded_value);
// the quantum is complete; the loop continues at the case 0 label
}
}
// reached only if the triplet index in the state matched none of the
// case labels above; nothing has been read, written or stored then
return output;
}
// Completes the encoding of the chunks processed so far. If the state is
// empty (the total byte-length was divisible by three), nothing is written.
// Otherwise the waiting bits are encoded as if the input was padded with
// zeroes, '=' characters are appended to indicate the padding and the
// state is cleared. Call this once after the last chunk whenever the
// chunks were encoded by the overload accepting the encoding state.
//
// output - iterator receiving the last encoded character and the padding
// rest   - encoding state left by the preceding encode calls
//
// Returns the output iterator advanced past the last written character.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64::state<unsigned char> rest;
// base64::encode(buffer.begin(), buffer.end(), appender, rest);
// ...
// base64::encode_rest(appender, rest);
template <
    typename OutputIterator,
    typename State
    >
OutputIterator encode_rest(OutputIterator output,
                           State & rest) {
    // nothing is pending if the input ended on a quantum boundary
    if (rest.empty())
        return output;
    // the waiting bits (either 2 or 4 of them) are already shifted to
    // their final position; the missing low bits stay zero
    *output++ = detail::encode_value(rest.last_encoded_value);
    // an incomplete quantum always needs at least one padding character
    *output++ = '=';
    // if only the first octet of the quantum was available, one more
    // 6-bit code is missing and needs its own padding character
    if (rest.triplet_index < 2)
        *output++ = '=';
    // leave the state clean, so that a repeated call writes nothing and
    // a new encoding can begin with it
    rest.clear();
    return output;
}
// Encodes a part of an input sequence specified by the pair of begin and
// end iterators.to BASE64 writing it to the output iterator. If its total
// byte-length was not divisible by three, the output will be padded by the
// '=' characters. If you encode an input consisting of mutiple chunks,
// use the method overload maintaining the encoding state.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// base64::encode(buffer.begin(), buffer.end(), std::back_inserter(result));
template <
typename InputIterator,
typename OutputIterator
>
OutputIterator encode(InputIterator begin,
InputIterator end,
OutputIterator output) {
state<typename iterator_value<InputIterator>::type> rest;
output = encode(begin, end, output, rest);
return encode_rest(output, rest);
}
// Encodes one chunk given as a range to BASE64, writing the result to the
// output iterator. The boundaries of the range are obtained by
// boost::begin and boost::end. If the chunk does not end on a three-octet
// boundary, the incomplete quantum is kept in the encoding state until
// the next chunk is encoded; encode_rest has to be called after the last
// chunk.
//
// Note: according to the remarks in this file, a character buffer handed
// over as a range is encoded including its terminating zero, if it has
// any; string literals are handled by a dedicated overload to avoid that.
//
// input  - the range to encode
// output - iterator receiving the encoded characters
// rest   - encoding state shared by all chunks of one input
//
// Returns the output iterator advanced past the last written character.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64::state<unsigned char> rest;
// base64::encode(buffer, appender, rest);
// ...
// base64::encode_rest(appender, rest);
template <
    typename InputRange,
    typename OutputIterator,
    typename State
    >
OutputIterator encode(InputRange const & input,
                      OutputIterator output,
                      State & rest) {
    // delegate to the iterator-based overload maintaining the state
    return encode(boost::begin(input),
                  boost::end(input),
                  output,
                  rest);
}
// Encodes one chunk given as a zero-terminated string (typically a string
// literal) to BASE64, writing the result to the output iterator. If the
// string does not end on a three-octet boundary, the incomplete quantum
// is kept in the encoding state until the next chunk is encoded;
// encode_rest has to be called after the last chunk.
//
// The length of the string is measured by std::strlen, which means that
// the terminating zero character is not encoded.
//
// value  - pointer to a zero-terminated character sequence; it must not
//          be null
// output - iterator receiving the encoded characters
// rest   - encoding state shared by all chunks of one input
//
// Returns the output iterator advanced past the last written character.
//
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64::state<char> rest;
// base64::encode("ab", appender, rest);
// ...
// base64::encode_rest(appender, rest);
template <typename OutputIterator>
OutputIterator encode(char const * value,
                      OutputIterator output,
                      state<char> & rest) {
    // std::strlen is declared by <cstring>; the qualified name does not
    // depend on the global declaration being provided by another header
    return encode(value, value + std::strlen(value), output, rest);
}
// Encodes a whole range to BASE64 in one step, writing the result to the
// output iterator. The boundaries of the range are obtained by
// boost::begin and boost::end. If the byte-length of the input is not
// divisible by three, the output is padded by the '=' characters. An
// input which comes in multiple chunks has to be encoded by the overload
// maintaining the encoding state.
//
// Note: according to the remarks in this file, a character buffer handed
// over as a range is encoded including its terminating zero, if it has
// any; string literals are handled by a dedicated overload to avoid that.
//
// value  - the range to encode
// output - iterator receiving the encoded characters
//
// Returns the output iterator advanced past the last written character.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// base64::encode(buffer, std::back_inserter(result));
template <
    typename InputRange,
    typename OutputIterator
    >
OutputIterator encode(InputRange const & value,
                      OutputIterator output) {
    // delegate to the iterator-based overload, which adds the padding
    return encode(boost::begin(value),
                  boost::end(value),
                  output);
}
// Encodes a whole zero-terminated string (typically a string literal) to
// BASE64 in one step, writing the result to the output iterator. If the
// length of the string (without the terminating zero) is not divisible by
// three, the output is padded by the '=' characters. An input which comes
// in multiple chunks has to be encoded by the overload maintaining the
// encoding state.
//
// The length of the string is measured by std::strlen, which means that
// the terminating zero character is not encoded.
//
// value  - pointer to a zero-terminated character sequence; it must not
//          be null
// output - iterator receiving the encoded characters
//
// Returns the output iterator advanced past the last written character.
//
// std::basic_string<Char> result;
// base64::encode("ab", std::back_inserter(result));
template <typename OutputIterator>
OutputIterator encode(char const * value,
                      OutputIterator output) {
    // std::strlen is declared by <cstring>; the qualified name does not
    // depend on the global declaration being provided by another header
    return encode(value, value + std::strlen(value), output);
}
// Encodes a whole range to BASE64 in one step and returns the result as
// a string of the requested character type. The range is passed to the
// overload writing to an output iterator, which pads the output by the
// '=' characters if the byte-length of the input is not divisible by
// three. An input which comes in multiple chunks has to be encoded by
// the overloads maintaining the encoding state and writing to an output
// iterator.
//
// Note: according to the remarks in this file, a character buffer handed
// over as a range is encoded including its terminating zero, if it has
// any; string literals are handled by a dedicated overload to avoid that.
//
// Char  - character type of the returned string; has to be specified
//         explicitly
// value - the range to encode
//
// Returns the encoded text.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result = base64::encode<Char>(buffer);
template <
    typename Char,
    typename InputRange
    >
std::basic_string<Char> encode(InputRange const & value) {
    std::basic_string<Char> encoded;
    // append the encoded characters to the end of the returned string
    encode(value, std::back_inserter(encoded));
    return encoded;
}
// Encodes a whole string given as a character pointer (typically a string
// literal) to BASE64 in one step and returns the result as a string of
// the requested character type. The pointer is passed to the overload
// writing to an output iterator; the string is expected to be encoded
// without its terminating zero and padded by the '=' characters if its
// length is not divisible by three. An input which comes in multiple
// chunks has to be encoded by the overloads maintaining the encoding
// state and writing to an output iterator.
//
// Char  - character type of the returned string; has to be specified
//         explicitly
// value - pointer to the character sequence to encode
//
// Returns the encoded text.
//
// std::basic_string<Char> result = base64::encode<Char>("ab");
template <typename Char>
std::basic_string<Char> encode(char const * value) {
    std::basic_string<Char> encoded;
    // append the encoded characters to the end of the returned string
    encode(value, std::back_inserter(encoded));
    return encoded;
}
// The function overloads for string literals encode the input without
// the terminating zero, which is usually expected, because the trailing
// zero byte is not considered a part of the string value; the overloads
// for an input range would wrap the string literal by Boost.Range and
// encode the full memory occupied by the string literal - including the
// unwanted last zero byte.
} // namespace base64
} // namespace utils
} // namespace network
} // namespace boost
#endif // BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP