henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2004 The WebRTC Project Authors. All rights reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #ifndef WEBRTC_BASE_STRINGENCODE_H_ |
| 12 | #define WEBRTC_BASE_STRINGENCODE_H_ |
| 13 | |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 14 | #include <sstream> |
Peter Thatcher | f7ae127 | 2015-05-15 17:40:45 | [diff] [blame] | 15 | #include <string> |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 16 | #include <vector> |
| 17 | |
andrew@webrtc.org | 4dc8e4e | 2014-09-16 01:03:29 | [diff] [blame] | 18 | #include "webrtc/base/checks.h" |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 19 | |
| 20 | namespace rtc { |
| 21 | |
| 22 | ////////////////////////////////////////////////////////////////////// |
| 23 | // String Encoding Utilities |
| 24 | ////////////////////////////////////////////////////////////////////// |
| 25 | |
| 26 | // Convert an unsigned value to its utf8 representation. Returns the length |
| 27 | // of the encoded string, or 0 if the encoding is longer than buflen - 1. |
| 28 | size_t utf8_encode(char* buffer, size_t buflen, unsigned long value); |
| 29 | // Decode the utf8 encoded value pointed to by source. Returns the number of |
| 30 | // bytes used by the encoding, or 0 if the encoding is invalid. |
| 31 | size_t utf8_decode(const char* source, size_t srclen, unsigned long* value); |
| 32 | |
| 33 | // Escaping prefixes illegal characters with the escape character. Compact, but |
| 34 | // illegal characters still appear in the string. |
| 35 | size_t escape(char * buffer, size_t buflen, |
| 36 | const char * source, size_t srclen, |
| 37 | const char * illegal, char escape); |
| 38 | // Note: in-place unescaping (buffer == source) is allowed. |
| 39 | size_t unescape(char * buffer, size_t buflen, |
| 40 | const char * source, size_t srclen, |
| 41 | char escape); |
| 42 | |
| 43 | // Encoding replaces illegal characters with the escape character and 2 hex |
| 44 | // chars, so it's a little less compact than escape, but completely removes |
| 45 | // illegal characters. Note that hex digits should not be used as illegal |
| 46 | // characters. |
| 47 | size_t encode(char * buffer, size_t buflen, |
| 48 | const char * source, size_t srclen, |
| 49 | const char * illegal, char escape); |
| 50 | // Note: in-place decoding (buffer == source) is allowed. |
| 51 | size_t decode(char * buffer, size_t buflen, |
| 52 | const char * source, size_t srclen, |
| 53 | char escape); |
| 54 | |
| 55 | // Returns a list of characters that may be unsafe for use in the name of a |
| 56 | // file, suitable for passing to the 'illegal' parameter of escape or encode. |
| 57 | const char* unsafe_filename_characters(); |
| 58 | |
| 59 | // url_encode is an encode operation with a predefined set of illegal characters |
| 60 | // and escape character (for use in URLs, obviously). |
| 61 | size_t url_encode(char * buffer, size_t buflen, |
| 62 | const char * source, size_t srclen); |
| 63 | // Note: in-place decoding (buffer == source) is allowed. |
| 64 | size_t url_decode(char * buffer, size_t buflen, |
| 65 | const char * source, size_t srclen); |
| 66 | |
| 67 | // html_encode prevents data embedded in html from containing markup. |
| 68 | size_t html_encode(char * buffer, size_t buflen, |
| 69 | const char * source, size_t srclen); |
| 70 | // Note: in-place decoding (buffer == source) is allowed. |
| 71 | size_t html_decode(char * buffer, size_t buflen, |
| 72 | const char * source, size_t srclen); |
| 73 | |
| 74 | // xml_encode makes data suitable for inside xml attributes and values. |
| 75 | size_t xml_encode(char * buffer, size_t buflen, |
| 76 | const char * source, size_t srclen); |
| 77 | // Note: in-place decoding (buffer == source) is allowed. |
| 78 | size_t xml_decode(char * buffer, size_t buflen, |
| 79 | const char * source, size_t srclen); |
| 80 | |
| 81 | // Convert an unsigned value from 0 to 15 to the hex character equivalent... |
| 82 | char hex_encode(unsigned char val); |
| 83 | // ...and vice-versa. |
| 84 | bool hex_decode(char ch, unsigned char* val); |
| 85 | |
| 86 | // hex_encode shows the hex representation of binary data in ascii. |
| 87 | size_t hex_encode(char* buffer, size_t buflen, |
| 88 | const char* source, size_t srclen); |
| 89 | |
| 90 | // hex_encode, but separate each byte representation with a delimiter. |
| 91 | // |delimiter| == 0 means no delimiter |
| 92 | // If the buffer is too short, we return 0 |
| 93 | size_t hex_encode_with_delimiter(char* buffer, size_t buflen, |
| 94 | const char* source, size_t srclen, |
| 95 | char delimiter); |
| 96 | |
| 97 | // Helper functions for hex_encode. |
Peter Thatcher | f7ae127 | 2015-05-15 17:40:45 | [diff] [blame] | 98 | std::string hex_encode(const std::string& str); |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 99 | std::string hex_encode(const char* source, size_t srclen); |
| 100 | std::string hex_encode_with_delimiter(const char* source, size_t srclen, |
| 101 | char delimiter); |
| 102 | |
| 103 | // hex_decode converts ascii hex to binary. |
| 104 | size_t hex_decode(char* buffer, size_t buflen, |
| 105 | const char* source, size_t srclen); |
| 106 | |
| 107 | // hex_decode, assuming that there is a delimiter between every byte |
| 108 | // pair. |
| 109 | // |delimiter| == 0 means no delimiter |
| 110 | // If the buffer is too short or the data is invalid, we return 0. |
| 111 | size_t hex_decode_with_delimiter(char* buffer, size_t buflen, |
| 112 | const char* source, size_t srclen, |
| 113 | char delimiter); |
| 114 | |
| 115 | // Helper functions for hex_decode. |
| 116 | size_t hex_decode(char* buffer, size_t buflen, const std::string& source); |
| 117 | size_t hex_decode_with_delimiter(char* buffer, size_t buflen, |
| 118 | const std::string& source, char delimiter); |
| 119 | |
| 120 | // Apply any suitable string transform (including the ones above) to an STL |
| 121 | // string. Stack-allocated temporary space is used for the transformation, |
| 122 | // so value and source may refer to the same string. |
| 123 | typedef size_t (*Transform)(char * buffer, size_t buflen, |
| 124 | const char * source, size_t srclen); |
| 125 | size_t transform(std::string& value, size_t maxlen, const std::string& source, |
| 126 | Transform t); |
| 127 | |
| 128 | // Return the result of applying transform t to source. |
| 129 | std::string s_transform(const std::string& source, Transform t); |
| 130 | |
| 131 | // Convenience wrappers. |
| 132 | inline std::string s_url_encode(const std::string& source) { |
| 133 | return s_transform(source, url_encode); |
| 134 | } |
| 135 | inline std::string s_url_decode(const std::string& source) { |
| 136 | return s_transform(source, url_decode); |
| 137 | } |
| 138 | |
| 139 | // Splits the source string into multiple fields separated by delimiter, |
| 140 | // with duplicates of delimiter creating empty fields. |
| 141 | size_t split(const std::string& source, char delimiter, |
| 142 | std::vector<std::string>* fields); |
| 143 | |
| 144 | // Splits the source string into multiple fields separated by delimiter, |
| 145 | // with duplicates of delimiter ignored. Trailing delimiter ignored. |
| 146 | size_t tokenize(const std::string& source, char delimiter, |
| 147 | std::vector<std::string>* fields); |
| 148 | |
deadbeef | 75bd1dc | 2015-10-06 18:38:28 | [diff] [blame] | 149 | // Tokenize, including the empty tokens. |
| 150 | size_t tokenize_with_empty_tokens(const std::string& source, |
| 151 | char delimiter, |
| 152 | std::vector<std::string>* fields); |
| 153 | |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 154 | // Tokenize and append the tokens to fields. Return the new size of fields. |
| 155 | size_t tokenize_append(const std::string& source, char delimiter, |
| 156 | std::vector<std::string>* fields); |
| 157 | |
| 158 | // Splits the source string into multiple fields separated by delimiter, with |
| 159 | // duplicates of delimiter ignored. Trailing delimiter ignored. A substring in |
| 160 | // between the start_mark and the end_mark is treated as a single field. Return |
| 161 | // the size of fields. For example, if source is "filename |
| 162 | // \"/Library/Application Support/media content.txt\"", delimiter is ' ', and |
| 163 | // the start_mark and end_mark are '"', this method returns two fields: |
| 164 | // "filename" and "/Library/Application Support/media content.txt". |
| 165 | size_t tokenize(const std::string& source, char delimiter, char start_mark, |
| 166 | char end_mark, std::vector<std::string>* fields); |
| 167 | |
Donald Curtis | 048395a | 2015-05-15 16:21:23 | [diff] [blame] | 168 | // Extract the first token from source as separated by delimiter, with |
| 169 | // duplicates of delimiter ignored. Return false if the delimiter could not be |
| 170 | // found, otherwise return true. |
Donald Curtis | 20dfa1d | 2015-05-15 20:14:24 | [diff] [blame] | 171 | bool tokenize_first(const std::string& source, |
| 172 | const char delimiter, |
| 173 | std::string* token, |
| 174 | std::string* rest); |
Donald Curtis | 048395a | 2015-05-15 16:21:23 | [diff] [blame] | 175 | |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 176 | // Safe sprintf to std::string |
| 177 | //void sprintf(std::string& value, size_t maxlen, const char * format, ...) |
| 178 | // PRINTF_FORMAT(3); |
| 179 | |
| 180 | // Convert arbitrary values to/from a string. |
| 181 | |
| 182 | template <class T> |
| 183 | static bool ToString(const T &t, std::string* s) { |
henrikg | 5c075c8 | 2015-09-17 07:24:34 | [diff] [blame] | 184 | RTC_DCHECK(s); |
henrike@webrtc.org | 47be73b | 2014-05-13 18:00:26 | [diff] [blame] | 185 | std::ostringstream oss; |
| 186 | oss << std::boolalpha << t; |
| 187 | *s = oss.str(); |
| 188 | return !oss.fail(); |
| 189 | } |
| 190 | |
// Parses |s| with operator>> into |*t|. std::boolalpha is set on the stream,
// so bool values are read as text ("true"/"false") rather than 1/0.
// |t| is checked with RTC_DCHECK. Returns true unless the extraction left the
// input stream in a failed state.
template <class T>
static bool FromString(const std::string& s, T* t) {
  RTC_DCHECK(t);
  std::istringstream parser(s);
  parser >> std::boolalpha;
  parser >> *t;
  return !parser.fail();
}
| 198 | |
| 199 | // Inline versions of the string conversion routines. |
| 200 | |
// Value-returning form of ToString: formats |val| through the two-argument
// ToString and returns the resulting text. The success flag of the underlying
// call is discarded.
template<typename T>
static inline std::string ToString(const T& val) {
  std::string text;
  ToString(val, &text);
  return text;
}
| 205 | |
// Value-returning form of FromString: parses |str| through the two-argument
// FromString and returns the parsed value. The success flag of the underlying
// call is discarded.
template<typename T>
static inline T FromString(const std::string& str) {
  // Value-initialize rather than default-initialize: when the extraction
  // never writes to |val| (for example, the input is empty), a scalar T would
  // otherwise be returned with an indeterminate value.
  T val = T();
  FromString(str, &val);
  return val;
}
| 210 | |
// Parses |str| through the two-argument FromString and returns the parsed
// value, or |defaultValue| when the parse reports failure.
template<typename T>
static inline T FromString(const T& defaultValue, const std::string& str) {
  T val(defaultValue);
  // A failed extraction may still overwrite |val| (since C++11 a failed
  // numeric extraction stores zero or a limit value), so the working copy
  // cannot be trusted on failure; fall back to the caller's default instead.
  if (!FromString(str, &val)) {
    return defaultValue;
  }
  return val;
}
| 215 | |
| 216 | // simple function to strip out characters which shouldn't be |
| 217 | // used in filenames |
| 218 | char make_char_safe_for_filename(char c); |
| 219 | |
| 220 | ////////////////////////////////////////////////////////////////////// |
| 221 | |
| 222 | } // namespace rtc |
| 223 | |
| 224 | #endif // WEBRTC_BASE_STRINGENCODE_H_ |