//this file is part of MimeTools (plugin for Notepad++)
//Copyright (C)2019 Don HO <don.h@free.fr>
//
//
// Enhance Base64 features, and rewrite Base64 encode/decode implementation
// Copyright 2019 by Paul Nankervis <paulnank@hotmail.com>
//
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU General Public License
//as published by the Free Software Foundation; either
//version 2 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

/**
 * Base64 encoding decoding - where 8 bit ascii is re-represented using just 64 ascii characters (plus optional padding '=').
 *
 * This code includes options to encode to base64 in multiple ways. For example the text lines:-
 *
 *  If you can keep your head when all about you
 *  Are losing theirs and blaming it on you;
 *
 * Using "Encode with Unix EOL" would produce a single base64 string with line breaks after each 64 characters:-
 *
 *  SWYgeW91IGNhbiBrZWVwIHlvdXIgaGVhZCB3aGVuIGFsbCBhYm91dCB5b3UNCkFy
 *  ZSBsb3NpbmcgdGhlaXJzIGFuZCBibGFtaW5nIGl0IG9uIHlvdTs=
 *
 * That would be decoded using a single base64 decode which ignored whitespace characters (the line breaks).
 *
 * Alternatively the same lines could be encoded using a "by line" option to encode each line of input as
 * its own separate base64 string:-
 *
 *  SWYgeW91IGNhbiBrZWVwIHlvdXIgaGVhZCB3aGVuIGFsbCBhYm91dCB5b3U
 *  QXJlIGxvc2luZyB0aGVpcnMgYW5kIGJsYW1pbmcgaXQgb24geW91Ow
 *
 * Each of these output lines could be decoded separately, or multiple lines decoded using "reset on whitespace"
 * to cause base64 decoding to restart on each line
 */
module npp_mimetools.b64;


enum base64CharSet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

static immutable int[] base64CharMap =
[
    // base64 values or: -1 for illegal character, -2 to ignore character, and -3 for pad ('=')
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, // <tab> <lf> & <cr> are ignored
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // <space> is ignored
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -3, -1, -1, // '=' is the pad character
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
];

// The decoder only looks up bytes below 0x80 in this table; keep the two in step.
static assert(base64CharMap.length == 0x80);

/**
 * base64Encode simply converts ascii to base64 with appropriate wrapping and padding. Encoding is done by loading
 * three ascii characters at a time into a bitField, and then extracting them as four base64 values.
 *
 * Params:
 *      resultString = output buffer; assumed to be large enough to contain the result (which is typically
 *                     4 / 3 the input size plus line breaks)
 *      asciiString = input bytes to encode
 *      asciiStringLength = number of bytes of asciiString to encode
 *      wrapLength = length at which to wrap the encoded text with a '\n' (0 disables wrapping; ignored when
 *                   byLineFlag is set)
 *      padFlag = controls whether the one or two '=' pad characters are included at the end of encoding
 *                (ignored when byLineFlag is set)
 *      byLineFlag = causes each input line to be encoded as a separate base64 string; the <CR> / <LF>
 *                   characters between lines are copied to the output unchanged
 *
 * Returns: the length of the result written to resultString
 */
pure nothrow @safe @nogc
int base64Encode(ref char[] resultString, const char[] asciiString, size_t asciiStringLength, size_t wrapLength, bool padFlag, bool byLineFlag)
{
    // current output line length (used for wrapping)
    size_t lineLength = 0;

    // result string length
    int resultLength = 0;

    // offset into bit field (8 bit input: 16, 8, 0 -> 6 bit output: 18, 12, 6, 0)
    // Declared outside the main loop because the value left behind by the final group
    // tells the padding code how many '=' are required. -1 means nothing was encoded.
    int bitOffset = -1;

    for (size_t index = 0; index < asciiStringLength; ) {
        // assembled bit field (up to 3 ascii characters at a time)
        int bitField = 0;

        // load up to three input bytes, stopping early at a line break in by-line mode
        for (bitOffset = 16; (bitOffset >= 0) && (index < asciiStringLength); bitOffset -= 8) {
            immutable int charValue = cast(ubyte)(asciiString[index]);

            if ((byLineFlag) && ((charValue == '\n') || (charValue == '\r'))) {
                break;
            }

            index++;
            bitField |= charValue << bitOffset;
        }

        // end indicator: output offsets at or below this hold no input bits
        immutable int endOffset = bitOffset + 3;

        for (bitOffset = 18; bitOffset > endOffset; bitOffset -= 6) {
            if ((wrapLength > 0) && (lineLength++ >= wrapLength) && (!byLineFlag)) {
                resultString[resultLength++] = '\n';
                lineLength = 1;
            }

            resultString[resultLength++] = .base64CharSet[(bitField >> bitOffset) & 0x3F];
        }

        if (byLineFlag) {
            // copy the line break characters through untouched
            while ((index < asciiStringLength) && ((asciiString[index] == '\n') || (asciiString[index] == '\r'))) {
                resultString[resultLength++] = asciiString[index++];
            }
        }
    }

    if (padFlag && !byLineFlag) {
        // bitOffset is 6 after a one byte final group (two pads), 0 after a two byte
        // group (one pad), and negative after a full group or empty input (no pads)
        for (; bitOffset >= 0; bitOffset -= 6) {
            if ((wrapLength > 0) && (lineLength++ >= wrapLength)) {
                resultString[resultLength++] = '\n';
                lineLength = 1;
            }

            resultString[resultLength++] = '=';
        }
    }

    return resultLength;
}

/**
 * base64Decode converts base64 to ascii. But there are choices about what to do with illegal characters or
 * malformed strings. In this version there is a strict flag to indicate that the input must be a single
 * valid base64 string with no illegal characters, no extra padding, and no short segments. Otherwise
 * there is best effort to decode around illegal characters which ARE preserved in the output.
 * So "TWFyeQ==.aGFk.YQ.bGl0dGxl.bGFtYg==" decodes to "Mary.had.a.little.lamb" with five separate
 * base64 strings decoded, each separated by the illegal character dot. In strict mode the first dot
 * would trigger a fatal error. Some other implementations choose to ignore illegal characters which
 * of course has its own issues.
 * The four whitespace characters <CR> <LF> <TAB> and <SPACE> are silently ignored unless whitespaceReset
 * is set. In this case whitespace is treated similar to illegal characters and base64 decoding operates
 * around the white space. So "TWFyeQ== aGFk YQ bGl0dGxl bGFtYg==" would decode as "Mary had a little lamb".
 * Bytes with the top bit set (0x80 and above) are never part of the base64 alphabet and are handled as
 * illegal characters.
 * Decoding is done by loading four base64 characters at a time into a bitField, and then extracting them as
 * three ascii characters.
 *
 * Params:
 *      resultString = output buffer; assumed to be large enough to contain the result (which could be the
 *                     same size as the input)
 *      encodedString = base64 text to decode
 *      encodedStringLength = number of bytes of encodedString to decode
 *      strictFlag = reject malformed input instead of decoding around it
 *      whitespaceReset = treat whitespace like an illegal character, restarting decoding after it
 *
 * Returns: the length of the result, or a negative value in case of an error (strict mode only):
 *      -1 data after a pad character,
 *      -2 pad character in the wrong place, or more pad characters than the final group can take,
 *      -3 a group made of a single base64 symbol,
 *      -4 illegal character in the input
 */
pure nothrow @safe @nogc
int base64Decode(ref char[] resultString, const char[] encodedString, size_t encodedStringLength, bool strictFlag, bool whitespaceReset)
{
    // result string length
    int resultLength = 0;

    // pad characters seen
    int padLength = 0;

    for (size_t index = 0; index < encodedStringLength; ) {
        // assembled bit field (up to 4 base64 symbols at a time)
        int bitField = 0;

        // offset into bit field (6 bit input: 18, 12, 6, 0 -> 8 bit output: 16, 8, 0)
        int bitOffset = 18;

        // last input byte read and its classification from base64CharMap
        int charValue = 0;
        int charIndex = 0;

        while ((bitOffset >= 0) && (index < encodedStringLength)) {
            charValue = cast(ubyte)(encodedString[index++]);

            // The table only covers 7 bit ascii. Anything above that is illegal; masking it
            // down to 7 bits would wrongly alias it onto an unrelated ascii character.
            charIndex = (charValue < 0x80) ? .base64CharMap[charValue] : -1;

            if (charIndex >= 0) {
                if ((padLength > 0) && (strictFlag)) {
                    // **ERROR** Data after pad character
                    return -1;
                }

                bitField |= charIndex << bitOffset;
                bitOffset -= 6;
            } else if (charIndex == -3) { // -3 is Pad character '='
                padLength++;

                if (strictFlag) {
                    if (bitOffset > 6) {
                        // **ERROR** Pad character in wrong place
                        return -2;
                    }

                    // two symbols (bitOffset 6) take at most "==", three (bitOffset 0) at most "="
                    if (padLength > ((bitOffset == 6) ? 2 : 1)) {
                        // **ERROR** Extra pad characters
                        return -2;
                    }
                }
            } else if ((charIndex == -1) || (whitespaceReset)) {
                // either -1 for illegal character or -2 for whitespace being used as a reset:
                // remember it as an illegal character for the copy below and end this group
                charIndex = -1;
                break;
            }
            // otherwise plain whitespace, which is skipped
        }

        if ((strictFlag) && (bitOffset == 12)) {
            // **ERROR** Single symbol block not valid
            return -3;
        }

        // end indicator: output offsets at or below this hold no complete byte
        immutable int endOffset = bitOffset + 3;

        for (int outOffset = 16; outOffset > endOffset; outOffset -= 8) {
            resultString[resultLength++] = cast(char)((bitField >> outOffset) & 0xFF);
        }

        if (charIndex == -1) { // Was there an illegal character?
            if (strictFlag) {
                // **ERROR** Bad character in input string
                return -4;
            }

            // best effort mode keeps the offending byte in the output
            resultString[resultLength++] = cast(char)(charValue);
        }
    }

    return resultLength;
}