npp_mimetools.b64 source code

1 //this file is part of MimeTools (plugin for Notepad++)
2 //Copyright (C)2019 Don HO <don.h@free.fr>
3 //
4 //
5 // Enhance Base64 features, and rewrite Base64 encode/decode implementation
6 // Copyright 2019 by Paul Nankervis <paulnank@hotmail.com>
7 //
8 //
9 //This program is free software; you can redistribute it and/or
10 //modify it under the terms of the GNU General Public License
11 //as published by the Free Software Foundation; either
12 //version 2 of the License, or (at your option) any later version.
13 //
14 //This program is distributed in the hope that it will be useful,
15 //but WITHOUT ANY WARRANTY; without even the implied warranty of
16 //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
17 //GNU General Public License for more details.
18 //
19 //You should have received a copy of the GNU General Public License
20 //along with this program; if not, write to the Free Software
21 //Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 
23 /**
24  * Base64 encoding decoding - where 8 bit ascii is re-represented using just 64 ascii characters (plus optional padding '=').
25  *
26  * This code includes options to encode to base64 in multiple ways. For example the text lines:-
27  *
28  *	If you can keep your head when all about you
29  *	Are losing theirs and blaming it on you;
30  *
31  * Using "Encode with Unix EOL" would produce a single base64 string with line breaks after each 64 characters:-
32  *
33  *	SWYgeW91IGNhbiBrZWVwIHlvdXIgaGVhZCB3aGVuIGFsbCBhYm91dCB5b3UNCkFy
34  *	ZSBsb3NpbmcgdGhlaXJzIGFuZCBibGFtaW5nIGl0IG9uIHlvdTs=
35  *
36  * That would be decoded using a single base64 decode which ignored whitespace characters (the line breaks).
37  *
38  * Alternatively the same lines could be encoded using a "by line" option to encode each line of input as
39  * its own separate base64 string:-
40  *
41  *	SWYgeW91IGNhbiBrZWVwIHlvdXIgaGVhZCB3aGVuIGFsbCBhYm91dCB5b3U
42  *	QXJlIGxvc2luZyB0aGVpcnMgYW5kIGJsYW1pbmcgaXQgb24geW91Ow
43  *
44  * Each of these output lines could be decoded separately, or multiple lines decoded using "reset on whitespace"
45  * to cause base64 decoding to restart on each line
46  */
47 module npp_mimetools.b64;
48 
49 
// The 64 output symbols, indexed by 6 bit value
enum base64CharSet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" ~
	"abcdefghijklmnopqrstuvwxyz" ~
	"0123456789+/";

// Reverse lookup for the 128 seven bit character codes. Each entry is either the 6 bit base64
// value of the character or a marker: -1 illegal character, -2 character to ignore, -3 pad ('=')
static immutable int[] base64CharMap =
[
	// 0x00 - 0x0F: <tab> (0x09), <lf> (0x0A) and <cr> (0x0D) are ignored
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,

	// 0x10 - 0x1F
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

	// 0x20 - 0x2F: <space> is ignored, '+' is 62 and '/' is 63
	-2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,

	// 0x30 - 0x3F: '0' - '9' are 52 - 61, '=' is the pad character
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -3, -1, -1,

	// 0x40 - 0x4F: 'A' - 'O' are 0 - 14
	-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,

	// 0x50 - 0x5F: 'P' - 'Z' are 15 - 25
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

	// 0x60 - 0x6F: 'a' - 'o' are 26 - 40
	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,

	// 0x70 - 0x7F: 'p' - 'z' are 41 - 51
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
];
64 
/**
 * base64Encode converts bytes to base64 text, with optional line wrapping and padding.
 *
 * The input is consumed in groups of up to three bytes. Each group is packed into a 24 bit value and
 * written out as base64 symbols, one more symbol than the number of bytes in the group (so 1, 2 or 3
 * bytes produce 2, 3 or 4 symbols).
 *
 * Params:
 *	resultString = output buffer. It is not resized here, so it must already be large enough for the
 *	               result (typically 4 / 3 of the input size plus line breaks)
 *	asciiString = bytes to encode
 *	asciiStringLength = number of bytes of asciiString to encode
 *	wrapLength = when non zero, a '\n' is written before any symbol or pad that would make the
 *	             current line longer than this (not applied when byLineFlag is set)
 *	padFlag = write the one or two '=' characters that complete the final group
 *	          (not applied when byLineFlag is set)
 *	byLineFlag = encode each input line as a separate base64 string; the '\n' and '\r' characters
 *	             between lines are copied to the output unchanged
 *
 * Returns: the number of characters written to resultString
 */
pure nothrow @safe @nogc
int base64Encode(ref char[] resultString, const char[] asciiString, size_t asciiStringLength, size_t wrapLength, bool padFlag, bool byLineFlag)

	do
	{
		// line wrapping only happens when producing a single base64 string
		immutable bool wrapping = (wrapLength > 0) && (!byLineFlag);

		// next input byte to read
		size_t readPos = 0;

		// characters written so far
		int written = 0;

		// characters on the current output line (only maintained while wrapping)
		size_t column = 0;

		// bytes packed into the most recent group (stays 0 when there is no input at all)
		int groupBytes = 0;

		while (readPos < asciiStringLength) {
			int packed = 0;
			groupBytes = 0;

			// gather up to three bytes; in "by line" mode a line break closes the group early
			while ((groupBytes < 3) && (readPos < asciiStringLength)) {
				immutable int octet = cast(ubyte)(asciiString[readPos]);

				if (byLineFlag && ((octet == '\n') || (octet == '\r'))) {
					break;
				}

				// first byte lands in bits 16-23, second in 8-15, third in 0-7
				packed |= octet << (16 - (8 * groupBytes));
				groupBytes++;
				readPos++;
			}

			// an empty group (a line break with nothing before it) produces no symbols
			immutable int symbolCount = (groupBytes > 0) ? (groupBytes + 1) : 0;

			for (int symbol = 0; symbol < symbolCount; symbol++) {
				if (wrapping) {
					if (column >= wrapLength) {
						resultString[written++] = '\n';
						column = 1;
					} else {
						column++;
					}
				}

				// symbols are taken from the top of the 24 bit value downwards, 6 bits at a time
				resultString[written++] = .base64CharSet[(packed >> (18 - (6 * symbol))) & 0x3F];
			}

			if (byLineFlag) {
				// pass the line break characters through untouched
				while (readPos < asciiStringLength) {
					immutable char eol = asciiString[readPos];

					if ((eol != '\n') && (eol != '\r')) {
						break;
					}

					resultString[written++] = eol;
					readPos++;
				}
			}
		}

		if (padFlag && !byLineFlag) {
			// a final group of one byte needs two pads, two bytes need one, three bytes need none
			immutable int padCount = (groupBytes > 0) ? (3 - groupBytes) : 0;

			for (int pad = 0; pad < padCount; pad++) {
				if (wrapping) {
					if (column >= wrapLength) {
						resultString[written++] = '\n';
						column = 1;
					} else {
						column++;
					}
				}

				resultString[written++] = '=';
			}
		}

		return written;
	}
146 
/**
 * base64Decode converts base64 to ascii. But there are choices about what to do with illegal characters or
 * malformed strings. In this version there is a strict flag to indicate that the input must be a single
 * valid base64 string with no illegal characters, no data after padding, and no short segments. Otherwise
 * there is best effort to decode around illegal characters which ARE preserved in the output.
 * So  "TWFyeQ==.aGFk.YQ.bGl0dGxl.bGFtYg=="  decodes to  "Mary.had.a.little.lamb"  with five separate
 * base64 strings decoded, each separated by the illegal character dot. In strict mode the first dot
 * would trigger a fatal error. Some other implementations choose to ignore illegal characters which
 * of course has its own issues.
 * Any byte above 0x7F is outside the base64 alphabet and is always handled as an illegal character.
 * The four whitespace characters <CR> <LF> <TAB> and <SPACE> are silently ignored unless whitespaceReset
 * is set. In this case whitespace is treated like an illegal character and base64 decoding operates
 * around the white space. So "TWFyeQ== aGFk YQ bGl0dGxl bGFtYg==" would decode as "Mary had a little lamb".
 * Decoding is done by loading four base64 characters at a time into a bitField, and then extracting them as
 * three ascii characters.
 *
 * Params:
 *	resultString = output buffer. It is not resized here, so it must already be large enough for the
 *	               result (which could be the same size as the input)
 *	encodedString = base64 text to decode
 *	encodedStringLength = number of characters of encodedString to decode
 *	strictFlag = fail with a negative return value instead of decoding around problems
 *	whitespaceReset = treat whitespace as an illegal character, which ends the current base64 group
 *
 * Returns: the number of characters written to resultString, or when strictFlag is set one of:
 *	-1 data found after a pad character,
 *	-2 pad character before at least two symbols of a group were read,
 *	-3 a group containing only a single symbol,
 *	-4 illegal character in the input (including whitespace when whitespaceReset is set)
 */
pure nothrow @safe @nogc
int base64Decode(ref char[] resultString, const char[] encodedString, size_t encodedStringLength, bool strictFlag, bool whitespaceReset)

	do
	{
		// input string index
		size_t index;

		// result string length
		int resultLength = 0;

		// assembled bit field (up to 4 base64 symbols at a time)
		int bitField;

		// offset into bit field (6 bit input: 18, 12, 6, 0 -> 8 bit output: 16, 8, 0)
		int bitOffset;

		// end offset index value
		int endOffset;

		// character value
		int charValue = 0;

		// base64 value of the character, or a negative marker from base64CharMap
		int charIndex = 0;

		// pad characters seen
		int padLength = 0;

		for (index = 0; index < encodedStringLength; ) {
			bitField = 0;

			for (bitOffset = 18; (bitOffset >= 0) && (index < encodedStringLength); ) {
				charValue = cast(ubyte)(encodedString[index++]);

				// The map only covers 7 bit ascii. Bytes with the high bit set must not be folded
				// onto it (0xC1 is not 'A'), so they are classed as illegal characters.
				charIndex = (charValue > 0x7F) ? -1 : .base64CharMap[charValue];

				if (charIndex >= 0) {
					if ((padLength > 0) && (strictFlag)) {
						// **ERROR** Data after pad character
						return -1;
					}

					bitField |= charIndex << bitOffset;
					bitOffset -= 6;
				} else {
					if (charIndex == -3) { // -3 is Pad character '='
						padLength++;

						if ((strictFlag) && (bitOffset > 6)) {
							// **ERROR** Pad character in wrong place
							return -2;
						}
					} else { // either -1 for illegal character or -2 for whitespace (ignored)
						if ((charIndex == -1) || (whitespaceReset)) {
							// Remember it as an illegal character for copy below
							charIndex = -1;

							// exit loop to deal with illegal character
							break;
						}
					}
				}
			}

			if ((strictFlag) && (bitOffset == 12)) {
				// **ERROR** Single symbol block not valid
				return -3;
			}

			// end indicator: only the bytes completely covered by the symbols read are extracted
			endOffset = bitOffset + 3;

			for (bitOffset = 16; bitOffset > endOffset; bitOffset -= 8) {
				resultString[resultLength++] = cast(char)((bitField >> bitOffset) & 0xFF);
			}

			if (charIndex == -1) { // Was there an illegal character?
				if (strictFlag) {
					// **ERROR** Bad character in input string
					return -4;
				}

				// best effort mode: the offending character is copied to the output as is
				resultString[resultLength++] = cast(char)(charValue);
			}
		}

		return resultLength;
	}