1 : // Copyright 2014 Google Inc. All Rights Reserved.
2 : //
3 : // Licensed under the Apache License, Version 2.0 (the "License");
4 : // you may not use this file except in compliance with the License.
5 : // You may obtain a copy of the License at
6 : //
7 : // http://www.apache.org/licenses/LICENSE-2.0
8 : //
9 : // Unless required by applicable law or agreed to in writing, software
10 : // distributed under the License is distributed on an "AS IS" BASIS,
11 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 : // See the License for the specific language governing permissions and
13 : // limitations under the License.
14 : //
15 : // Much of this file has been adapted from Chromium (net/http/http_util.cc) and
16 : // Breakpad (common/linux/http_upload.cc).
17 : // See http://www.ietf.org/rfc/rfc2388.txt for a description of the
18 : // multipart/form-data HTTP message type implemented in this file.
19 : #include "syzygy/kasko/internet_helpers.h"
20 :
21 : #include <winhttp.h> // NOLINT
22 :
23 : #include <wchar.h>
24 :
25 : #include "base/logging.h"
26 : #include "base/strings/string_number_conversions.h"
27 : #include "base/strings/string_tokenizer.h"
28 : #include "base/strings/string_util.h"
29 : #include "base/strings/utf_string_conversions.h"
30 :
31 m : namespace kasko {
32 :
33 m : namespace {
34 :
35 : // Returns the index of the closing quote of the string, if any. |start| points
36 : // at the opening quote.
37 m : size_t FindStringEnd(const base::string16& line,
38 m : size_t start,
39 m : base::char16 delim) {
40 m : DCHECK_LT(start, line.length());
41 m : DCHECK_EQ(line[start], delim);
42 m : DCHECK((delim == L'"') || (delim == L'\''));
43 :
44 m : const base::char16 set[] = { delim, L'\\', L'\0' };
45 m : for (size_t end = line.find_first_of(set, start + 1);
46 m : end != base::string16::npos; end = line.find_first_of(set, end + 2)) {
47 m : if (line[end] != L'\\')
48 m : return end;
49 m : }
50 m : return line.length();
51 m : }
52 :
53 m : const base::char16 kHttpLws[] = L" \t";
54 :
55 m : bool IsLWS(base::char16 c) {
56 m : return ::wcschr(kHttpLws, c) != NULL;
57 m : }
58 :
59 m : void TrimLWS(base::string16::const_iterator* begin,
60 m : base::string16::const_iterator* end) {
61 : // Skip leading whitespace.
62 m : while (*begin < *end && IsLWS((*begin)[0]))
63 m : ++(*begin);
64 :
65 : // Skip trailing whitespace.
66 m : while (*begin < *end && IsLWS((*end)[-1]))
67 m : --(*end);
68 m : }
69 :
70 m : } // namespace
71 :
72 m : void ParseContentType(const base::string16& content_type_str,
73 m : base::string16* mime_type,
74 m : base::string16* charset,
75 m : bool* had_charset,
76 m : base::string16* boundary) {
77 m : const base::string16::const_iterator begin = content_type_str.begin();
78 :
79 : // Trim leading and trailing whitespace from type. We include '(' in the
80 : // trailing trim set to catch media-type comments, which are not at all
81 : // standard, but may occur in rare cases.
82 m : size_t type_val = content_type_str.find_first_not_of(kHttpLws);
83 m : type_val = std::min(type_val, content_type_str.length());
84 m : size_t type_end = content_type_str.find_first_of(
85 m : base::string16(kHttpLws) + L";(", type_val);
86 m : if (type_end == base::string16::npos)
87 m : type_end = content_type_str.length();
88 :
89 m : size_t charset_val = 0;
90 m : size_t charset_end = 0;
91 m : bool type_has_charset = false;
92 :
93 : // Iterate over parameters.
94 m : size_t param_start = content_type_str.find_first_of(';', type_end);
95 m : if (param_start != std::string::npos) {
96 m : base::StringTokenizerT<base::string16, base::string16::const_iterator>
97 m : tokenizer(begin + param_start, content_type_str.end(), L";");
98 m : tokenizer.set_quote_chars(L"\"");
99 m : while (tokenizer.GetNext()) {
100 m : base::string16::const_iterator equals_sign =
101 m : std::find(tokenizer.token_begin(), tokenizer.token_end(), L'=');
102 m : if (equals_sign == tokenizer.token_end())
103 m : continue;
104 :
105 m : base::string16::const_iterator param_name_begin = tokenizer.token_begin();
106 m : base::string16::const_iterator param_name_end = equals_sign;
107 m : TrimLWS(¶m_name_begin, ¶m_name_end);
108 :
109 m : base::string16::const_iterator param_value_begin = equals_sign + 1;
110 m : base::string16::const_iterator param_value_end = tokenizer.token_end();
111 m : DCHECK(param_value_begin <= tokenizer.token_end());
112 m : TrimLWS(¶m_value_begin, ¶m_value_end);
113 :
114 m : if (base::LowerCaseEqualsASCII(
115 m : base::StringPiece16(param_name_begin, param_name_end),
116 m : "charset")) {
117 m : charset_val = param_value_begin - begin;
118 m : charset_end = param_value_end - begin;
119 m : type_has_charset = true;
120 m : } else if (base::LowerCaseEqualsASCII(
121 m : base::StringPiece16(param_name_begin, param_name_end),
122 m : "boundary")) {
123 m : if (boundary)
124 m : boundary->assign(param_value_begin, param_value_end);
125 m : }
126 m : }
127 m : }
128 :
129 m : if (type_has_charset) {
130 : // Trim leading and trailing whitespace from charset_val. We include '(' in
131 : // the trailing trim set to catch media-type comments, which are not at all
132 : // standard, but may occur in rare cases.
133 m : charset_val = content_type_str.find_first_not_of(kHttpLws, charset_val);
134 m : charset_val = std::min(charset_val, charset_end);
135 m : base::char16 first_char = content_type_str[charset_val];
136 m : if (first_char == L'"' || first_char == L'\'') {
137 m : charset_end = FindStringEnd(content_type_str, charset_val, first_char);
138 m : ++charset_val;
139 m : DCHECK(charset_end >= charset_val);
140 m : } else {
141 m : charset_end = std::min(content_type_str.find_first_of(
142 m : base::string16(kHttpLws) + L";(", charset_val),
143 m : charset_end);
144 m : }
145 m : }
146 :
147 : // If the server sent "*/*", it is meaningless, so do not store it.
148 : // Also, if type_val is the same as mime_type, then just update the charset
149 : // However, if charset is empty and mime_type hasn't changed, then don't
150 : // wipe-out an existing charset. We also want to reject a mime-type if it does
151 : // not include a slash. Some servers give junk after the charset parameter,
152 : // which may include a comma, so this check makes us a bit more tolerant.
153 m : if (content_type_str.length() != 0 &&
154 m : content_type_str != L"*/*" &&
155 m : content_type_str.find_first_of(L'/') != base::string16::npos) {
156 : // The common case here is that mime_type is empty.
157 m : bool eq = !mime_type->empty() &&
158 m : base::LowerCaseEqualsASCII(
159 m : base::StringPiece16(begin + type_val, begin + type_end),
160 m : base::WideToUTF8(*mime_type).data());
161 m : if (!eq) {
162 m : mime_type->assign(begin + type_val, begin + type_end);
163 m : *mime_type = base::ToLowerASCII(*mime_type);
164 m : }
165 m : if ((!eq && *had_charset) || type_has_charset) {
166 m : *had_charset = true;
167 m : charset->assign(begin + charset_val, begin + charset_end);
168 m : *charset = base::ToLowerASCII(*charset);
169 m : }
170 m : }
171 m : }
172 :
173 m : bool DecomposeUrl(const base::string16& url,
174 m : base::string16* scheme,
175 m : base::string16* host,
176 m : uint16_t* port,
177 m : base::string16* path) {
178 m : DCHECK(scheme);
179 m : DCHECK(host);
180 m : DCHECK(path);
181 :
182 m : wchar_t scheme_buffer[16], host_buffer[256], path_buffer[256];
183 m : URL_COMPONENTS components;
184 m : memset(&components, 0, sizeof(components));
185 m : components.dwStructSize = sizeof(components);
186 m : components.lpszScheme = scheme_buffer;
187 m : components.dwSchemeLength = sizeof(scheme_buffer) / sizeof(scheme_buffer[0]);
188 m : components.lpszHostName = host_buffer;
189 m : components.dwHostNameLength = sizeof(host_buffer) / sizeof(host_buffer[0]);
190 m : components.lpszUrlPath = path_buffer;
191 m : components.dwUrlPathLength = sizeof(path_buffer) / sizeof(path_buffer[0]);
192 m : if (!::WinHttpCrackUrl(url.c_str(), 0, 0, &components))
193 m : return false;
194 m : *scheme = scheme_buffer;
195 m : *host = host_buffer;
196 m : *path = path_buffer;
197 m : *port = components.nPort;
198 m : return true;
199 m : }
200 :
201 m : base::string16 ComposeUrl(const base::string16& host,
202 m : uint16_t port,
203 m : const base::string16& path,
204 m : bool secure) {
205 m : if (secure) {
206 m : if (port == 443)
207 m : return L"https://" + host + path;
208 m : return L"https://" + host + L':' + base::UintToString16(port) + path;
209 m : }
210 m : if (port == 80)
211 m : return L"http://" + host + path;
212 m : return L"http://" + host + L':' + base::UintToString16(port) + path;
213 m : }
214 :
215 m : base::string16 GenerateMultipartHttpRequestBoundary() {
216 : // The boundary has 27 '-' characters followed by 16 hex digits.
217 m : static const base::char16 kBoundaryPrefix[] = L"---------------------------";
218 m : static const size_t kBoundaryLength = 27 + 16;
219 :
220 : // Generate some random numbers to fill out the boundary.
221 m : int r0 = rand();
222 m : int r1 = rand();
223 :
224 : // Add one character for the NULL termination.
225 m : base::char16 temp[kBoundaryLength + 1];
226 m : ::swprintf(temp, sizeof(temp) / sizeof(*temp), L"%s%08X%08X", kBoundaryPrefix,
227 m : r0, r1);
228 :
229 m : return base::string16(temp, kBoundaryLength);
230 m : }
231 :
232 m : base::string16 GenerateMultipartHttpRequestContentTypeHeader(
233 m : const base::string16 boundary) {
234 m : return L"Content-Type: multipart/form-data; boundary=" + boundary;
235 m : }
236 :
237 m : std::string GenerateMultipartHttpRequestBody(
238 m : const std::map<base::string16, base::string16>& parameters,
239 m : const std::string& upload_file,
240 m : const base::string16& file_part_name,
241 m : const base::string16& boundary) {
242 m : DCHECK(!boundary.empty());
243 m : DCHECK(!file_part_name.empty());
244 m : std::string boundary_utf8 = base::WideToUTF8(boundary);
245 :
246 m : std::string request_body;
247 :
248 : // Append each of the parameter pairs as a form-data part.
249 m : for (const auto& entry : parameters) {
250 m : request_body.append("--" + boundary_utf8 + "\r\n");
251 m : request_body.append("Content-Disposition: form-data; name=\"" +
252 m : base::WideToUTF8(entry.first) + "\"\r\n\r\n" +
253 m : base::WideToUTF8(entry.second) + "\r\n");
254 m : }
255 :
256 m : std::string file_part_name_utf8 = base::WideToUTF8(file_part_name);
257 :
258 m : request_body.append("--" + boundary_utf8 + "\r\n");
259 m : request_body.append("Content-Disposition: form-data; "
260 m : "name=\"" + file_part_name_utf8 + "\"; "
261 m : "filename=\"" + file_part_name_utf8 + "\"\r\n");
262 m : request_body.append("Content-Type: application/octet-stream\r\n");
263 m : request_body.append("\r\n");
264 :
265 m : request_body.append(upload_file);
266 m : request_body.append("\r\n");
267 m : request_body.append("--" + boundary_utf8 + "--\r\n");
268 :
269 m : return request_body;
270 m : }
271 :
272 m : } // namespace kasko
|