Line data Source code
1 : /* data-identify.c - Try to identify the data
2 : Copyright (C) 2013, 2016 g10 Code GmbH
3 :
4 : This file is part of GPGME.
5 :
6 : GPGME is free software; you can redistribute it and/or modify it
7 : under the terms of the GNU Lesser General Public License as
8 : published by the Free Software Foundation; either version 2.1 of
9 : the License, or (at your option) any later version.
10 :
11 : GPGME is distributed in the hope that it will be useful, but
12 : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : Lesser General Public License for more details.
15 :
16 : You should have received a copy of the GNU Lesser General Public
17 : License along with this program; if not, see <https://www.gnu.org/licenses/>.
18 : */
19 :
20 : #if HAVE_CONFIG_H
21 : # include <config.h>
22 : #endif
23 :
24 : #include <stdlib.h>
25 : #include <string.h>
26 :
27 : #include "gpgme.h"
28 : #include "data.h"
29 : #include "util.h"
30 : #include "parsetlv.h"
31 :
32 :
33 : /* Size in bytes of the buffer allocated for the detection sample. One byte is reserved for a terminating NUL, so at most SAMPLE_SIZE - 1 bytes of data are read and examined. */
34 : #define SAMPLE_SIZE 2048
35 :
36 :
/* Packet type numbers used in OpenPGP packet headers.  No constant is
 * defined for the value 15.  */
enum
  {
    PKT_NONE          =  0,
    PKT_PUBKEY_ENC    =  1,  /* Public-key encrypted session key.  */
    PKT_SIGNATURE     =  2,  /* Signature.  */
    PKT_SYMKEY_ENC    =  3,  /* Symmetric-key encrypted session key.  */
    PKT_ONEPASS_SIG   =  4,  /* One-pass signature.  */
    PKT_SECRET_KEY    =  5,  /* Secret key.  */
    PKT_PUBLIC_KEY    =  6,  /* Public key.  */
    PKT_SECRET_SUBKEY =  7,  /* Secret subkey.  */
    PKT_COMPRESSED    =  8,  /* Compressed data.  */
    PKT_ENCRYPTED     =  9,  /* Conventionally encrypted data.  */
    PKT_MARKER        = 10,  /* Marker.  */
    PKT_PLAINTEXT     = 11,  /* Literal data.  */
    PKT_RING_TRUST    = 12,  /* Keyring trust.  */
    PKT_USER_ID       = 13,  /* User id.  */
    PKT_PUBLIC_SUBKEY = 14,  /* Public subkey.  */
    PKT_OLD_COMMENT   = 16,  /* Comment packet from an OpenPGP draft.  */
    PKT_ATTRIBUTE     = 17,  /* PGP's attribute packet.  */
    PKT_ENCRYPTED_MDC = 18,  /* Integrity protected encrypted data.  */
    PKT_MDC           = 19   /* Manipulation detection code.  */
  };
60 :
61 :
/* Interpret the first four bytes at BUFFER as a big-endian (most
 * significant byte first) 32 bit quantity and return its value.  */
static inline unsigned long
buf32_to_ulong (const void *buffer)
{
  const unsigned char *bytes = buffer;
  unsigned long value = 0;
  int i;

  for (i = 0; i < 4; i++)
    value = (value << 8) | bytes[i];

  return value;
}
69 :
70 :
71 : /* Parse the next openpgp packet. This function assumes a valid
72 : * OpenPGP packet at the address pointed to by BUFPTR which has a
73 : * maximum length as stored at BUFLEN. Return the header information
74 : * of that packet and advance the pointer stored at BUFPTR to the next
75 : * packet; also adjust the length stored at BUFLEN to match the
76 : * remaining bytes. If there are no more packets, store NULL at
77 : * BUFPTR. Return an non-zero error code on failure or the following
78 : * data on success:
79 : *
80 : * R_PKTTYPE = The packet type.
81 : * R_NTOTAL = The total number of bytes of this packet
82 : *
83 : * If GPG_ERR_TRUNCATED is returned, a packet type is anyway stored at
84 : * R_PKTTYPE but R_NOTAL won't have a usable value,
85 : */
86 : static gpg_error_t
87 0 : next_openpgp_packet (unsigned char const **bufptr, size_t *buflen,
88 : int *r_pkttype, size_t *r_ntotal)
89 : {
90 0 : const unsigned char *buf = *bufptr;
91 0 : size_t len = *buflen;
92 : int c, ctb, pkttype;
93 : unsigned long pktlen;
94 :
95 0 : if (!len)
96 0 : return gpg_error (GPG_ERR_NO_DATA);
97 :
98 : /* First some blacklisting. */
99 0 : if (len >= 4 && !memcmp (buf, "\x89PNG", 4))
100 0 : return gpg_error (GPG_ERR_INV_PACKET); /* This is a PNG file. */
101 :
102 : /* Start parsing. */
103 0 : ctb = *buf++; len--;
104 0 : if ( !(ctb & 0x80) )
105 0 : return gpg_error (GPG_ERR_INV_PACKET); /* Invalid CTB. */
106 :
107 0 : if ((ctb & 0x40)) /* New style (OpenPGP) CTB. */
108 : {
109 0 : pkttype = (ctb & 0x3f);
110 0 : if (!len)
111 0 : return gpg_error (GPG_ERR_INV_PACKET); /* No 1st length byte. */
112 0 : c = *buf++; len--;
113 0 : if ( c < 192 )
114 0 : pktlen = c;
115 0 : else if ( c < 224 )
116 : {
117 0 : pktlen = (c - 192) * 256;
118 0 : if (!len)
119 0 : return gpg_error (GPG_ERR_INV_PACKET); /* No 2nd length byte. */
120 0 : c = *buf++; len--;
121 0 : pktlen += c + 192;
122 : }
123 0 : else if (c == 255)
124 : {
125 0 : if (len < 4)
126 0 : return gpg_error (GPG_ERR_INV_PACKET); /* No length bytes. */
127 0 : pktlen = buf32_to_ulong (buf);
128 0 : buf += 4;
129 0 : len -= 4;
130 : }
131 : else /* Partial length encoding. */
132 : {
133 0 : pktlen = 0;
134 : }
135 : }
136 : else /* Old style CTB. */
137 : {
138 : int lenbytes;
139 :
140 0 : pktlen = 0;
141 0 : pkttype = (ctb>>2)&0xf;
142 0 : lenbytes = ((ctb&3)==3)? 0 : (1<<(ctb & 3));
143 0 : if (len < lenbytes)
144 0 : return gpg_error (GPG_ERR_INV_PACKET); /* Not enough length bytes. */
145 0 : for (; lenbytes; lenbytes--)
146 : {
147 0 : pktlen <<= 8;
148 0 : pktlen |= *buf++; len--;
149 : }
150 : }
151 :
152 : /* Do some basic sanity check. */
153 0 : switch (pkttype)
154 : {
155 : case PKT_PUBKEY_ENC:
156 : case PKT_SIGNATURE:
157 : case PKT_SYMKEY_ENC:
158 : case PKT_ONEPASS_SIG:
159 : case PKT_SECRET_KEY:
160 : case PKT_PUBLIC_KEY:
161 : case PKT_SECRET_SUBKEY:
162 : case PKT_COMPRESSED:
163 : case PKT_ENCRYPTED:
164 : case PKT_MARKER:
165 : case PKT_PLAINTEXT:
166 : case PKT_RING_TRUST:
167 : case PKT_USER_ID:
168 : case PKT_PUBLIC_SUBKEY:
169 : case PKT_OLD_COMMENT:
170 : case PKT_ATTRIBUTE:
171 : case PKT_ENCRYPTED_MDC:
172 : case PKT_MDC:
173 0 : break; /* Okay these are allowed packets. */
174 : default:
175 0 : return gpg_error (GPG_ERR_UNEXPECTED);
176 : }
177 :
178 0 : if (pktlen > len)
179 : {
180 : /* Packet length header too long. This is possible because we
181 : * may have only a truncated image. */
182 0 : *r_pkttype = pkttype;
183 0 : *r_ntotal = 0;
184 0 : *bufptr = NULL;
185 0 : return gpg_error (GPG_ERR_TRUNCATED);
186 : }
187 :
188 0 : *r_pkttype = pkttype;
189 0 : *r_ntotal = (buf - *bufptr) + pktlen;
190 :
191 0 : *bufptr = buf + pktlen;
192 0 : *buflen = len - pktlen;
193 0 : if (!*buflen)
194 0 : *bufptr = NULL;
195 :
196 0 : return 0;
197 : }
198 :
199 :
200 : /* Detection of PGP binary data. This function parses an OpenPGP
201 : * message. This parser is robust enough to work on a truncated
202 : * version. Returns a GPGME_DATA_TYPE_. */
203 : static gpgme_data_type_t
204 0 : pgp_binary_detection (const void *image_arg, size_t imagelen)
205 : {
206 0 : gpg_error_t err = 0;
207 0 : const unsigned char *image = image_arg;
208 : size_t n;
209 : int pkttype;
210 0 : int anypacket = 0;
211 0 : int allsignatures = 0;
212 :
213 0 : while (!err && image)
214 : {
215 0 : err = next_openpgp_packet (&image, &imagelen, &pkttype, &n);
216 0 : if (gpg_err_code (err) == GPG_ERR_TRUNCATED)
217 : ;
218 0 : else if (err)
219 0 : break;
220 :
221 : /* Skip all leading marker packets. */
222 0 : if (!anypacket && pkttype == PKT_MARKER)
223 0 : continue;
224 :
225 0 : if (pkttype == PKT_SIGNATURE)
226 : {
227 0 : if (!anypacket)
228 0 : allsignatures = 1;
229 : }
230 : else
231 0 : allsignatures = 0;
232 :
233 0 : switch (pkttype)
234 : {
235 : case PKT_SIGNATURE:
236 0 : break; /* We decide later. */
237 :
238 : case PKT_PLAINTEXT:
239 : /* Old style signature format: {sig}+,plaintext */
240 0 : if (allsignatures)
241 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
242 0 : break;
243 :
244 : case PKT_ONEPASS_SIG:
245 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
246 :
247 : case PKT_SECRET_KEY:
248 : case PKT_PUBLIC_KEY:
249 0 : return GPGME_DATA_TYPE_PGP_KEY;
250 :
251 : case PKT_SECRET_SUBKEY:
252 : case PKT_PUBLIC_SUBKEY:
253 0 : return GPGME_DATA_TYPE_PGP_OTHER;
254 : case PKT_PUBKEY_ENC:
255 : case PKT_SYMKEY_ENC:
256 0 : return GPGME_DATA_TYPE_PGP_ENCRYPTED;
257 :
258 : case PKT_COMPRESSED:
259 : /* If this is the first packet we assume that that a signed
260 : * packet follows. We do not want to uncompress it here due
261 : * to the need of a lot of code and the potentail DoS. */
262 0 : if (!anypacket)
263 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
264 0 : return GPGME_DATA_TYPE_PGP_OTHER;
265 :
266 : default:
267 0 : return GPGME_DATA_TYPE_PGP_OTHER;
268 : }
269 0 : anypacket = 1;
270 : }
271 :
272 0 : if (allsignatures)
273 0 : return GPGME_DATA_TYPE_PGP_SIGNATURE;
274 :
275 0 : return GPGME_DATA_TYPE_UNKNOWN;
276 : }
277 :
278 :
279 : /* This is probably an armored "PGP MESSAGE" which can encode
280 : * different PGP data types. STRING is modified after a call to this
281 : * function. */
282 : static gpgme_data_type_t
283 0 : inspect_pgp_message (char *string)
284 : {
285 : struct b64state state;
286 : size_t nbytes;
287 :
288 0 : if (_gpgme_b64dec_start (&state, ""))
289 0 : return GPGME_DATA_TYPE_INVALID; /* oops */
290 :
291 0 : if (_gpgme_b64dec_proc (&state, string, strlen (string), &nbytes))
292 : {
293 0 : _gpgme_b64dec_finish (&state);
294 0 : return GPGME_DATA_TYPE_UNKNOWN; /* bad encoding etc. */
295 : }
296 0 : _gpgme_b64dec_finish (&state);
297 0 : string[nbytes] = 0; /* Better append a Nul. */
298 :
299 0 : return pgp_binary_detection (string, nbytes);
300 : }
301 :
302 :
303 : /* Note that DATA may be binary but a final nul is required so that
304 : string operations will find a terminator.
305 :
306 : Returns: GPGME_DATA_TYPE_xxxx */
307 : static gpgme_data_type_t
308 0 : basic_detection (char *data, size_t datalen)
309 : {
310 : tlvinfo_t ti;
311 : const char *s;
312 : size_t n;
313 0 : int maybe_p12 = 0;
314 :
315 0 : if (datalen < 24) /* Object is probably too short for detection. */
316 0 : return GPGME_DATA_TYPE_UNKNOWN;
317 :
318 : /* This is a common example of a CMS object - it is obvious that we
319 : only need to read a few bytes to get to the OID:
320 : 30 82 0B 59 06 09 2A 86 48 86 F7 0D 01 07 02 A0 82 0B 4A 30 82 0B 46 02
321 : ----------- ++++++++++++++++++++++++++++++++
322 : SEQUENCE OID (signedData)
323 : (2 byte len)
324 :
325 : A PKCS#12 message is:
326 :
327 : 30 82 08 59 02 01 03 30 82 08 1F 06 09 2A 86 48 86 F7 0D 01 07 01 A0 82
328 : ----------- ++++++++ ----------- ++++++++++++++++++++++++++++++++
329 : SEQUENCE INTEGER SEQUENCE OID (data)
330 :
331 : A X.509 certificate is:
332 :
333 : 30 82 05 B8 30 82 04 A0 A0 03 02 01 02 02 07 15 46 A0 BF 30 07 39 30 0D
334 : ----------- +++++++++++ ----- ++++++++ --------------------------
335 : SEQUENCE SEQUENCE [0] INTEGER INTEGER SEQU
336 : (tbs) (version) (s/n) (Algo)
337 :
338 : Thus we need to read at least 22 bytes, we add 2 bytes to cope with
339 : length headers stored with 4 bytes.
340 : */
341 :
342 :
343 0 : s = data;
344 0 : n = datalen;
345 :
346 0 : if (parse_tlv (&s, &n, &ti))
347 0 : goto try_pgp; /* Not properly BER encoded. */
348 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
349 0 : && ti.is_cons))
350 : goto try_pgp; /* A CMS object always starts with a sequence. */
351 :
352 0 : if (parse_tlv (&s, &n, &ti))
353 0 : goto try_pgp; /* Not properly BER encoded. */
354 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
355 0 : && ti.is_cons && n >= ti.length)
356 : {
357 0 : if (parse_tlv (&s, &n, &ti))
358 0 : goto try_pgp;
359 0 : if (!(ti.cls == ASN1_CLASS_CONTEXT && ti.tag == 0
360 0 : && ti.is_cons && ti.length == 3 && n >= ti.length))
361 : goto try_pgp;
362 :
363 0 : if (parse_tlv (&s, &n, &ti))
364 0 : goto try_pgp;
365 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
366 0 : && !ti.is_cons && ti.length == 1 && n && (*s == 1 || *s == 2)))
367 : goto try_pgp;
368 0 : s++;
369 0 : n--;
370 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
371 0 : && !ti.is_cons))
372 : goto try_pgp;
373 : /* Because the now following S/N may be larger than the sample
374 : data we have, we stop parsing here and don't check for the
375 : algorithm ID. */
376 0 : return GPGME_DATA_TYPE_X509_CERT;
377 : }
378 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_INTEGER
379 0 : && !ti.is_cons && ti.length == 1 && n && *s == 3)
380 : {
381 0 : maybe_p12 = 1;
382 0 : s++;
383 0 : n--;
384 0 : if (parse_tlv (&s, &n, &ti))
385 0 : goto try_pgp;
386 0 : if (!(ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_SEQUENCE
387 0 : && ti.is_cons))
388 : goto try_pgp;
389 0 : if (parse_tlv (&s, &n, &ti))
390 0 : goto try_pgp;
391 : }
392 0 : if (ti.cls == ASN1_CLASS_UNIVERSAL && ti.tag == ASN1_TAG_OBJECT_ID
393 0 : && !ti.is_cons && ti.length && n >= ti.length)
394 : {
395 0 : if (ti.length == 9)
396 : {
397 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x01", 9))
398 : {
399 : /* Data. */
400 : return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
401 0 : /* */ : GPGME_DATA_TYPE_CMS_OTHER);
402 : }
403 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x02", 9))
404 : {
405 : /* Signed Data. */
406 : return (maybe_p12 ? GPGME_DATA_TYPE_PKCS12
407 0 : /* */ : GPGME_DATA_TYPE_CMS_SIGNED);
408 : }
409 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x03", 9))
410 0 : return GPGME_DATA_TYPE_CMS_ENCRYPTED; /* Enveloped Data. */
411 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x05", 9))
412 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Digested Data. */
413 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x07\x06", 9))
414 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Encrypted Data. */
415 : }
416 0 : else if (ti.length == 11)
417 : {
418 0 : if (!memcmp (s, "\x2A\x86\x48\x86\xF7\x0D\x01\x09\x10\x01\x02", 11))
419 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Auth Data. */
420 : }
421 : }
422 :
423 :
424 : try_pgp:
425 : /* Check whether this might be a non-armored PGP message. We need
426 : to do this before checking for armor lines, so that we don't get
427 : fooled by armored messages inside a signed binary PGP message. */
428 0 : if ((data[0] & 0x80))
429 : {
430 : /* That might be a binary PGP message. At least it is not plain
431 : ASCII. Of course this might be certain lead-in text of
432 : armored CMS messages. However, I am not sure whether this is
433 : at all defined and in any case it is uncommon. Thus we don't
434 : do any further plausibility checks but stupidly assume no CMS
435 : armored data will follow. */
436 0 : return pgp_binary_detection (data, datalen);
437 : }
438 :
439 : /* Now check whether there are armor lines. */
440 0 : for (s = data; s && *s; s = (*s=='\n')?(s+1):((s=strchr (s,'\n'))?(s+1):s))
441 : {
442 0 : if (!strncmp (s, "-----BEGIN ", 11))
443 : {
444 0 : if (!strncmp (s+11, "SIGNED ", 7))
445 0 : return GPGME_DATA_TYPE_CMS_SIGNED;
446 0 : if (!strncmp (s+11, "ENCRYPTED ", 10))
447 0 : return GPGME_DATA_TYPE_CMS_ENCRYPTED;
448 0 : if (!strncmp (s+11, "PGP ", 4))
449 : {
450 0 : if (!strncmp (s+15, "SIGNATURE", 9))
451 0 : return GPGME_DATA_TYPE_PGP_SIGNATURE;
452 0 : if (!strncmp (s+15, "SIGNED MESSAGE", 14))
453 0 : return GPGME_DATA_TYPE_PGP_SIGNED;
454 0 : if (!strncmp (s+15, "PUBLIC KEY BLOCK", 16))
455 0 : return GPGME_DATA_TYPE_PGP_KEY;
456 0 : if (!strncmp (s+15, "PRIVATE KEY BLOCK", 17))
457 0 : return GPGME_DATA_TYPE_PGP_KEY;
458 0 : if (!strncmp (s+15, "SECRET KEY BLOCK", 16))
459 0 : return GPGME_DATA_TYPE_PGP_KEY;
460 0 : if (!strncmp (s+15, "ARMORED FILE", 12))
461 0 : return GPGME_DATA_TYPE_UNKNOWN;
462 :
463 0 : return inspect_pgp_message (data);
464 : }
465 0 : if (!strncmp (s+11, "CERTIFICATE", 11))
466 0 : return GPGME_DATA_TYPE_X509_CERT;
467 0 : if (!strncmp (s+11, "PKCS12", 6))
468 0 : return GPGME_DATA_TYPE_PKCS12;
469 0 : return GPGME_DATA_TYPE_CMS_OTHER; /* Not PGP, thus we assume CMS. */
470 : }
471 : }
472 :
473 0 : return GPGME_DATA_TYPE_UNKNOWN;
474 : }
475 :
476 :
477 : /* Try to detect the type of the data. Note that this function works
478 : only on seekable data objects. The function tries to reset the
479 : file pointer but there is no guarantee that it will work.
480 :
481 : FIXME: We may want to add internal buffering so that this function
482 : can be implemented for allmost all kind of data objects.
483 : */
484 : gpgme_data_type_t
485 0 : gpgme_data_identify (gpgme_data_t dh, int reserved)
486 : {
487 : gpgme_data_type_t result;
488 : char *sample;
489 : int n;
490 : gpgme_off_t off;
491 :
492 : (void)reserved;
493 :
494 : /* Check whether we can seek the data object. */
495 0 : off = gpgme_data_seek (dh, 0, SEEK_CUR);
496 0 : if (off == (gpgme_off_t)(-1))
497 0 : return GPGME_DATA_TYPE_INVALID;
498 :
499 : /* Allocate a buffer and read the data. */
500 0 : sample = malloc (SAMPLE_SIZE);
501 0 : if (!sample)
502 0 : return GPGME_DATA_TYPE_INVALID; /* Ooops. */
503 0 : n = gpgme_data_read (dh, sample, SAMPLE_SIZE - 1);
504 0 : if (n < 0)
505 : {
506 0 : free (sample);
507 0 : return GPGME_DATA_TYPE_INVALID; /* Ooops. */
508 : }
509 0 : sample[n] = 0; /* (Required for our string functions.) */
510 :
511 0 : result = basic_detection (sample, n);
512 0 : free (sample);
513 0 : gpgme_data_seek (dh, off, SEEK_SET);
514 :
515 0 : return result;
516 : }
|