123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562 |
- /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Copyright by The HDF Group. *
- * All rights reserved. *
- * *
- * This file is part of HDF5. The full HDF5 copyright notice, including *
- * terms governing use, modification, and redistribution, is contained in *
- * the COPYING file, which can be found at the root of the source code *
- * distribution tree, or in https://www.hdfgroup.org/licenses. *
- * If you do not have access to either file, you may request a copy from *
- * help@hdfgroup.org. *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
- /*****************************************************************************
- * Read-Only S3 Virtual File Driver (VFD)
- *
- * This is the header for the S3 Communications module
- *
- * ***NOT A FILE DRIVER***
- *
- * Purpose:
- *
- * - Provide structures and functions related to communicating with
- * Amazon S3 (Simple Storage Service).
- * - Abstract away the REST API (HTTP,
- * networked communications) behind a series of uniform function calls.
- * - Handle AWS4 authentication, if appropriate.
- * - Fail predictably in event of errors.
- * - Eventually, support more S3 operations, such as creating, writing to,
- * and removing Objects remotely.
- *
- * translates:
- * `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
- * to:
- * ```
- * GET myfile HTTP/1.1
- * Host: somewhere.me
- * Range: bytes=4096-5115
- * ```
- * and places received bytes from HTTP response...
- * ```
- * HTTP/1.1 206 Partial-Content
- * Content-Range: 4096-5115/63239
- *
- * <bytes>
- * ```
- * ...in destination buffer.
- *
- * TODO: put documentation in a consistent place and point to it from here.
- *
- * Programmer: Jacob Smith
- * 2017-11-30
- *
- *****************************************************************************/
- #include "H5private.h" /* Generic Functions */
- #ifdef H5_HAVE_ROS3_VFD
- /* Necessary S3 headers */
- #include <curl/curl.h>
- #include <openssl/evp.h>
- #include <openssl/hmac.h>
- #include <openssl/sha.h>
- /*****************
- * PUBLIC MACROS *
- *****************/
- /* hexadecimal string of pre-computed sha256 checksum of the empty string
- * hex(sha256sum(""))
- */
- #define EMPTY_SHA256 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
- /* string length (plus null terminator)
- * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'_')
- */
- #define ISO8601_SIZE 17
- /* string length (plus null terminator)
- * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT"
- */
- #define RFC7231_SIZE 30
- /*---------------------------------------------------------------------------
- *
- * Macro: ISO8601NOW()
- *
- * Purpose:
- *
- * write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest
- * e.g., "20170630T204155Z"
- *
- * wrapper for strftime()
- *
- * It is left to the programmer to check return value of
- * ISO8601NOW (should equal ISO8601_SIZE - 1).
- *
- *---------------------------------------------------------------------------
- */
- #define ISO8601NOW(dest, now_gm) strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))
- /*---------------------------------------------------------------------------
- *
- * Macro: RFC7231NOW()
- *
- * Purpose:
- *
- * write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest
- * e.g., "Fri, 30 Jun 2017 20:41:55 GMT"
- *
- * wrapper for strftime()
- *
- * It is left to the programmer to check return value of
- * RFC7231NOW (should equal RFC7231_SIZE - 1).
- *
- *---------------------------------------------------------------------------
- */
- #define RFC7231NOW(dest, now_gm) strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))
- /* Reasonable maximum length of a credential string.
- * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
- * 17 <- "////aws4_request\0"
- * 2 < "s3" (service)
- * 8 <- "YYYYmmdd" (date)
- * 128 <- (access_id)
- * 155 :: sum
- */
- #define S3COMMS_MAX_CREDENTIAL_SIZE 155
- /*---------------------------------------------------------------------------
- *
- * Macro: H5FD_S3COMMS_FORMAT_CREDENTIAL()
- *
- * Purpose:
- *
- * Format "S3 Credential" string from inputs, for AWS4.
- *
- * Wrapper for HDsnprintf().
- *
- * _HAS NO ERROR-CHECKING FACILITIES_
- * It is left to programmer to ensure that return value confers success.
- * e.g.,
- * ```
- * assert( S3COMMS_MAX_CREDENTIAL_SIZE >=
- * S3COMMS_FORMAT_CREDENTIAL(...) );
- * ```
- *
- * "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
- * assuming that `dest` has adequate space.
- *
- * ALL inputs must be null-terminated strings.
- *
- * `access` should be the user's access key ID.
- * `date` must be of format "YYYYmmdd".
- * `region` should be relevant AWS region, i.e. "us-east-1".
- * `service` should be "s3".
- *
- *---------------------------------------------------------------------------
- */
- #define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \
- HDsnprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, "%s/%s/%s/%s/aws4_request", (access), (iso8601_date), \
- (region), (service))
- /*********************
- * PUBLIC STRUCTURES *
- *********************/
- /*----------------------------------------------------------------------------
- *
- * Structure: hrb_node_t
- *
- * HTTP Header Field Node
- *
- *
- *
- * Maintain a ordered (linked) list of HTTP Header fields.
- *
- * Provides efficient access and manipulation of a logical sequence of
- * HTTP header fields, of particular use when composing an
- * "S3 Canonical Request" for authentication.
- *
- * - The creation of a Canonical Request involves:
- * - convert field names to lower case
- * - sort by this lower-case name
- * - convert ": " name-value separator in HTTP string to ":"
- * - get sorted lowercase names without field or separator
- *
- * As HTTP headers allow headers in any order (excepting the case of multiple
- * headers with the same name), the list ordering can be optimized for Canonical
- * Request creation, suggesting alphabtical order. For more expedient insertion
- * and removal of elements in the list, linked list seems preferable to a
- * dynamically-expanding array. The usually-smaller number of entries (5 or
- * fewer) makes performance overhead of traversing the list trivial.
- *
- * The above requirements of creating at Canonical Request suggests a reasonable
- * trade-off of speed for space with the option to compute elements as needed
- * or to have the various elements prepared and stored in the structure
- * (e.g. name, value, lowername, concatenated name:value)
- * The structure currently is implemented to pre-compute.
- *
- * At all times, the "first" node of the list should be the least,
- * alphabetically. For all nodes, the `next` node should be either NULL or
- * of greater alphabetical value.
- *
- * Each node contains its own header field information, plus a pointer to the
- * next node.
- *
- * It is not allowed to have multiple nodes with the same _lowercase_ `name`s
- * in the same list
- * (i.e., name is case-insensitive for access and modification.)
- *
- * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
- * strings allocated specifically for their node.
- *
- *
- *
- * `magic` (unsigned long)
- *
- * "unique" idenfier number for the structure type
- *
- * `name` (char *)
- *
- * Case-meaningful name of the HTTP field.
- * Given case is how it is supplied to networking code.
- * e.g., "Range"
- *
- * `lowername` (char *)
- *
- * Lowercase copy of name.
- * e.g., "range"
- *
- * `value` (char *)
- *
- * Case-meaningful value of HTTP field.
- * e.g., "bytes=0-9"
- *
- * `cat` (char *)
- *
- * Concatenated, null-terminated string of HTTP header line,
- * as the field would appear in an HTTP request.
- * e.g., "Range: bytes=0-9"
- *
- * `next` (hrb_node_t *)
- *
- * Pointers to next node in the list, or NULL sentinel as end of list.
- * Next node must have a greater `lowername` as determined by strcmp().
- *
- *----------------------------------------------------------------------------
- */
- typedef struct hrb_node_t {
- unsigned long magic;
- char * name;
- char * value;
- char * cat;
- char * lowername;
- struct hrb_node_t *next;
- } hrb_node_t;
- #define S3COMMS_HRB_NODE_MAGIC 0x7F5757UL
- /*----------------------------------------------------------------------------
- *
- * Structure: hrb_t
- *
- * HTTP Request Buffer structure
- *
- *
- *
- * Logically represent an HTTP request
- *
- * GET /myplace/myfile.h5 HTTP/1.1
- * Host: over.rainbow.oz
- * Date: Fri, 01 Dec 2017 12:35:04 CST
- *
- * <body>
- *
- * ...with fast, efficient access to and modification of primary and field
- * elements.
- *
- * Structure for building HTTP requests while hiding much of the string
- * processing required "under the hood."
- *
- * Information about the request target -- the first line -- and the body text,
- * if any, are managed directly with this structure. All header fields, e.g.,
- * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and
- * included in the request by a pointer to the head of the list.
- *
- *
- *
- * `magic` (unsigned long)
- *
- * "Magic" number confirming that this is an hrb_t structure and
- * what operations are valid for it.
- *
- * Must be S3COMMS_HRB_MAGIC to be valid.
- *
- * `body` (char *) :
- *
- * Pointer to start of HTTP body.
- *
- * Can be NULL, in which case it is treated as the empty string, "".
- *
- * `body_len` (size_t) :
- *
- * Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
- *
- * `first_header` (hrb_node_t *) :
- *
- * Pointer to first SORTED header node, if any.
- * It is left to the programmer to ensure that this node and associated
- * list is destroyed when done.
- *
- * `resource` (char *) :
- *
- * Pointer to resource URL string, e.g., "/folder/page.xhtml".
- *
- * `verb` (char *) :
- *
- * Pointer to HTTP verb string, e.g., "GET".
- *
- * `version` (char *) :
- *
- * Pointer to HTTP version string, e.g., "HTTP/1.1".
- *
- *----------------------------------------------------------------------------
- */
- typedef struct {
- unsigned long magic;
- char * body;
- size_t body_len;
- hrb_node_t * first_header;
- char * resource;
- char * verb;
- char * version;
- } hrb_t;
- #define S3COMMS_HRB_MAGIC 0x6DCC84UL
- /*----------------------------------------------------------------------------
- *
- * Structure: parsed_url_t
- *
- *
- * Represent a URL with easily-accessed pointers to logical elements within.
- * These elements (components) are stored as null-terminated strings (or just
- * NULLs). These components should be allocated for the structure, making the
- * data as safe as possible from modification. If a component is NULL, it is
- * either implicit in or absent from the URL.
- *
- * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
- * ^--^ ^-----------------------^ ^--^ ^---------^ ^-------------------^
- * Scheme Host Port Resource Query/-ies
- *
- *
- *
- * `magic` (unsigned long)
- *
- * Structure identification and validation identifier.
- * Identifies as `parsed_url_t` type.
- *
- * `scheme` (char *)
- *
- * String representing which protocol is to be expected.
- * _Must_ be present.
- * "http", "https", "ftp", e.g.
- *
- * `host` (char *)
- *
- * String of host, either domain name, IPv4, or IPv6 format.
- * _Must_ be present.
- * "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
- *
- * `port` (char *)
- *
- * String representation of specified port. Must resolve to a valid unsigned
- * integer.
- * "9000", "80"
- *
- * `path` (char *)
- *
- * Path to resource on host. If not specified, assumes root "/".
- * "lollipop_guild.wav", "characters/witches/white.dat"
- *
- * `query` (char *)
- *
- * Single string of all query parameters in url (if any).
- * "arg1=value1&arg2=value2"
- *
- *----------------------------------------------------------------------------
- */
- typedef struct {
- unsigned long magic;
- char * scheme; /* required */
- char * host; /* required */
- char * port;
- char * path;
- char * query;
- } parsed_url_t;
- #define S3COMMS_PARSED_URL_MAGIC 0x21D0DFUL
- /*----------------------------------------------------------------------------
- *
- * Structure: s3r_t
- *
- *
- *
- * S3 request structure "handle".
- *
- * Holds persistent information for Amazon S3 requests.
- *
- * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
- *
- * Intended to be re-used for operations on a remote object.
- *
- * Cleaned up through `H5FD_s3comms_s3r_close()`.
- *
- * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has
- * undefined behavior if called to perform in multiple threads.
- *
- *
- *
- * `magic` (unsigned long)
- *
- * "magic" number identifying this structure as unique type.
- * MUST equal `S3R_MAGIC` to be valid.
- *
- * `curlhandle` (CURL)
- *
- * Pointer to the curl_easy handle generated for the request.
- *
- * `httpverb` (char *)
- *
- * Pointer to NULL-terminated string. HTTP verb,
- * e.g. "GET", "HEAD", "PUT", etc.
- *
- * Default is NULL, resulting in a "GET" request.
- *
- * `purl` (parsed_url_t *)
- *
- * Pointer to structure holding the elements of URL for file open.
- *
- * e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
- * parsed into...
- * { scheme: "http"
- * host: "bucket.aws.com"
- * port: "8080"
- * path: "myfile.dat"
- * query: "q1=v1&q2=v2"
- * }
- *
- * Cannot be NULL.
- *
- * `region` (char *)
- *
- * Pointer to NULL-terminated string, specifying S3 "region",
- * e.g., "us-east-1".
- *
- * Required to authenticate.
- *
- * `secret_id` (char *)
- *
- * Pointer to NULL-terminated string for "secret" access id to S3 resource.
- *
- * Required to authenticate.
- *
- * `signing_key` (unsigned char *)
- *
- * Pointer to `SHA256_DIGEST_LENGTH`-long string for "re-usable" signing
- * key, generated via
- * `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
- * "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")`
- * which may be re-used for several (up to seven (7)) days from creation?
- * Computed once upon file open.
- *
- * Required to authenticate.
- *
- *----------------------------------------------------------------------------
- */
- typedef struct {
- unsigned long magic;
- CURL * curlhandle;
- size_t filesize;
- char * httpverb;
- parsed_url_t * purl;
- char * region;
- char * secret_id;
- unsigned char *signing_key;
- } s3r_t;
- #define S3COMMS_S3R_MAGIC 0x44d8d79
- #ifdef __cplusplus
- extern "C" {
- #endif
- /*******************************************
- * DECLARATION OF HTTP FIELD LIST ROUTINES *
- *******************************************/
- H5_DLL herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L, const char *name, const char *value);
- /***********************************************
- * DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
- ***********************************************/
- H5_DLL herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf);
- H5_DLL hrb_t *H5FD_s3comms_hrb_init_request(const char *verb, const char *resource, const char *host);
- /*************************************
- * DECLARATION OF S3REQUEST ROUTINES *
- *************************************/
- H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle);
- H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle);
- H5_DLL s3r_t *H5FD_s3comms_s3r_open(const char url[], const char region[], const char id[],
- const unsigned char signing_key[]);
- H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle, haddr_t offset, size_t len, void *dest);
- /*********************************
- * DECLARATION OF OTHER ROUTINES *
- *********************************/
- H5_DLL struct tm *gmnow(void);
- H5_DLL herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, int cr_size,
- char *signed_headers_dest, int sh_size, hrb_t *http_request);
- H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, const unsigned char *msg, size_t msg_len,
- hbool_t lowercase);
- H5_DLL herr_t H5FD_s3comms_free_purl(parsed_url_t *purl);
- H5_DLL herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *msg,
- size_t msg_len, char *dest);
- H5_DLL herr_t H5FD_s3comms_load_aws_profile(const char *name, char *key_id_out, char *secret_access_key_out,
- char *aws_region_out);
- H5_DLL herr_t H5FD_s3comms_nlowercase(char *dest, const char *s, size_t len);
- H5_DLL herr_t H5FD_s3comms_parse_url(const char *str, parsed_url_t **purl);
- H5_DLL herr_t H5FD_s3comms_percent_encode_char(char *repr, const unsigned char c, size_t *repr_len);
- H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, const char *secret, const char *region,
- const char *iso8601now);
- H5_DLL herr_t H5FD_s3comms_tostringtosign(char *dest, const char *req_str, const char *now,
- const char *region);
- H5_DLL herr_t H5FD_s3comms_trim(char *dest, char *s, size_t s_len, size_t *n_written);
- H5_DLL herr_t H5FD_s3comms_uriencode(char *dest, const char *s, size_t s_len, hbool_t encode_slash,
- size_t *n_written);
- #ifdef __cplusplus
- }
- #endif
- #endif /* H5_HAVE_ROS3_VFD */
|