Start changing HTTP parser.
parent
19933cbf23
commit
0dde1f589e
@ -0,0 +1,4 @@
|
||||
tags
|
||||
*.o
|
||||
test
|
||||
test_g
|
@ -0,0 +1,4 @@
|
||||
Contributors must agree to the Contributor License Agreement before patches
|
||||
can be accepted.
|
||||
|
||||
http://spreadsheets2.google.com/viewform?hl=en&formkey=dDJXOGUwbzlYaWM4cHN1MERwQS1CSnc6MQ
|
@ -0,0 +1,19 @@
|
||||
Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
IN THE SOFTWARE.
|
@ -0,0 +1,171 @@
|
||||
HTTP Parser
|
||||
===========
|
||||
|
||||
This is a parser for HTTP messages written in C. It parses both requests and
|
||||
responses. The parser is designed to be used in performance HTTP
|
||||
applications. It does not make any syscalls nor allocations, it does not
|
||||
buffer data, it can be interrupted at anytime. Depending on your
|
||||
architecture, it only requires about 40 bytes of data per message
|
||||
stream (in a web server that is per connection).
|
||||
|
||||
Features:
|
||||
|
||||
* No dependencies
|
||||
* Handles persistent streams (keep-alive).
|
||||
* Decodes chunked encoding.
|
||||
* Upgrade support
|
||||
* Defends against buffer overflow attacks.
|
||||
|
||||
The parser extracts the following information from HTTP messages:
|
||||
|
||||
* Header fields and values
|
||||
* Content-Length
|
||||
* Request method
|
||||
* Response status code
|
||||
* Transfer-Encoding
|
||||
* HTTP version
|
||||
* Request path, query string, fragment
|
||||
* Message body
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
One `http_parser` object is used per TCP connection. Initialize the struct
|
||||
using `http_parser_init()` and set the callbacks. That might look something
|
||||
like this for a request parser:
|
||||
|
||||
http_parser_settings settings;
|
||||
settings.on_path = my_path_callback;
|
||||
settings.on_header_field = my_header_field_callback;
|
||||
/* ... */
|
||||
|
||||
http_parser *parser = malloc(sizeof(http_parser));
|
||||
http_parser_init(parser, HTTP_REQUEST);
|
||||
parser->data = my_socket;
|
||||
|
||||
When data is received on the socket execute the parser and check for errors.
|
||||
|
||||
size_t len = 80*1024, nparsed;
|
||||
char buf[len];
|
||||
ssize_t recved;
|
||||
|
||||
recved = recv(fd, buf, len, 0);
|
||||
|
||||
if (recved < 0) {
|
||||
/* Handle error. */
|
||||
}
|
||||
|
||||
/* Start up / continue the parser.
|
||||
* Note we pass recved==0 to signal that EOF has been recieved.
|
||||
*/
|
||||
nparsed = http_parser_execute(parser, &settings, buf, recved);
|
||||
|
||||
if (parser->upgrade) {
|
||||
/* handle new protocol */
|
||||
} else if (nparsed != recved) {
|
||||
/* Handle error. Usually just close the connection. */
|
||||
}
|
||||
|
||||
HTTP needs to know where the end of the stream is. For example, sometimes
|
||||
servers send responses without Content-Length and expect the client to
|
||||
consume input (for the body) until EOF. To tell http_parser about EOF, give
|
||||
`0` as the forth parameter to `http_parser_execute()`. Callbacks and errors
|
||||
can still be encountered during an EOF, so one must still be prepared
|
||||
to receive them.
|
||||
|
||||
Scalar valued message information such as `status_code`, `method`, and the
|
||||
HTTP version are stored in the parser structure. This data is only
|
||||
temporally stored in `http_parser` and gets reset on each new message. If
|
||||
this information is needed later, copy it out of the structure during the
|
||||
`headers_complete` callback.
|
||||
|
||||
The parser decodes the transfer-encoding for both requests and responses
|
||||
transparently. That is, a chunked encoding is decoded before being sent to
|
||||
the on_body callback.
|
||||
|
||||
|
||||
The Special Problem of Upgrade
|
||||
------------------------------
|
||||
|
||||
HTTP supports upgrading the connection to a different protocol. An
|
||||
increasingly common example of this is the Web Socket protocol which sends
|
||||
a request like
|
||||
|
||||
GET /demo HTTP/1.1
|
||||
Upgrade: WebSocket
|
||||
Connection: Upgrade
|
||||
Host: example.com
|
||||
Origin: http://example.com
|
||||
WebSocket-Protocol: sample
|
||||
|
||||
followed by non-HTTP data.
|
||||
|
||||
(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
|
||||
information the Web Socket protocol.)
|
||||
|
||||
To support this, the parser will treat this as a normal HTTP message without a
|
||||
body. Issuing both on_headers_complete and on_message_complete callbacks. However
|
||||
http_parser_execute() will stop parsing at the end of the headers and return.
|
||||
|
||||
The user is expected to check if `parser->upgrade` has been set to 1 after
|
||||
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
|
||||
offset by the return value of `http_parser_execute()`.
|
||||
|
||||
|
||||
Callbacks
|
||||
---------
|
||||
|
||||
During the `http_parser_execute()` call, the callbacks set in
|
||||
`http_parser_settings` will be executed. The parser maintains state and
|
||||
never looks behind, so buffering the data is not necessary. If you need to
|
||||
save certain data for later usage, you can do that from the callbacks.
|
||||
|
||||
There are two types of callbacks:
|
||||
|
||||
* notification `typedef int (*http_cb) (http_parser*);`
|
||||
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
|
||||
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
|
||||
Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment,
|
||||
(common) on_header_field, on_header_value, on_body;
|
||||
|
||||
Callbacks must return 0 on success. Returning a non-zero value indicates
|
||||
error to the parser, making it exit immediately.
|
||||
|
||||
In case you parse HTTP message in chunks (i.e. `read()` request line
|
||||
from socket, parse, read half headers, parse, etc) your data callbacks
|
||||
may be called more than once. Http-parser guarantees that data pointer is only
|
||||
valid for the lifetime of callback. You can also `read()` into a heap allocated
|
||||
buffer to avoid copying memory around if this fits your application.
|
||||
|
||||
Reading headers may be a tricky task if you read/parse headers partially.
|
||||
Basically, you need to remember whether last header callback was field or value
|
||||
and apply following logic:
|
||||
|
||||
(on_header_field and on_header_value shortened to on_h_*)
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| State (prev. callback) | Callback | Description/action |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
|
||||
| | | into it |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| value | on_h_field | New header started. |
|
||||
| | | Copy current name,value buffers to headers |
|
||||
| | | list and allocate new buffer for new name |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| field | on_h_field | Previous name continues. Reallocate name |
|
||||
| | | buffer and append callback data to it |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| field | on_h_value | Value for current header started. Allocate |
|
||||
| | | new buffer and copy callback data to it |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
| value | on_h_value | Value continues. Reallocate value buffer |
|
||||
| | | and append callback data to it |
|
||||
------------------------ ------------ --------------------------------------------
|
||||
|
||||
|
||||
See examples of reading in headers:
|
||||
|
||||
* [partial example](http://gist.github.com/155877) in C
|
||||
* [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
|
||||
* [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,181 @@
|
||||
/* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef http_parser_h
|
||||
#define http_parser_h
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include <sys/types.h>
|
||||
#if defined(_WIN32) && !defined(__MINGW32__)
|
||||
typedef __int8 int8_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef __int16 int16_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
typedef unsigned int size_t;
|
||||
typedef int ssize_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
|
||||
* faster
|
||||
*/
|
||||
#ifndef HTTP_PARSER_STRICT
|
||||
# define HTTP_PARSER_STRICT 1
|
||||
#else
|
||||
# define HTTP_PARSER_STRICT 0
|
||||
#endif
|
||||
|
||||
|
||||
/* Maximium header size allowed */
|
||||
#define HTTP_MAX_HEADER_SIZE (80*1024)
|
||||
|
||||
|
||||
typedef struct http_parser http_parser;
|
||||
typedef struct http_parser_settings http_parser_settings;
|
||||
|
||||
|
||||
/* Callbacks should return non-zero to indicate an error. The parser will
|
||||
* then halt execution.
|
||||
*
|
||||
* The one exception is on_headers_complete. In a HTTP_RESPONSE parser
|
||||
* returning '1' from on_headers_complete will tell the parser that it
|
||||
* should not expect a body. This is used when receiving a response to a
|
||||
* HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
|
||||
* chunked' headers that indicate the presence of a body.
|
||||
*
|
||||
* http_data_cb does not return data chunks. It will be call arbitrarally
|
||||
* many times for each string. E.G. you might get 10 callbacks for "on_path"
|
||||
* each providing just a few characters more data.
|
||||
*/
|
||||
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
|
||||
typedef int (*http_cb) (http_parser*);
|
||||
|
||||
|
||||
/* Request Methods */
|
||||
enum http_method
|
||||
{ HTTP_DELETE = 0
|
||||
, HTTP_GET
|
||||
, HTTP_HEAD
|
||||
, HTTP_POST
|
||||
, HTTP_PUT
|
||||
/* pathological */
|
||||
, HTTP_CONNECT
|
||||
, HTTP_OPTIONS
|
||||
, HTTP_TRACE
|
||||
/* webdav */
|
||||
, HTTP_COPY
|
||||
, HTTP_LOCK
|
||||
, HTTP_MKCOL
|
||||
, HTTP_MOVE
|
||||
, HTTP_PROPFIND
|
||||
, HTTP_PROPPATCH
|
||||
, HTTP_UNLOCK
|
||||
/* subversion */
|
||||
, HTTP_REPORT
|
||||
, HTTP_MKACTIVITY
|
||||
, HTTP_CHECKOUT
|
||||
, HTTP_MERGE
|
||||
/* upnp */
|
||||
, HTTP_MSEARCH
|
||||
, HTTP_NOTIFY
|
||||
, HTTP_SUBSCRIBE
|
||||
, HTTP_UNSUBSCRIBE
|
||||
};
|
||||
|
||||
|
||||
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
|
||||
|
||||
|
||||
struct http_parser {
|
||||
/** PRIVATE **/
|
||||
unsigned char type : 2;
|
||||
unsigned char flags : 6;
|
||||
unsigned char state;
|
||||
unsigned char header_state;
|
||||
unsigned char index;
|
||||
|
||||
uint32_t nread;
|
||||
int64_t content_length;
|
||||
|
||||
/** READ-ONLY **/
|
||||
unsigned short http_major;
|
||||
unsigned short http_minor;
|
||||
unsigned short status_code; /* responses only */
|
||||
unsigned char method; /* requests only */
|
||||
|
||||
/* 1 = Upgrade header was present and the parser has exited because of that.
|
||||
* 0 = No upgrade header present.
|
||||
* Should be checked when http_parser_execute() returns in addition to
|
||||
* error checking.
|
||||
*/
|
||||
char upgrade;
|
||||
|
||||
/** PUBLIC **/
|
||||
void *data; /* A pointer to get hook to the "connection" or "socket" object */
|
||||
};
|
||||
|
||||
|
||||
struct http_parser_settings {
|
||||
http_cb on_message_begin;
|
||||
http_data_cb on_path;
|
||||
http_data_cb on_query_string;
|
||||
http_data_cb on_url;
|
||||
http_data_cb on_fragment;
|
||||
http_data_cb on_header_field;
|
||||
http_data_cb on_header_value;
|
||||
http_cb on_headers_complete;
|
||||
http_data_cb on_body;
|
||||
http_cb on_message_complete;
|
||||
};
|
||||
|
||||
|
||||
void http_parser_init(http_parser *parser, enum http_parser_type type);
|
||||
|
||||
|
||||
size_t http_parser_execute(http_parser *parser,
|
||||
const http_parser_settings *settings,
|
||||
const char *data,
|
||||
size_t len);
|
||||
|
||||
|
||||
/* If http_should_keep_alive() in the on_headers_complete or
|
||||
* on_message_complete callback returns true, then this will be should be
|
||||
* the last message on the connection.
|
||||
* If you are the server, respond with the "Connection: close" header.
|
||||
* If you are the client, close the connection.
|
||||
*/
|
||||
int http_should_keep_alive(http_parser *parser);
|
||||
|
||||
/* Returns a string version of the HTTP method. */
|
||||
const char *http_method_str(enum http_method);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
@ -0,0 +1,105 @@
|
||||
#include "http.h"
|
||||
#include "server.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
struct http_client *
|
||||
http_client_new(int fd, struct server *s) {
|
||||
|
||||
struct http_client *c = calloc(1, sizeof(struct http_client));
|
||||
c->fd = fd;
|
||||
c->s = s;
|
||||
|
||||
c->settings.on_path = http_on_path;
|
||||
c->settings.on_body = http_on_body;
|
||||
http_parser_init(&c->parser, HTTP_REQUEST);
|
||||
c->parser.data = c;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
http_client_read(int fd, short event, void *ctx) {
|
||||
|
||||
struct http_client *c = ctx;
|
||||
char buffer[64*1024];
|
||||
int ret, nparsed;
|
||||
|
||||
(void)fd;
|
||||
(void)event;
|
||||
|
||||
ret = read(c->fd, buffer, sizeof(buffer));
|
||||
if(ret <= 0) { /* broken connection, bye */
|
||||
printf("close client %d\n", c->fd);
|
||||
http_client_free(c);
|
||||
return;
|
||||
}
|
||||
|
||||
c->buffer = realloc(c->buffer, c->sz + ret);
|
||||
memcpy(c->buffer + c->sz, buffer, ret);
|
||||
c->sz += ret;
|
||||
|
||||
/* TODO: http parse. */
|
||||
nparsed = http_parser_execute(&c->parser, &c->settings, buffer, ret);
|
||||
if(c->parser.upgrade) {
|
||||
/* TODO */
|
||||
} else if(nparsed != ret) { /* invalid */
|
||||
http_client_free(c);
|
||||
}
|
||||
/*
|
||||
printf("parse %zd bytes: [", c->sz); fflush(stdout);
|
||||
write(1, c->buffer, c->sz);
|
||||
printf("]\n");
|
||||
*/
|
||||
|
||||
|
||||
/* carry on */
|
||||
http_client_serve(c);
|
||||
}
|
||||
|
||||
void
|
||||
http_client_free(struct http_client *c) {
|
||||
|
||||
close(c->fd);
|
||||
free(c->buffer);
|
||||
free(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add read event callback
|
||||
*/
|
||||
void
|
||||
http_client_serve(struct http_client *c) {
|
||||
|
||||
event_set(&c->ev, c->fd, EV_READ, http_client_read, c);
|
||||
event_base_set(c->s->base, &c->ev);
|
||||
|
||||
event_add(&c->ev, NULL);
|
||||
}
|
||||
|
||||
int
|
||||
http_on_path(http_parser *p, const char *at, size_t length) {
|
||||
|
||||
struct http_client *c = p->data;
|
||||
|
||||
c->path.s = at;
|
||||
c->path.sz = length;
|
||||
|
||||
/* save HTTP verb as well */
|
||||
c->verb = (enum http_method)p->method;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
http_on_body(http_parser *p, const char *at, size_t length) {
|
||||
struct http_client *c = p->data;
|
||||
|
||||
c->body.s = at;
|
||||
c->body.sz = length;
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
#ifndef HTTP_H
|
||||
#define HTTP_H
|
||||
|
||||
#include <event.h>
|
||||
#include "http-parser/http_parser.h"
|
||||
|
||||
typedef struct {
|
||||
const char *s;
|
||||
size_t sz;
|
||||
} str_t;
|
||||
|
||||
struct http_client {
|
||||
|
||||
/* socket and server reference */
|
||||
int fd;
|
||||
struct event ev;
|
||||
struct server *s;
|
||||
|
||||
/* input buffer */
|
||||
char *buffer;
|
||||
size_t sz;
|
||||
|
||||
/* http parser */
|
||||
http_parser_settings settings;
|
||||
http_parser parser;
|
||||
|
||||
/* decoded http */
|
||||
enum http_method verb;
|
||||
|
||||
str_t path;
|
||||
str_t body;
|
||||
};
|
||||
|
||||
struct http_client *
|
||||
http_client_new(int fd, struct server *s);
|
||||
|
||||
void
|
||||
http_client_serve(struct http_client *c);
|
||||
|
||||
void
|
||||
http_client_free(struct http_client *c);
|
||||
|
||||
int
|
||||
http_on_path(http_parser*, const char *at, size_t length);
|
||||
|
||||
int
|
||||
http_on_path(http_parser*, const char *at, size_t length);
|
||||
|
||||
int
|
||||
http_on_body(http_parser*, const char *at, size_t length);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue