把源码中的头文件http_parser.h和源码http_parser.c直接拷贝到项目中(https://github.com/nodejs/http-parser),然后一起编译即可;
我们写一个简单的测试例子:
main.c
#include "http_parser.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
static http_parser *parser;
int on_message_begin(http_parser* ) {
(void);
printf("\nMESSAGE BEGIN\n\n");
return 0;
}int on_headers_complete(http_parser* ) {
(void);
printf("\nHEADERS COMPLETE\n\n");
return 0;
}int on_message_complete(http_parser* ) {
(void);
printf("\nMESSAGE COMPLETE\n\n");
return 0;
}int on_url(http_parser* , const char* at, size_t length) {
(void);
printf("Url: %.*s\n", (int)length, at);return 0;
}int on_header_field(http_parser* , const char* at, size_t length) {
(void);
printf("Header field: %.*s\n", (int)length, at);
return 0;
}int on_header_value(http_parser* , const char* at, size_t length) {
(void);
printf("Header value: %.*s\n", (int)length, at);
return 0;
}int on_body(http_parser* , const char* at, size_t length) {
(void);
printf("Body: %.*s\n", (int)length, at);
return 0;
}int main() {
http_parser_settings parser_set;// http_parser的回调函数,需要获取HEADER后者BODY信息,可以在这里面处理。 parser_set.on_message_begin = on_message_begin; parser_set.on_header_field = on_header_field; parser_set.on_header_value = on_header_value; parser_set.on_url = on_url; parser_set.on_body = on_body; parser_set.on_headers_complete = on_headers_complete; parser_set.on_message_complete = on_message_complete; char buf[1024]="GET /a/b/c/d HTTP/1.1"; size_t parsed; parser = (http_parser*)malloc(sizeof(http_parser)); // 分配一个http_parser http_parser_init(parser, HTTP_REQUEST); // 初始化parser为Request类型 parsed = http_parser_execute(parser, &parser_set, buf, strlen(buf)); // 执行解析过程 http_parser_execute(parser, &parser_set, buf, 0); // 信息读取完毕 free(parser); parser = NULL;
}
使用主要分三步:
1. 申请一块http_parser大小的内存作为当前请求的parser对象,里面包含了对这次请求的解析信息;
2. 申请一块http_parser_settings大小内存作为设置对象,它包含了我们设置的各种回调函数;
3. 调用http_parser_execute解析请求串,根据返回值parsed与输入的总字节数是否相等来判断解析成功还是失败;
下面我们为源码添加一些注释,大家自行对照代码去分析里面的状态机的转化过程。
http_parser.h
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to
- deal in the Software without restriction, including without limitation the
- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- sell copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
/* Give every declaration in this header C linkage when it is included from
 * C++ (the matching closing brace sits at the end of the header). */
#ifdef __cplusplus
extern "C"
{
#endif

/* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 4

/* size_t is used by the callback typedefs and prototypes in this header. */
#include <stddef.h>
/*
 * Fixed-width integer types.
 *
 * - Windows builds that are not MinGW or Wine and use no MSVC or an MSVC
 *   older than _MSC_VER 1600: the types are declared by hand from the
 *   compiler-specific __intN types (presumably because <stdint.h> is not
 *   available there - confirm before relying on it).
 * - SunOS 5.9: taken from <sys/inttypes.h>.
 * - Everything else: the standard <stdint.h>.
 */
#if defined(_WIN32) && !defined(__MINGW32__) && \
  (!defined(_MSC_VER) || _MSC_VER < 1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif (defined(__sun) || defined(sun)) && defined(__SunOS_5_9)
#include <sys/inttypes.h>
#else
#include <stdint.h>
#endif

/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run
 * faster
 */
#ifndef HTTP_PARSER_STRICT
#define HTTP_PARSER_STRICT 1
#endif

/* Maximum header size allowed. If the macro is not defined
 * before including this header then the default is used. To
 * change the maximum header size, define the macro in the build
 * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove
 * the effective limit on the size of the header, define the macro
 * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff)
 */
#ifndef HTTP_MAX_HEADER_SIZE
#define HTTP_MAX_HEADER_SIZE (80 * 1024)
#endif

/* Forward declarations so the callback typedefs below can refer to the
 * parser type before the struct is defined. */
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;

/* Callbacks should return non-zero to indicate an error. The parser will
 * then halt execution.
 *
 * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
 * returning '1' from on_headers_complete will tell the parser that it
 * should not expect a body. This is used when receiving a response to a
 * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
 * chunked' headers that indicate the presence of a body.
 *
 * Returning `2` from on_headers_complete will tell parser that it should
 * expect neither a body nor any further responses on this connection. This
 * is useful for handling responses to a CONNECT request which may not
 * contain `Upgrade` or `Connection: upgrade` headers.
 *
 * http_data_cb does not return data chunks. It will be called arbitrarily
 * many times for each string. E.G. you might get 10 callbacks for "on_url"
 * each providing just a few characters more data.
 */
typedef int (*http_data_cb)(http_parser *, const char *at, size_t length);
typedef int (*http_cb)(http_parser *);

/* Each XX(...) entry of the status map that follows carries three values:
 * the numeric status code, the identifier used to build the enum constant
 * name, and the human-readable description. */
/* Status Codes */
#define HTTP_STATUS_MAP(XX)                                                   \
  XX(100, CONTINUE,                        Continue)                          \
  XX(101, SWITCHING_PROTOCOLS,             Switching Protocols)               \
  XX(102, PROCESSING,                      Processing)                        \
  XX(200, OK,                              OK)                                \
  XX(201, CREATED,                         Created)                           \
  XX(202, ACCEPTED,                        Accepted)                          \
  XX(203, NON_AUTHORITATIVE_INFORMATION,   Non-Authoritative Information)     \
  XX(204, NO_CONTENT,                      No Content)                        \
  XX(205, RESET_CONTENT,                   Reset Content)                     \
  XX(206, PARTIAL_CONTENT,                 Partial Content)                   \
  XX(207, MULTI_STATUS,                    Multi-Status)                      \
  XX(208, ALREADY_REPORTED,                Already Reported)                  \
  XX(226, IM_USED,                         IM Used)                           \
  XX(300, MULTIPLE_CHOICES,                Multiple Choices)                  \
  XX(301, MOVED_PERMANENTLY,               Moved Permanently)                 \
  XX(302, FOUND,                           Found)                             \
  XX(303, SEE_OTHER,                       See Other)                         \
  XX(304, NOT_MODIFIED,                    Not Modified)                      \
  XX(305, USE_PROXY,                       Use Proxy)                         \
  XX(307, TEMPORARY_REDIRECT,              Temporary Redirect)                \
  XX(308, PERMANENT_REDIRECT,              Permanent Redirect)                \
  XX(400, BAD_REQUEST,                     Bad Request)                       \
  XX(401, UNAUTHORIZED,                    Unauthorized)                      \
  XX(402, PAYMENT_REQUIRED,                Payment Required)                  \
  XX(403, FORBIDDEN,                       Forbidden)                         \
  XX(404, NOT_FOUND,                       Not Found)                         \
  XX(405, METHOD_NOT_ALLOWED,              Method Not Allowed)                \
  XX(406, NOT_ACCEPTABLE,                  Not Acceptable)                    \
  XX(407, PROXY_AUTHENTICATION_REQUIRED,   Proxy Authentication Required)     \
  XX(408, REQUEST_TIMEOUT,                 Request Timeout)                   \
  XX(409, CONFLICT,                        Conflict)                          \
  XX(410, GONE,                            Gone)                              \
  XX(411, LENGTH_REQUIRED,                 Length Required)                   \
  XX(412, PRECONDITION_FAILED,             Precondition Failed)               \
  XX(413, PAYLOAD_TOO_LARGE,               Payload Too Large)                 \
  XX(414, URI_TOO_LONG,                    URI Too Long)                      \
  XX(415, UNSUPPORTED_MEDIA_TYPE,          Unsupported Media Type)            \
  XX(416, RANGE_NOT_SATISFIABLE,           Range Not Satisfiable)             \
  XX(417, EXPECTATION_FAILED,              Expectation Failed)                \
  XX(421, MISDIRECTED_REQUEST,             Misdirected Request)               \
  XX(422, UNPROCESSABLE_ENTITY,            Unprocessable Entity)              \
  XX(423, LOCKED,                          Locked)                            \
  XX(424, FAILED_DEPENDENCY,               Failed Dependency)                 \
  XX(426, UPGRADE_REQUIRED,                Upgrade Required)                  \
  XX(428, PRECONDITION_REQUIRED,           Precondition Required)             \
  XX(429, TOO_MANY_REQUESTS,               Too Many Requests)                 \
  XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large)   \
  XX(451, UNAVAILABLE_FOR_LEGAL_REASONS,   Unavailable For Legal Reasons)     \
  XX(500, INTERNAL_SERVER_ERROR,           Internal Server Error)             \
  XX(501, NOT_IMPLEMENTED,                 Not Implemented)                   \
  XX(502, BAD_GATEWAY,                     Bad Gateway)                       \
  XX(503, SERVICE_UNAVAILABLE,             Service Unavailable)               \
  XX(504, GATEWAY_TIMEOUT,                 Gateway Timeout)                   \
  XX(505, HTTP_VERSION_NOT_SUPPORTED,      HTTP Version Not Supported)        \
  XX(506, VARIANT_ALSO_NEGOTIATES,         Variant Also Negotiates)           \
  XX(507, INSUFFICIENT_STORAGE,            Insufficient Storage)              \
  XX(508, LOOP_DETECTED,                   Loop Detected)                     \
  XX(510, NOT_EXTENDED,                    Not Extended)                      \
  XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required)

/*
 * The declaration below goes through two rounds of macro replacement.
 *
 * Round 1: HTTP_STATUS_MAP(XX) expands to one XX(...) call per entry:
 *
 *   enum http_status {
 *     XX(100, CONTINUE, Continue)
 *     XX(101, SWITCHING_PROTOCOLS, Switching Protocols)
 *     XX(102, PROCESSING, Processing)
 *     XX(200, OK, OK)
 *     ...
 *   };
 *
 * Round 2: each XX(num, name, string) expands to HTTP_STATUS_##name = num,
 *
 *   enum http_status {
 *     HTTP_STATUS_CONTINUE = 100,
 *     HTTP_STATUS_SWITCHING_PROTOCOLS = 101,
 *     HTTP_STATUS_PROCESSING = 102,
 *     HTTP_STATUS_OK = 200,
 *     ...
 *   };
 *
 * The result is an enum holding every status code.
 */
enum http_status {
#define XX(num, name, string) HTTP_STATUS_##name = num,
  HTTP_STATUS_MAP(XX)
#undef XX
};

/* Request Methods */
#define HTTP_METHOD_MAP(XX)         \
  XX(0,  DELETE,      DELETE)       \
  XX(1,  GET,         GET)          \
  XX(2,  HEAD,        HEAD)         \
  XX(3,  POST,        POST)         \
  XX(4,  PUT,         PUT)          \
  /* pathological */                \
  XX(5,  CONNECT,     CONNECT)      \
  XX(6,  OPTIONS,     OPTIONS)      \
  XX(7,  TRACE,       TRACE)        \
  /* WebDAV */                      \
  XX(8,  COPY,        COPY)         \
  XX(9,  LOCK,        LOCK)         \
  XX(10, MKCOL,       MKCOL)        \
  XX(11, MOVE,        MOVE)         \
  XX(12, PROPFIND,    PROPFIND)     \
  XX(13, PROPPATCH,   PROPPATCH)    \
  XX(14, SEARCH,      SEARCH)       \
  XX(15, UNLOCK,      UNLOCK)       \
  XX(16, BIND,        BIND)         \
  XX(17, REBIND,      REBIND)       \
  XX(18, UNBIND,      UNBIND)       \
  XX(19, ACL,         ACL)          \
  /* subversion */                  \
  XX(20, REPORT,      REPORT)       \
  XX(21, MKACTIVITY,  MKACTIVITY)   \
  XX(22, CHECKOUT,    CHECKOUT)     \
  XX(23, MERGE,       MERGE)        \
  /* upnp */                        \
  XX(24, MSEARCH,     M-SEARCH)     \
  XX(25, NOTIFY,      NOTIFY)       \
  XX(26, SUBSCRIBE,   SUBSCRIBE)    \
  XX(27, UNSUBSCRIBE, UNSUBSCRIBE)  \
  /* RFC-5789 */                    \
  XX(28, PATCH,       PATCH)        \
  XX(29, PURGE,       PURGE)        \
  /* CalDAV */                      \
  XX(30, MKCALENDAR,  MKCALENDAR)   \
  /* RFC-2068, section 19.6.1.2 */  \
  XX(31, LINK,        LINK)         \
  XX(32, UNLINK,      UNLINK)       \
  /* icecast */                     \
  XX(33, SOURCE,      SOURCE)

/*
 * Same technique as for the status codes:
 *
 *   enum http_method {
 *     HTTP_DELETE = 0,
 *     HTTP_GET = 1,
 *     ...
 *   };
 *
 * The result is an enum holding every request method.
 */
enum http_method {
#define XX(num, name, string) HTTP_##name = num,
  HTTP_METHOD_MAP(XX)
#undef XX
};

/* What the parser is parsing: a request, a response, or either one. */
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };

/* State gathered while parsing a message, stored as individual bits. */
/* Flag values for http_parser.flags field */
enum flags {
  F_CHUNKED = 1 << 0,               /* chunked transfer */
  F_CONNECTION_KEEP_ALIVE = 1 << 1, /* keep the connection open */
  F_CONNECTION_CLOSE = 1 << 2,      /* close the connection */
  F_CONNECTION_UPGRADE = 1 << 3,    /* connection upgrade */
  F_TRAILING = 1 << 4,              /* chunked data has been read completely */
  F_UPGRADE = 1 << 5,               /* protocol upgrade */
  F_SKIPBODY = 1 << 6,              /* skip the body */
  F_CONTENTLENGTH = 1 << 7          /* the message body has a length value */
};
/* Map for errno-related constants
 *
 * The provided argument should be a macro that takes 2 arguments.
 */
#define HTTP_ERRNO_MAP(XX)                                              \
  /* No error */                                                        \
  XX(OK, "success")                                                     \
                                                                        \
  /* Callback-related errors */                                         \
  XX(CB_message_begin, "the on_message_begin callback failed")          \
  XX(CB_url, "the on_url callback failed")                              \
  XX(CB_header_field, "the on_header_field callback failed")            \
  XX(CB_header_value, "the on_header_value callback failed")            \
  XX(CB_headers_complete, "the on_headers_complete callback failed")    \
  XX(CB_body, "the on_body callback failed")                            \
  XX(CB_message_complete, "the on_message_complete callback failed")    \
  XX(CB_status, "the on_status callback failed")                        \
  XX(CB_chunk_header, "the on_chunk_header callback failed")            \
  XX(CB_chunk_complete, "the on_chunk_complete callback failed")        \
                                                                        \
  /* Parsing-related errors */                                          \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time")           \
  XX(HEADER_OVERFLOW,                                                   \
     "too many header bytes seen; overflow detected")                   \
  XX(CLOSED_CONNECTION,                                                 \
     "data received after completed connection: close message")         \
  XX(INVALID_VERSION, "invalid HTTP version")                           \
  XX(INVALID_STATUS, "invalid HTTP status code")                        \
  XX(INVALID_METHOD, "invalid HTTP method")                             \
  XX(INVALID_URL, "invalid URL")                                        \
  XX(INVALID_HOST, "invalid host")                                      \
  XX(INVALID_PORT, "invalid port")                                      \
  XX(INVALID_PATH, "invalid path")                                      \
  XX(INVALID_QUERY_STRING, "invalid query string")                      \
  XX(INVALID_FRAGMENT, "invalid fragment")                              \
  XX(LF_EXPECTED, "LF character expected")                              \
  XX(INVALID_HEADER_TOKEN, "invalid character in header")               \
  XX(INVALID_CONTENT_LENGTH,                                            \
     "invalid character in content-length header")                      \
  XX(UNEXPECTED_CONTENT_LENGTH,                                         \
     "unexpected content-length header")                                \
  XX(INVALID_CHUNK_SIZE,                                                \
     "invalid character in chunk size header")                          \
  XX(INVALID_CONSTANT, "invalid constant string")                       \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")   \
  XX(STRICT, "strict mode assertion failed")                            \
  XX(PAUSED, "parser is paused")                                        \
  XX(UNKNOWN, "an unknown error occurred")                              \
  XX(INVALID_TRANSFER_ENCODING,                                         \
     "request has invalid transfer-encoding")

/*
 * Same technique as for the status and method maps; the expansion is
 *
 *   enum http_errno
 *   {
 *     HPE_OK,
 *     HPE_CB_message_begin,
 *     HPE_CB_url,
 *     ...
 *   };
 *
 * which yields the enum of error kinds. Only the first macro argument is
 * used here; the description string is ignored by HTTP_ERRNO_GEN.
 */

/* Define HPE_* values for each errno value above */
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno
{
  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN

/* Get an http_errno value from an http_parser */
#define HTTP_PARSER_ERRNO(p) ((enum http_errno)(p)->http_errno)// 每个tcp连接处理一次报文的时候初始化一个新的http_parser结构体来存储解析的信息和状态 struct http_parser { /** PRIVATE **/ // 2位bit来区分解析报文的类型 unsigned int type : 2; /* enum http_parser_type */ // 8位bit来存储上文所述的flags各个字段 unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ // 7位bit来表示目前报文状态机的当前状态 unsigned int state : 7; /* enum state from http_parser.c */ // 7位bit来表示目前正在解析头部字段所述的头部状态机的状态 unsigned int header_state : 7; /* enum header_state from http_parser.c */ // 5位ibit 解析每一类数据 如 method url version head 等都把它重置为0 表示解析到当前类型数据的第几个字符 unsigned int index : 5; /* index into current matcher */ // 1位bit 是否有这个transfer-encoding字段 unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */ // 1位bit 是否同时有length和chunked unsigned int allow_chunked_length : 1; /* Allow headers with both * `Content-Length` and * `Transfer-Encoding: chunked` set */ // 1位bit 放宽对头部字段字符集的限制 unsigned int lenient_http_headers : 1; // paser当前总共分析了多少个字符 uint32_t nread; /* # bytes read in various scenarios */ // 如果有content_length字段 存储它的值 uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one) * if no Content-Length header. */ /** READ-ONLY **/ // HTTP主版本 unsigned short http_major; // HTTP次版本 unsigned short http_minor; // 16位状态码 unsigned int status_code : 16; /* responses only */ // 8位请求方法 unsigned int method : 8; /* requests only */ // parser所处的errno状态 只有OK才算正常errno unsigned int http_errno : 7; /* 1 = Upgrade header was present and the parser has exited because of that.
0 = No upgrade header present.
Should be checked when http_parser_execute() returns in addition to
error checking.
*/
// 协议升级
unsigned int upgrade : 1;// 与外界数据产生关联的钩子
/** PUBLIC **/
void data; / A pointer to get hook to the "connection" or "socket" object */
};// 存储用户传入的回调函数的地址setting struct http_parser_settings { // 开始解析时触发 http_cb on_message_begin; // 解析出完整的请求url时触发 http_data_cb on_url; // 响应报文解析出 status时触发 http_data_cb on_status; // 解析出一个头部字段key时触发 http_data_cb on_header_field; // 解析出一个头部字段值value时触发 http_data_cb on_header_value; // 整个头部字段(0-N)行解析完成触发 http_cb on_headers_complete; // 报文数据读读取完时触发 http_data_cb on_body; // 整个解析完成时触发 http_cb on_message_complete; /* When on_chunk_header is called, the current chunk length is stored
- in parser->content_length.
*/
// 解析得到一行chunked的size大小 值放在此时的content-length中 触发
http_cb on_chunk_header;
// 对应上面size的chunked数据读取完触发
http_cb on_chunk_complete;
};enum http_parser_url_fields { // 协议 UF_SCHEMA = 0, // host 域名或者ip地址 UF_HOST = 1, // 端口号 UF_PORT = 2, // 请求路径path UF_PATH = 3, // 查询参数 UF_QUERY = 4, // 哈希字段 UF_FRAGMENT = 5, // 用户信息 UF_USERINFO = 6, // 最大位数 UF_MAX = 7 }; /* Result structure for http_parser_parse_url().
Callers should index into field_data[] with UF_* values iff field_set
has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
because we probably have padding left over), we convert any port to
a uint16_t.
/
struct http_parser_url
{
// url哪些部分存在 存在则置位1
uint16_t field_set; / Bitmask of (1 << UF_*) values /
// 端口号
uint16_t port; / Converted UF_PORT string */// 每个数据字段的偏移和长度 struct { uint16_t off; /* Offset into buffer in which field starts */ uint16_t len; /* Length of run in buffer */ } field_data[UF_MAX];
};
/* Library version, packed into one integer: major number in bits 16-23,
 * minor number in bits 8-15, patch level in bits 0-7.
 *
 * Usage example:
 *
 *   unsigned long version = http_parser_version();
 *   unsigned major = (version >> 16) & 255;
 *   unsigned minor = (version >> 8) & 255;
 *   unsigned patch = version & 255;
 *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
 */
unsigned long http_parser_version(void);

void http_parser_init(http_parser *parser, enum http_parser_type type);

/* Sets every member of *settings to 0. */
void http_parser_settings_init(http_parser_settings *settings);

/* Runs the parser over len bytes at data and returns how many bytes were
 * parsed. On error parser->http_errno is set. */
size_t http_parser_execute(http_parser *parser,
                           const http_parser_settings *settings,
                           const char *data,
                           size_t len);

/* When this returns 0 inside the on_headers_complete or on_message_complete
 * callback, the current message should be the last one on the connection:
 * a server should answer with the "Connection: close" header, a client
 * should close the connection.
 */
int http_should_keep_alive(const http_parser *parser);

/* String form of an HTTP method. */
const char *http_method_str(enum http_method m);

/* String form of an HTTP status code. */
const char *http_status_str(enum http_status s);

/* Name of the given error, as a string. */
const char *http_errno_name(enum http_errno err);

/* Description of the given error, as a string. */
const char *http_errno_description(enum http_errno err);

/* Sets every member of *u to 0. */
void http_parser_url_init(struct http_parser_url *u);

/* Parses a URL; a nonzero return value means failure. */
int http_parser_parse_url(const char *buf, size_t buflen,
                          int is_connect,
                          struct http_parser_url *u);

/* Pauses (nonzero paused) or un-pauses (zero) the parser. */
void http_parser_pause(http_parser *parser, int paused);

/* Tells whether this is the final chunk of the body. */
int http_body_is_final(const http_parser *parser);

/* Overrides the maximum header size that was provided at compile time. */
void http_parser_set_max_header_size(uint32_t size);
#ifdef __cplusplus
}
#endif
#endif
http_parser.c由于字符数超标,文件地址为: http_parser.c
里面包含了全部内容,大家请自行查阅哈。