2015-02-06 103 views
4

我刚刚实现了一个HTTP/1.1客户端来解析分块传输编码。但是,它适用于某些网站,但其他网站却失败。我假设我需要为每个块数据读取chunkSize + 2字节,包括\r\n,对吗?解析Chunked HTTP/1.1响应

这里是我的代码:

while(chunked)//if detecting chunked in the header before, this is true 
{ 
    //getLine is a function can read a line separated by \r\n 
    //sockfd is a socket created before and file position is at the start of HTTP body (after that blank line between header and body) 
    line = getLine(sockfd); 
    printf("%s", line);//print the chunk size line in hex 
    int chunkSize = strtol(line, NULL, 16); 
    if(chunkSize == 0) 
    { 
     printf("##### Read chunk size of 0, reading until we hit end of stream.\n"); 
     break; 
    } 
    printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize); 
    char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0 
    bzero(chunkBuf, chunkSize + 3); 
    if(read(sockfd, chunkBuf, chunkSize + 2) == 0)//sockfd is a socket created before 
    { 
     perror("Read Error: "); 
     exit(EXIT_FAILURE); 
    } 
    printf("%s", chunkBuf);//print the chunk content 
    free(chunkBuf); 

} 

其实我可以打印出全部内容,而无需解析,即通过线打印线,所以我觉得我可以做在上面的代码中的一些错误,任何人都可以给我一些提示?

下面是参考整个代码:

#include <stdio.h> 
#include <sys/socket.h> 
#include <arpa/inet.h> 
#include <stdlib.h> 
#include <netdb.h> 
#include <netinet/in.h> 
#include <string.h> 
#include <stdbool.h> 
#include <unistd.h> 

#define HTTP_VERSION "HTTP/1.1" 
#define PAGE "/" 

int createSokect() 
{ 
    int socketfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 
    if(socketfd < 0) 
    { 
     perror("Cannot create socket\n"); 
     exit(EXIT_FAILURE); 
    } 
    return socketfd; 
} 

char* getIP(char* host) 
{ 
    struct hostent* hent; 
    int len = 15;//xxx.xxx.xxx.xxx 
    char *ipaddr = (char *)malloc(len + 1);//one more \0 
    bzero(ipaddr, len + 1); 
    if((hent = gethostbyname(host)) == NULL) 
    { 
     printf("Cannot get IP for this host: %s\n", host); 
     exit(EXIT_FAILURE); 
    } 
    if(inet_ntop(AF_INET, (void*)hent->h_addr_list[0], ipaddr, len) == NULL) 
    { 
     printf("Cannot resolve IP for this host: %s\n", host); 
     exit(EXIT_FAILURE); 
    } 
    return ipaddr; 
} 

char* createQuery(char* host, char* page) 
{ 
    char* msg = "GET %s %s\r\nHost: %s\r\nConnection: close\r\n\r\n"; 
    char* query = (char *)malloc(strlen(host) + strlen(page) + strlen(msg) + strlen(HTTP_VERSION) - 6 + 1);//-6: %s %s %s 
    sprintf(query, msg, page, HTTP_VERSION, host); 
    return query; 
} 

char* getLine(int fd) 
{ 
    char c = 0, pre = 0; 
    char* line = 0; 
    int size = 1; 
    int pos = 0; 
    while(read(fd, &c, 1)!=0) 
    { 
     if(pos + 1 == size) 
     { 
      size *= 2; 
      line = realloc(line, size); 
     } 
     line[pos++] = c; 
     //printf("%c", c); 

     if(pre == '\r' && c == '\n')//this is a new line 
     { 
      break; 
     } 
     pre = c; 

    } 
    if(line) 
    { 
     line[pos++] = 0; 
    } 
    return line; 
} 

int main(int argc, char** argv) 
{ 
    if(argc < 3) 
    { 
     perror("Need more arguments"); 
     exit(EXIT_FAILURE); 
    } 
    int sockfd = createSokect(); 
    char* ip = getIP(argv[1]); 
    printf("Host: %s\n", argv[1]); 
    printf("IP: %s\n", ip); 
    struct sockaddr_in server; 
    server.sin_family = AF_INET; 
    int err = inet_pton(server.sin_family, ip, (void *)(&(server.sin_addr.s_addr))); 
    if(err != 1) 
    { 
     perror("Cannot convert IP to binary address\n"); 
     exit(EXIT_FAILURE); 
    } 
    server.sin_port = htons(atoi(argv[2])); 
    printf("port: %d\n", server.sin_port); 

    //connect to the server 
    if(connect(sockfd, (struct sockaddr *)&server, sizeof(server)) < 0) 
    { 
     printf("Cannot connect: %d\n", err); 
     exit(EXIT_FAILURE); 
    } 

    char* query = createQuery(argv[1], PAGE); 
    printf("##### CLIENT IS SENDING THE FOLLOWING TO SERVER:\n"); 
    printf("%s", query); 

    int offset = 0; 
    //send query to the server 
    err = send(sockfd, query + offset, strlen(query) - offset, 0); 
    if(err < 0) 
    { 
     perror("Cannot send query"); 
     exit(EXIT_FAILURE); 
    } 


    printf("##### CLIENT RECEIVED THE FOLLOWING FROM SERVER:\n"); 
    //receive message line by line 
    bool chunked = false; 
    char* line; 
    while((line = getLine(sockfd)) != NULL) 
    { 
     printf("%s", line); 
     if(!strcasecmp(line, "transfer-encoding: chunked\r\n")) 
     { 
      chunked = true; 
      //printf("Chunked here\n"); 
     } 
     if(!strcmp(line, "\r\n")) 
     { 
      printf("##### Just read blank line, now reading body.\n"); 
      if(chunked)//chunked, we print those in another way, otherwise line by line 
      { 
       free(line); 
       break; 
      } 
     } 


     free(line); 
    } 

    while(chunked) 
    { 
     line = getLine(sockfd); 
     printf("%s", line); 
     int chunkSize = strtol(line, NULL, 16); 
     if(chunkSize == 0) 
     { 
      printf("##### Read chunk size of 0, reading until we hit end of stream.\n"); 
      break; 
     } 
     printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize); 
     char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0 
     bzero(chunkBuf, chunkSize + 3); 
     if(read(sockfd, chunkBuf, chunkSize + 2) == 0) 
     { 
      perror("Read Error: "); 
      exit(EXIT_FAILURE); 
     } 
     printf("%s", chunkBuf); 
     free(chunkBuf); 

    } 

    //receive message from the server 
    /* 
    char buf[2048]; 
    bzero(buf, sizeof(buf)); 
    err = recv(sockfd, buf, sizeof(buf), 0); 
    if(err < 0) 
    { 
    perror("Receive error"); 
    exit(EXIT_FAILURE); 
    } 
    char *content = buf; 
    fprintf(stdout, content);*/ 

    free(query); 
    free(ip); 
    close(sockfd); 
    printf("##### Connection closed by server.\n"); 
    exit(EXIT_SUCCESS); 
} 
+0

你的程序打印什么? – Marian 2015-02-06 16:21:35

+0

收到解决方案后不要破坏您的帖子。堆栈溢出的问题和答案也可以帮助未来的开发人员,而不仅仅是提问者。 – Matt 2015-11-09 18:08:57

回答

5

线:

if(read(sockfd, chunkBuf, chunkSize + 2) == 0) ... 

将读取到CHUNKSIZE + 2,即,它可以读取以下。请参阅手册页面read。你的代码应该看起来像这样:

int n = 0; 
while (n<chunkSize) { 
    r = read(sockfd, chunkBuf+n, chunkSize - n); 
    if (r <= 0) { error or closed conection ... } 
    n += r; 
} 
+0

我不明白。对于块,它的大小指定跟随的字节数。为什么它会减少我的方式?另外,我改变了你说的代码,问题仍然存在。 – TonyLic 2015-02-06 16:13:04

+1

'read'返回现在可用的数据。如果块被分割为两个网络帧,并且只有一个在您调用'read'时到达,那么只有一半数据将在第一次调用时返回。 – Marian 2015-02-06 16:17:14

+0

这很有道理。但是我的问题似乎是在阅读完一个大块后我无法阅读新的一行。你能看看问题出在哪里吗? – TonyLic 2015-02-06 17:57:44

0

由于我知道块的大小,所以我一个一个地读取字符,直到块的大小。这种方式可以工作。但我仍然不明白为什么我一次尝试使用整个块大小的read或recv失败。