2011-08-25 167 views
0
int connect_host(s_host_t * h, int recv_sec, int send_sec) 
{ 
    int sock = -1; 
    int ret; 
    int select_ret; 
    int res; 
    socklen_t res_size = sizeof res; 
    struct sockaddr_in channel; 
    struct hostent host; 
    struct timeval recv_timeout; 
    struct timeval send_timeout; 
    fd_set wset; 

    if (FAIL_CHECK(!gethostname_my(h->addr, &host))) 
    { 
     gko_log(WARNING, "gethostbyname %s error", h->addr); 
     ret = -1; 
     goto CONNECT_END; 
    } 
    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 
    if (FAIL_CHECK(sock < 0)) 
    { 
     gko_log(WARNING, "get socket error"); 
     ret = -1; 
     goto CONNECT_END; 
    } 

    recv_timeout.tv_usec = 0; 
    recv_timeout.tv_sec = recv_sec ? recv_sec : RCV_TIMEOUT; 
    send_timeout.tv_usec = 0; 
    send_timeout.tv_sec = send_sec ? send_sec : SND_TIMEOUT; 

    memset(&channel, 0, sizeof(channel)); 
    channel.sin_family = AF_INET; 
    memcpy(&channel.sin_addr.s_addr, host.h_addr, host.h_length); 
    channel.sin_port = htons(h->port); 

    /** set the connect non-blocking then blocking for add timeout on connect **/ 
    if (FAIL_CHECK(setnonblock(sock) < 0)) 
    { 
     gko_log(WARNING, "set socket non-blocking error"); 
     ret = -1; 
     goto CONNECT_END; 
    } 

    /** connect and send the msg **/ 
    if (FAIL_CHECK(connect(sock, (struct sockaddr *) &channel, sizeof(channel)) && 
      errno != EINPROGRESS)) 
    { 
     gko_log(WARNING, "connect error"); 
     ret = HOST_DOWN_FAIL; 
     goto CONNECT_END; 
    } 

    /** Wait for write bit to be set **/ 
    /// 
    FD_ZERO(&wset); 
    FD_SET(sock, &wset); 
    select_ret = select(sock + 1, 0, &wset, 0, &send_timeout); 
    if (select_ret < 0) 
    { 
     gko_log(FATAL, "select error on connect"); 
     ret = HOST_DOWN_FAIL; 
     goto CONNECT_END; 
    } 
    if (!select_ret) 
    { 
     gko_log(FATAL, "connect timeout on connect"); 
     ret = HOST_DOWN_FAIL; 
     goto CONNECT_END; 
    } 

    /** 
    * check if connection is RESETed, maybe this is the 
    * best way to do that 
    * SEE: http://cr.yp.to/docs/connect.html 
    **/ 
    (void) getsockopt(sock, SOL_SOCKET, SO_ERROR, &res, &res_size); 
    if (CONNECT_DEST_DOWN(res)) 
    { 
     gko_log(NOTICE, "dest is down SO_ERROR: %d", res); 
     ret = HOST_DOWN_FAIL; 
     goto CONNECT_END; 
    } 

    ///gko_log(WARNING, "selected %d ret %d, time %d", sock, select_ret, send_timeout.tv_sec); 
    /** set back blocking **/ 
    if (FAIL_CHECK(setblock(sock) < 0)) 
    { 
     gko_log(WARNING, "set socket non-blocking error"); 
     ret = -1; 
     goto CONNECT_END; 
    } 

    /** set recv & send timeout **/ 
    if (FAIL_CHECK(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &recv_timeout, 
        sizeof(struct timeval)))) 
    { 
     gko_log(WARNING, "setsockopt SO_RCVTIMEO error"); 
     ret = -1; 
     goto CONNECT_END; 
    } 
    if (FAIL_CHECK(setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &send_timeout, 
        sizeof(struct timeval)))) 
    { 
     gko_log(WARNING, "setsockopt SO_SNDTIMEO error"); 
     ret = -1; 
     goto CONNECT_END; 
    } 

    ret = sock; 

    CONNECT_END: 
    /// 
    if (ret < 0 && sock >= 0) 
    { 
     close_socket(sock); 
    } 
    return ret; 
} 

gethostname_my:奇怪核心转储

struct hostent * gethostname_my(const char *host, struct hostent * ret) 
{ 
    struct hostent * tmp; 
    if (!ret) 
    { 
     gko_log(FATAL, "Null buf passed to gethostname_my error"); 
     return (struct hostent *) NULL; 
    } 

    pthread_mutex_lock(&g_netdb_mutex); 
    tmp = gethostbyname(host); 
    if (tmp) 
    { 
     memcpy(ret, tmp, sizeof(struct hostent)); 
    } 
    else 
    { 
     gko_log(WARNING, "resolve %s failed", host); 
     ret = NULL; 
    } 
    pthread_mutex_unlock(&g_netdb_mutex); 

    return ret; 
} 

在上述芯的FUNC “的memcpy(& channel.sin_addr.s_addr,host.h_addr,host.h_length);”几次。 这怎么可能? GDB说,它是由信号11

#0 0x000000302af71900 in memcpy() from /lib64/tls/libc.so.6 
(gdb) bt 
#0 0x000000302af71900 in memcpy() from /lib64/tls/libc.so.6 
#1 0x000000000040c42f in connect_host (h=0x2cd13ee060, recv_sec=2, send_sec=2) at socket.cpp:121 
#2 0x0000000000409f65 in sendcmd (h=0x2cd13ee060, cmd=0x2a9bcf4fb0 "DELE\t127.0.0.1\t59968", recv_sec=2, send_sec=2) at gingko_base.cpp:643 
#3 0x000000000040685e in quit_job_s (uri=0x2cd13ed170, fd=1015) at gingko_common.h:550 
#4 0x0000000000403e48 in conn_send_data (fd=1015, str=0x2cd13ed170, len=8) at async_conn.cpp:281 
#5 0x0000000000404045 in conn_tcp_server_on_data (fd=1015, ev=2, arg=0x1d51a00) at async_conn.cpp:358 
#6 0x000000000040f537 in event_base_loop (base=0x7b1ab0, flags=0) at event.c:392 
#7 0x00000000004034cf in thread_worker_init (arg=0x7b1a00) at async_threads.cpp:84 
#8 0x000000302b80610a in start_thread() from /lib64/tls/libpthread.so.0 
#9 0x000000302afc6003 in clone() from /lib64/tls/libc.so.6 
#10 0x0000000000000000 in ??() 
(gdb) f 1 
#1 0x000000000040c42f in connect_host (h=0x2cd13ee060, recv_sec=2, send_sec=2) at socket.cpp:121 
121   memcpy(&channel.sin_addr.s_addr, host.h_addr, host.h_length); 
(gdb) p host 
$1 = {h_name = 0x2ccd100d08 "127.0.0.1", h_aliases = 0x2ccd100d00, h_addrtype = 2, h_length = 4, h_addr_list = 0x2ccd100cf0} 
(gdb) p &channel.sin_addr.s_addr 
$2 = (in_addr_t *) 0x2a9bcf4f04 
(gdb) p channel 
$3 = {sin_family = 2, sin_port = 0, sin_addr = {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"} 
(gdb) p host.h_addr_list[0] 
$5 = 0x2ccd100ce0 "\177" 
(gdb) p host.h_addr_list[0][0] 
$6 = 127 '\177' 
(gdb) p host.h_addr_list[0][1] 
$7 = 0 '\0' 
(gdb) p host.h_addr_list[0][2] 
$8 = 0 '\0' 
(gdb) p host.h_addr_list[0][3] 
$9 = 1 '\001' 
+0

(GDB)p主机 $ 1 = {h_name复制= 0x1d550c8 “127.0.0.1”,h_aliases = 0x1d550c0,h_addrtype = 2,长度h_length = 4,h_addr_list = 0x1d550b0} – auxten

+0

和倾倒'h_addr_list'? –

+0

h_addr_list追加 – auxten

回答

2

您正在调用gethostname_my,但尚未提供其定义。

我想说它叫gethostname并返回一个hostent结构的副本。 但请注意h_addr是一个扩展到h_addr_list[0]的宏,因此如果gethostname_my函数没有适当地复制列表,它将不起作用。

CLARIFICATIONgethostname不是可重入的。它使用全局静态块内存来返回数据。该数据包含hostent结构和可变数量的别名和地址。他们通常,但不一定在这个结构旁边。

如果你正试图使这一功能的重入版本,那么你应该:

  • 锁,用互斥锁或类似的,在每次使用这个功能的(很难做,如果你使用第三方可能会在你不知道的情况下调用它的第三方库)。
  • 复制hostent struct 以及此结构指向的所有别名和地址

但是,为什么要打扰,当你有(不那么)新的和改进的getaddrinfo功能?这是可重入的,只是做紧密的事情(tm)。

+0

gethostname_my添加了 – auxten

+0

您刚刚跨过我的编辑。我是对的,你正在复制'hostent'结构,但不是它指向的地址,所以其他任何线程都可以在它们上面崩溃,并且崩溃! – rodrigo

+0

我会尝试getaddrinfo()。但请参阅我的附加h_addr_list [ 0],在这个核心结果是正确的,但为什么它仍然是核心? – auxten

0

领域

s_addr 

终止是一个长期的(我猜8个字节?)

您正在复制

host.h_length 

字节。在失败的情况下,h_length的值是多少?

+0

(gdb)p主机 $ 1 = {h_name = 0x1d550c8“127.0.0。1“,h_aliases = 0x1d550c0,h_addrtype = 2,h_length = 4,h_addr_list = 0x1d550b0) – auxten

1

h_addr是char *,如下所示,保存主机的IP地址。所以长度可以是最多15个字节的任何东西。

struct hostent 
{ 
    char *h_name;   /* Official name of host. */ 
    char **h_aliases;  /* Alias list. */ 
    int h_addrtype;  /* Host address type. */ 
    int h_length;   /* Length of address. */ 
    char **h_addr_list;  /* List of addresses from name server. */ 
    #define h_addr h_addr_list[0] /* Address, for backward compatibility. */ 
}; 

而s_addr是4个字节。最有可能的h_addr长度超过4个字节并导致核心转储

+0

看到我的更新,这个核心的h_length是4 – auxten

+0

你说得对,但它为什么是4?不应该是9(127.0。 0.1)?无论如何,这可能是memcpy接受size_t的原因吗?您是否尝试通过投射到size_t来编写4? –

+0

h_length不是“127.0.0.1”的strlen – auxten