2012-03-10 43 views
8

我正在学习x86_64汇编,今天在尝试使用标准输入输出时偶然发现了这篇帖子:Learning assembly - echo program name。如何(使用SYSCALL指令)从STDIN读取输入?特别是,如果我知道输入始终是一个整数,并且想把它读入一个寄存器,该怎么做?如何在x86_64汇编中读取STDIN的输入?

编辑: @Daniel Kozar的答案帮助我理解了STDIN和STDOUT是如何与Linux上的SYSCALL指令一起工作的。我试着编写了一个小程序,它从控制台输入中读取一个数字,并打印与该数字对应的ASCII字符。例如,输入65,输出应为A,后面再跟一个换行符。希望它多少能对其他人有所帮助 :-)

section .text
    global _start

; ---------------------------------------------------------------------
; Reads two ASCII decimal digits from stdin, combines them into a value
; in the range 0..99 and writes the byte with that value to stdout,
; followed by a newline.  Example: the input "65" prints "A".
;
; Target: Linux x86-64, NASM syntax, raw syscalls
;         (rax = syscall number, arguments in rdi, rsi, rdx).
; Exit status: 0 on success, 1 if read did not deliver two digits.
; ---------------------------------------------------------------------

SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
NEWLINE     equ 0x0a

_start:
    sub     rsp, 16                 ; private scratch buffer at [rsp]; leaves argc/argv untouched

    ; read(STDIN, buffer, 2)
    mov     eax, SYS_READ
    mov     edi, STDIN
    mov     rsi, rsp
    mov     edx, 2
    syscall
    cmp     rax, 2                  ; negative = error, 0 = EOF, 1 = short read
    jne     .fail

    ; Convert the two characters to their digit values (byte-sized loads only).
    movzx   eax, byte [rsp]         ; tens digit character
    sub     eax, '0'
    cmp     eax, 9                  ; unsigned compare also rejects characters below '0'
    ja      .fail
    movzx   ecx, byte [rsp + 1]     ; units digit character
    sub     ecx, '0'
    cmp     ecx, 9
    ja      .fail

    lea     eax, [rax + rax*4]      ; eax = tens * 5
    lea     eax, [rcx + rax*2]      ; eax = tens * 10 + units

    ; write(STDOUT, buffer, 2): the resulting character plus a newline
    mov     [rsp], al
    mov     byte [rsp + 1], NEWLINE
    mov     eax, SYS_WRITE
    mov     edi, STDOUT
    mov     rsi, rsp
    mov     edx, 2
    syscall

    xor     edi, edi                ; exit status 0
.exit:
    mov     eax, SYS_EXIT
    syscall

.fail:
    mov     edi, 1                  ; exit status 1: input was not two decimal digits
    jmp     .exit

编辑2:下面是我的尝试:从标准输入读取一个无符号的32位十进制整数,将其存储为整数以便计算,然后再把它写回标准输出。

section .text
     global _start

; ---------------------------------------------------------------------
; Reads an unsigned decimal integer (at most MAX_DIGITS characters) from
; stdin, converts it to a binary value, converts that value back to
; ASCII decimal and writes "\n<digits>\n" to stdout.
;
; Parsing stops at the first byte that is not '0'..'9' (normally the
; newline) or when the bytes delivered by read are exhausted.  If no
; digit is present the value is 0.
;
; Target: Linux x86-64, NASM syntax, raw syscalls
;         (rax = syscall number, arguments in rdi, rsi, rdx).
; Exit status: 0 on success, 1 if read reported an error.
; ---------------------------------------------------------------------

SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
NEWLINE     equ 0x0a
MAX_DIGITS  equ 10                  ; the largest 32-bit unsigned number has 10 digits
BUF_SIZE    equ 32                  ; room for the input, or "\n" + digits + "\n" on output

_start:
     sub     rsp, BUF_SIZE          ; private buffer at [rsp]; leaves argc/argv untouched

; Read from STDIN
     mov     eax, SYS_READ
     mov     edi, STDIN
     mov     rsi, rsp
     mov     edx, MAX_DIGITS
     syscall
     test    rax, rax
     js      .fail                  ; negative result = -errno

; ASCII to binary conversion
; rcx = bytes still unread, rsi = current byte, rax = value accumulated so far
     mov     rcx, rax
     mov     rsi, rsp
     xor     eax, eax
.parse:
     test    rcx, rcx
     jz      .parsed                ; consumed everything read delivered
     movzx   edx, byte [rsi]        ; load exactly one byte, zero-extended
     sub     edx, '0'
     cmp     edx, 9                 ; unsigned compare: newline and other non-digits end the number
     ja      .parsed
     lea     rax, [rax + rax*4]     ; rax = value * 5
     lea     rax, [rdx + rax*2]     ; rax = value * 10 + digit
     inc     rsi
     dec     rcx
     jmp     .parse

.parsed:
; Binary to ASCII conversion
; Digits come out least-significant first, so the text is built backwards
; from the end of the buffer; rdi always points at the first byte written.
     lea     rdi, [rsp + BUF_SIZE - 1]
     mov     byte [rdi], NEWLINE    ; trailing newline
     mov     ecx, 10                ; divisor
.digit:
     xor     edx, edx               ; div divides rdx:rax, so the high half must be 0
     div     rcx                    ; rax = quotient, rdx = remainder (next digit)
     add     dl, '0'
     dec     rdi
     mov     [rdi], dl
     test    rax, rax
     jnz     .digit                 ; runs at least once, so the value 0 prints as "0"
     dec     rdi
     mov     byte [rdi], NEWLINE    ; leading newline

; Write the whole string with a single syscall
     mov     rsi, rdi               ; start of the text
     lea     rdx, [rsp + BUF_SIZE]
     sub     rdx, rsi               ; length = end of buffer - start of text
     mov     eax, SYS_WRITE
     mov     edi, STDOUT
     syscall

; Exit
     xor     edi, edi               ; exit status 0
.exit:
     mov     eax, SYS_EXIT
     syscall

.fail:
     mov     edi, 1                 ; exit status 1: read failed
     jmp     .exit
+1

你在使用什么操作系统?视窗? DOS? Linux呢? – Gabe 2012-03-10 13:26:58

+0

使用'syscall'取决于操作系统。 – hirschhornsalz 2012-03-10 13:49:27

+0

我正在使用Linux。确切的代码适用于我。 – 2012-03-10 13:53:31

回答

5

首先:汇编语言中没有变量,只有指向某些数据的标签。数据在设计上是无类型的——至少在真正的汇编器中如此,HLA(高级汇编器,例如MASM)除外。

从标准输入读取是通过使用系统调用read来实现的。我假设你已经阅读过你提到的文章,并且知道如何在x64 Linux中调用系统调用。假设您正在使用NASM(或类似于其语法的东西),并且您希望将stdin的输入存储在地址buffer(您已预留BUFSIZE字节的内存),则执行系统调用将如下所示:

xor eax, eax          ; rax <- 0 (read syscall number)
xor edi, edi          ; rdi <- 0 (stdin file descriptor)
lea rsi, [rel buffer] ; rsi <- address of the buffer (RIP-relative, so it also links as PIE)
mov edx, BUFSIZE      ; rdx <- size of the buffer
syscall               ; execute; rax <- number of bytes read, or a negative errno value

返回时,rax将包含系统调用的结果。如果您想了解更多关于它的工作原理,请咨询man 2 read

解析汇编语言中的整数并不那么简单。由于read只给出显示在标准输入上的纯二进制数据,因此您需要自己转换整数值。请记住,键盘上键入的内容将作为ASCII码(或者您可能使用的任何其他编码 - 我在此假设为ASCII)发送到应用程序。因此,您需要将数据从ASCII编码的十进制转换为二进制。

用C语言编写的、把这种ASCII十进制数据转换为普通无符号整数的函数大致如下:

/*
 * Parse an unsigned ASCII decimal number from a buffer.
 *
 * str    - buffer holding the characters; it need not be NUL-terminated.
 * strlen - number of bytes available in str.
 *
 * Returns the value of the leading run of decimal digits.  Parsing stops
 * at the first byte that is not '0'..'9' (for example the newline that
 * read() delivers after the number) or after strlen bytes.  An empty
 * buffer, or one that does not start with a digit, yields 0.  Values too
 * large for unsigned int wrap around, as unsigned arithmetic does.
 */
unsigned int parse_ascii_decimal(char *str,unsigned int strlen) 
{
    unsigned int value = 0;
    unsigned int i;

    /* Most-significant digit first: value = value * 10 + digit. */
    for (i = 0; i < strlen; ++i)
    {
        /* Unsigned subtraction maps every non-digit to a number above 9. */
        unsigned int digit = (unsigned int)(unsigned char)str[i] - '0';

        if (digit > 9)
            break;
        value = value * 10 + digit;
    }
    return value;
}

把这个函数翻译成汇编(并扩展为支持有符号数)就留给读者作为练习了。 :)

最后但并非最不重要 - write系统调用要求您始终将指针传递到缓冲区,并将数据写入给定的文件描述符。因此,如果你想输出一个换行符,除了创建一个包含换行符序列的缓冲区外别无它法。

+0

你太棒了!谢谢!是的,我正在使用NASM。所以,我可以不分配缓冲区,而是直接读到堆栈上吗?比如说 mov rsi,[rsp + 8]? 顺便说一下,我会去做那个练习的 ;-) – 2012-03-11 01:39:59

+0

'mov rsi,[rsp + 8]'会把堆栈上的实际内容移动到寄存器中。你想要的是地址,在这种情况下,'lea rsi,[rsp + 8]'可以正常工作。是的,您也可以使用堆栈来满足所有的读/写需求。 – 2012-03-11 12:42:42

相关问题