;Copyright (C) 1999-2002 Konstantin Boldyshev
;Copyright (C) 1999 Cecchinel Stephan
;
;$Id: libc.asm,v 1.16 2006/02/18 09:39:33 konst Exp $
;
;hackers' libc
;
;Yes, this is the most advanced libc ever seen.
;It uses advanced technologies which are possible only with assembly.
;Two main features that make this libc outstanding:
;1) calling convention can be configured AT RUNTIME (cdecl is default)
;2) THE smallest size
;
;It uses a mixed code-data database approach for syscalls,
;resulting in extremely small size.
;
;Well, there's still a lot of work to be done.
;
;0.01: 10-Sep-1999	initial alpha pre beta 0 non-release
;0.02: 24-Dec-1999	first working version
;0.03: 21-Feb-2000	fastcall support
;0.04: 20-Jul-2000	fixed stupid bug/misprint, merged clib.asm & string.asm
;			printf()
;0.05: 16-Jan-2001	usual functions now work with both cdecl and fastcall,
;			added PIC support (and __GET_GOT macro),
;			added __ADJUST_CDECL3 macro,
;			syscall mechanism rewritten (size improved),
;			separated and optimized sprintf(),
;			printf() implemented via sprintf(),
;			lots of other various fixes (KB)
;			finally ready for additions and active development.
;0.06: 28-Jan-2001	added __start_main - it is called from stub in order
;			to prepare main() arguments (argc, argv, envp),
;			PIC fixes (KB)
;0.07: 25-Feb-2001	added __VERBOSE__, memcmp(), getenv() (KB)
;0.08: 20-Jan-2002	strlen() bugfix, various fixes (KB)
;0.09: 03-Mar-2002	__start_main fastcall fix (KB)
;0.10: 18-Feb-2006	static build fix (KB)

;NOTE(review): __ELF_MACROS__ is presumably consumed by system.inc, which is
;why it is undefined before the include - confirm against system.inc.
%undef __ELF_MACROS__

%include "system.inc"

;Build-time switches.
;__PIC__	- variables are reached through the GOT (see the %ifdef __PIC__
;		  branches further down) instead of absolute addresses.
;__VERBOSE__	- when enabled, the syscall dispatcher loads only as many
;		  arguments as the syscall stub declares.
%define __PIC__			;build PIC version
;%define __VERBOSE__		;treat stack with care

;
; macro used for function declaration
;
;Accepts one or more symbol names and emits "global <name>:function"
;for each of them: %0 is the argument count, and %rotate 1 shifts the
;next name into %1 on every iteration of the %rep loop.
;

%macro _DECLARE_FUNCTION 1-*
%rep %0
	global %1:function
	%rotate 1
%endrep
%endmacro

;
; macro used for syscall declaration
;
;%1	syscall name
;%2	number of parameters
;
;Ok, what is written below?
;Yes - a really dirty trick, but it really saves size.
;This is the thing I like assembly for,
;and this is why this libc is the most advanced :)
;
;This macro generates the following code:
;five bytes	- call instruction (near relative call, E8 rel32)
;one byte	- number of syscall parameters (<0 means always cdecl)
;one byte	- syscall number (two bytes on BSD systems)
;
;So, each syscall will take only 7 bytes (8 bytes on BSD systems)
;in executable image. We use the call instruction to push a return address,
;and then find out the syscall number and number of parameters using
;this address in the __system_call function. The ret instruction is also
;missing, because we handle the correct return in __system_call too.
;
;The return address pushed by the call therefore points at DATA, not code:
;	[ret + 0]	parameter count
;	[ret + 1]	syscall number

%macro _DECLARE_SYSCALL 2
global %1:function
%1:
	call	__system_call
	db	%2		;number of parameters
%ifndef __BSD__
	db	SYS_%{1}	;syscall number
%else
	dw	SYS_%{1}
%endif
%endmacro

;
;PIC handling
;
;__EXT_VAR(x)	memory operand for the GOT slot of x (holds the address of x)
;__INT_VAR(x)	GOT-relative address expression of x
;Both expect EBX to hold the GOT address (see __GET_GOT).

%define __EXT_VAR(x) [ebx + (x) wrt ..got]
%define __INT_VAR(x) ebx + (x) wrt ..gotoff

;Load the GOT address into EBX.
;__get_GOT returns the address of %%get_GOT in EBX; the add turns it
;into the address of _GLOBAL_OFFSET_TABLE_.
;Clobbers: EBX, flags.

%macro __GET_GOT 0
	call	__get_GOT
%%get_GOT:
%define gotpc %%get_GOT wrt ..gotpc
	add	ebx,_GLOBAL_OFFSET_TABLE_ + $$ - gotpc
%undef gotpc
%endmacro

;adjust cdecl call (1 - 3 parameters)
;
;%1		stack frame to add (bytes pushed since function entry)
;%2 - %4	registers
;
;If the byte at __cc is zero (cdecl), the listed registers are loaded from
;the stack arguments at [esp + %1 + 4], + 8, + 12; otherwise (fastcall)
;the registers are left untouched. EBX is preserved in the PIC variant.
;Clobbers: flags.

%macro __ADJUST_CDECL3 2-4
;	_mov	%2,eax
;%if %0>2
;	_mov	%3,edx
;%if %0>3
;	_mov	%4,ecx
;%endif
;%endif
%ifdef __PIC__
	push	ebx
	__GET_GOT
	mov	ebx,__EXT_VAR(__cc)
	cmp	byte [ebx],0
	pop	ebx			;pop does not touch the flags
%else
	cmp	byte [__cc],0
%endif
	jnz	%%fc
	mov	%2,[esp + %1 + 4 ]
%if %0>2
	mov	%3,[esp + %1 + 8 ]
%if %0>3
	mov	%4,[esp + %1 + 12]
%endif
%endif
%%fc:
%endmacro

;
;for accessing registers after pusha
;
;__ret is the dword just above the pusha frame, i.e. whatever was on top
;of the stack immediately before the pusha.

%define	__ret	esp+4*8
%define	__eax	esp+4*7
%define	__ecx	esp+4*6
%define	__edx	esp+4*5
%define	__ebx	esp+4*4
%define	__esp	esp+4*3
%define	__ebp	esp+4*2
%define	__esi	esp+4*1
%define	__edi	esp+4*0

CODESEG

;
;Code placed at the very start of the code segment:
;write the banner to STDOUT and exit with status 0.
;

%ifdef __PIC__
	__GET_GOT
	lea	ecx,[__INT_VAR(__libc_banner)]
%else
	mov	ecx,__libc_banner
%endif
	sys_write STDOUT,EMPTY,__LIBC_BANNER_LEN
	sys_exit 0

__libc_banner		db	"a r e  y o u  s i c k ?", __n
__LIBC_BANNER_LEN	equ	$ - __libc_banner

%ifdef __PIC__
;Return the caller's return address in EBX (helper for __GET_GOT).
__get_GOT:
	mov	ebx,[esp]
	ret
%endif

extern _GLOBAL_OFFSET_TABLE_

;**************************************************
;*             INTERNAL FUNCTIONS                 *
;**************************************************

;
;perform a system call (up to 6 arguments)
;
;Entered only through a _DECLARE_SYSCALL stub, so on entry:
;	[esp]		address of the stub's data bytes (count, number)
;	[esp + 4]	return address of the stub's caller
;	[esp + 8]...	cdecl arguments
;
;Arguments are taken from the stack (cdecl) when the byte at __cc is zero
;or the stub's parameter count is negative; otherwise the first three come
;from EAX, EDX, ECX and the rest from the stack.
;
;Returns the syscall result in EAX. On failure (result in -4095..-1),
;errno is set to the negated result and EAX is -1.
;All other registers are preserved (pusha/popa).
;

__system_call:
	pusha

	mov	eax,[__esp]		;load number of syscall args into eax
	mov	eax,[eax]		;eax = address of the stub's data bytes
	movzx	eax,byte [eax]
	test	al,al
	jz	.ssn			;no args
%ifdef __VERBOSE__
	jns	.sk1			;usual call
	neg	al			;always cdecl call
	jmps	.cdecl
%else
	js	.cdecl
%endif
.sk1:
%ifdef __PIC__
	__GET_GOT
	mov	ebx,__EXT_VAR(__cc)
	cmp	byte [ebx],0
%else
	cmp	byte [__cc],0
%endif
	jnz	.fc

;offset of the first stack argument: two return addresses + pusha frame
%define _STACK_ADD 8 + 4*8

;with __VERBOSE__, stop loading arguments once the declared count is reached
%macro _JZ_SSN_ 0
%ifdef __VERBOSE__
	dec	eax
	jz	.ssn
%endif
%endmacro

.cdecl:
	mov	ebx,[esp + _STACK_ADD]		;1st arg
	_JZ_SSN_
	mov	ecx,[esp + _STACK_ADD + 4]	;2nd arg
	_JZ_SSN_
	mov	edx,[esp + _STACK_ADD + 8]	;3rd arg
	_JZ_SSN_
	mov	esi,[esp + _STACK_ADD + 12]	;4th arg
	_JZ_SSN_
	mov	edi,[esp + _STACK_ADD + 16]	;5th arg
	_JZ_SSN_
	mov	ebp,[esp + _STACK_ADD + 20]	;6th arg
	jmps	.ssn

.fc:
	mov	ebx,[__eax]			;1st arg (caller's EAX)
	_JZ_SSN_
	xchg	ecx,edx				;2nd & 3rd arg
	_JZ_SSN_
	_JZ_SSN_
	mov	esi,[esp + _STACK_ADD]		;4th arg
	_JZ_SSN_
	mov	edi,[esp + _STACK_ADD + 4]	;5th arg
	_JZ_SSN_
	mov	ebp,[esp + _STACK_ADD + 8]	;6th arg

%undef _STACK_ADD

.ssn:
	mov	eax,[__esp]		;set syscall number
	mov	eax,[eax]
%ifndef __BSD__
	movzx	eax,byte [eax + 1]	;return address + 1
%else
	movzx	eax,word [eax + 1]	;return address + 1
%endif
	sys_generic

	cmp	eax,-4095		;unsigned: below means "not an error"
	jb	.leave
;	test	eax,eax
;	jns	.leave
	neg	eax
%ifdef __PIC__
	__GET_GOT
	mov	ebx,__EXT_VAR(errno)
	mov	[ebx],eax
%else
	mov	[errno],eax
%endif
	or	eax,byte -1
.leave:
	mov	[__ret],eax		;replace return address with eax
	popa
	pop	eax			;now get it back
	ret				;and return to previous caller

;
;syscall stubs: name, number of parameters
;

_DECLARE_SYSCALL	open,	-3	;<0 means always cdecl
_DECLARE_SYSCALL	close,	1
_DECLARE_SYSCALL	read,	3
_DECLARE_SYSCALL	write,	3
_DECLARE_SYSCALL	lseek,	3
_DECLARE_SYSCALL	chmod,	2
_DECLARE_SYSCALL	chown,	3	;path, owner, group
_DECLARE_SYSCALL	pipe,	1
_DECLARE_SYSCALL	link,	2
_DECLARE_SYSCALL	symlink,2
_DECLARE_SYSCALL	unlink,	1
_DECLARE_SYSCALL	mkdir,	2	;path, mode
_DECLARE_SYSCALL	rmdir,	1
_DECLARE_SYSCALL	exit,	1
_DECLARE_SYSCALL	fork,	0
_DECLARE_SYSCALL	execve,	3
_DECLARE_SYSCALL	uname,	1
_DECLARE_SYSCALL	ioctl,	3
_DECLARE_SYSCALL	alarm,	1
_DECLARE_SYSCALL	nanosleep,	2
_DECLARE_SYSCALL	kill,	2
_DECLARE_SYSCALL	signal,	2
_DECLARE_SYSCALL	wait4,	4
;_DECLARE_SYSCALL	stat,	2
_DECLARE_SYSCALL	fstat,	2
_DECLARE_SYSCALL	lstat,	2
_DECLARE_SYSCALL	getuid,	0
_DECLARE_SYSCALL	getgid,	0

;NOTE(review): of the symbols exported below, only strlen, strtol and
;__start_main have a body in this part of the file.
_DECLARE_FUNCTION	_fastcall
_DECLARE_FUNCTION	memcpy, memset, memcmp
_DECLARE_FUNCTION	strlen
_DECLARE_FUNCTION	strtol
_DECLARE_FUNCTION	itoa
_DECLARE_FUNCTION	printf, sprintf
_DECLARE_FUNCTION	getenv
_DECLARE_FUNCTION	__start_main

;
;__start_main - prepare main() arguments and run main()
;
;On entry the stack holds, from the top: main() address, argc, argv[0..],
;then (after the entry that follows argv[argc-1]) the environment pointers.
;Stores the envp pointer in __envp, calls main with (argc, argv, envp)
;both on the stack and in EAX, EDX, ECX, then passes main's result to exit.
;
;ebp	- main() address

__start_main:
	pop	ebp			;main() address
	pop	eax			;argc
	lea	ecx,[esp + eax * 4 + 4]	;**envp
%ifdef __PIC__
	__GET_GOT
	mov	ebx,__EXT_VAR(__envp)
	mov	[ebx],ecx
%else
	mov	[__envp],ecx
%endif
	mov	edx,esp			;**argv
	push	ecx
	push	edx
	push	eax
	call	ebp
	push	eax
	call	exit

;**************************************************
;*           GLOBAL LIBRARY FUNCTIONS             *
;**************************************************

;void fastcall(int regnum)
;
;set fastcall/cdecl calling convention
;note: always uses fastcall convention
;
;NOTE(review): the body of this function is not present in this part of
;the file; the comment is kept for reference only.

;
;strlen - length of a zero-terminated string
;
;<EAX (fastcall) or first stack argument (cdecl)	string
;>EAX	number of bytes before the terminating zero
;
;EDX and ECX are preserved.

strlen:
	push	edx
	__ADJUST_CDECL3 4*1,eax
	mov	edx,eax			;edx = start of string
.real:
%if  __OPTIMIZE__=__O_SPEED__
	push	ecx
	test	dl,3
	jz	.boucle			;already dword aligned
	cmp	byte[eax],0		;check up to 3 bytes one by one
	jz	.strfi
	cmp	byte[eax+1],0
	jz	.ret1
	cmp	byte[eax+2],0
	jnz	.align
	add	eax,byte 2
	jmps	.strfi
.align:
	add	eax,byte 3		;round up to the next dword boundary
	and	eax,byte -4
.boucle:		;normally the whole loop is 7 cycles (for 8 bytes)
	mov	ecx,dword[eax]		;bytes 0..3
	test	cl,cl
	jz	.strfi
	test	ch,ch
	jz	.ret1
	test	ecx,0xFF0000
	jz	.ret2
	shr	ecx,24
	jz	.ret3
	mov	ecx,dword[eax+4]	;bytes 4..7
	test	cl,cl
	jz	.ret4
	test	ch,ch
	jz	.ret5
	test	ecx,0xFF0000
	jz	.ret6
	add	eax,byte 8
	shr	ecx,24			;isolate byte 7
	jnz	.boucle
	dec	eax			;byte 7 was the terminator
	jmps	.strfi
.ret1:
	inc	eax
	jmps	.strfi
.ret2:
	add	eax,byte 2
	jmps	.strfi
.ret3:
	add	eax,byte 3
	jmps	.strfi
.ret4:
	add	eax,byte 4
	jmps	.strfi
.ret5:
	dec	eax			;-1 + 6 = 5
.ret6:
	add	eax,byte 6
.strfi:
	sub	eax,edx			;end - start
	pop	ecx
%else	;__O_SIZE__
	xor	eax,eax
.boucle:
	cmp	byte[edx+eax],1		;CF set only when the byte is zero
	inc	eax			;inc leaves CF untouched
	jnc	.boucle
	dec	eax
%endif	;__OPTIMIZE__
	pop	edx
	ret

;itoa (unsigned long value, char *string, int radix)
;
;print 32 bit number as binary,octal,decimal,or hexadecimal value
;
;NOTE(review): the body of itoa is not present in this part of the file;
;the comment is kept for reference only.

;
;inet_aton - convert dotted decimal text (xx.xx.xx.xx) to 4 bytes
;
;<EAX, EDX (fastcall) or first two stack arguments (cdecl)
;	source string, destination buffer
;>EAX	always 0
;
;Each octet ends at the first character below '0' (such as '.' or the
;terminating zero); no validation is performed.
;ESI, EDI and EDX are preserved; ECX is clobbered.
;NOTE(review): standard inet_aton() returns nonzero on success - confirm
;what callers of this version expect before changing the return value.

inet_aton:
	push	esi
	push	edi
	push	edx
	mov	esi,eax
	mov	edi,edx
	__ADJUST_CDECL3 4*3,esi,edi
	cld
	_mov	ecx,4			;four octets
; convert xx.xx.xx.xx to network notation
.conv:
	xor	edx,edx			;start every octet from zero
.next:
	lodsb
	sub	al,'0'
	jb	.loop1			;separator or end of string
	add	edx,edx
	lea	edx,[edx+edx*4]		;edx *= 10
	add	dl,al
	jmps	.next
.loop1:
	mov	al,dl
	stosb
	loop	.conv
	xor	eax,eax			;assume address was valid
	pop	edx
	pop	edi
	pop	esi
	ret

;long strtol(const char *nptr, char **endptr, int base)
;
;convert string in nptr to a long integer value
;according to given base (between 2 and 36)
;if endptr is not 0, it is the end of the string
;else the string is null-terminated
;
;<EAX, EDX, ECX (fastcall) or first three stack arguments (cdecl)
;>EAX	converted value
;
;Leading spaces are skipped, a "0x" prefix switches the base to 16, and
;base 0 means 10. Parsing stops at the first character below '0' or when
;the pointer reaches endptr. Digits are not checked against the base.
;EDI, ESI, EBX and ECX are preserved; EDX is clobbered by imul.

strtol:
	push	edi
	push	esi
	push	ebx
	push	ecx
	mov	edi,eax
	mov	esi,edx
	__ADJUST_CDECL3 4*4,edi,esi,ecx
	test	ecx,ecx
	jnz	.base_ok
	_mov	ecx,10			;default base to use
.base_ok:
	xor	eax,eax
	xor	ebx,ebx
.parse1:
	cmp	byte [edi],32		;skip spaces
	jnz	.parse2
	inc	edi
	jmps	.parse1
.parse2:
	cmp	word[edi],'0x'
	jnz	.next
	_mov	ecx,16
	add	edi,byte 2
.next:
	mov	bl,[edi]
	sub	bl,'0'
	jb	.done
	cmp	bl,9
	jbe	.ok
	sub	bl,7			;'A'.. -> 10..
	cmp	bl,35
	jbe	.ok
	sub	bl,32			;'a'.. -> 10..
.ok:
	imul	ecx			;edx:eax = eax * base
	add	eax,ebx
	inc	edi
	cmp	edi,esi
	jnz	.next
.done:
	pop	ecx
	pop	ebx
	pop	esi
	pop	edi
	ret

;
;unused functions
;
;Kept inside a macro that is not invoked in the visible part of the file,
;so none of this is assembled.
;

%macro _UNUSED_ 0

;
;convert 32 bit number to hex string
;
;NOTE(review): the routine this comment described is not present in this
;part of the file.

;
;StrToLong - convert unsigned decimal string to a 32 bit number
;
;<EDI	string
;>EAX	value
;
;Stops at the first non-digit. EBX, ECX and EDI are preserved;
;EDX is clobbered by mul.

StrToLong:
	push	ebx
	push	ecx
	push	edi
	_mov	eax,0
	_mov	ebx,10
	_mov	ecx,0
.next:
	mov	cl,[edi]
	sub	cl,'0'
	jb	.done
	cmp	cl,9
	ja	.done
	mul	ebx			;full 32 bit multiply by 10
	add	eax,ecx
;	adc	edx,0	;for 64 bit
	inc	edi
	jmp short .next
.done:
	pop	edi
	pop	ecx
	pop	ebx
	ret

;
;strlen2 - cdecl-only strlen, string taken from the first stack argument
;
;>EAX	length

strlen2:
%if  __OPTIMIZE__=__O_SIZE__
	push	edi
	mov	edi,[esp + 8]
	mov	eax,edi
	dec	edi
.l1:
	inc	edi
	cmp	[edi],byte 0
	jnz	.l1
	xchg	eax,edi			;eax = end, edi = start
	sub	eax,edi
	pop	edi
%else
; (NK)
; note: below is the classic variant of strlen
; if there is no need to save the ecx register, the size of the classic
; code will be the same as above
; remark: the fastcall version of strlen will be 2 bytes shorter than cdecl
	push	edi			;scasb scans through EDI
	push	ecx
	mov	edi,[esp + 12]
	xor	eax,eax
	or	ecx,-1
	repne	scasb
	not	ecx			;ecx = length + 1
	mov	eax,ecx
	dec	eax
	pop	ecx
	pop	edi
%endif
	_leave

%endmacro ;_UNUSED_

UDATASEG

;
;store them within caller's image
;

common	errno	4	;guess what
common	__cc	4	;calling convention (how many registers for fastcall)
			;0 = cdecl
common	__envp	4	;envp, for getenv()

END