Download code

Jump to: navigation, search

Back to Word_count_(Assembly_Intel_x86_Linux)

Download for Windows: single file, zip

Download for UNIX: single file, zip, tar.gz, tar.bz2

wc.asm

  1 ; The authors of this work have released all rights to it and placed it
  2 ; in the public domain under the Creative Commons CC0 1.0 waiver
  3 ; (http://creativecommons.org/publicdomain/zero/1.0/).
  4 ; 
  5 ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  6 ; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  7 ; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  8 ; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  9 ; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 10 ; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 11 ; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 12 ; 
 13 ; Retrieved from: http://en.literateprograms.org/Word_count_(Assembly_Intel_x86_Linux)?oldid=10598
 14 
 15 section .data
 16 ; Linux system call numbers (loaded into eax before "int sys_call")
 17 sys_exit   equ 1
 18 sys_read   equ 3
 19 sys_write  equ 4
 20 sys_open   equ 5
 21 sys_close  equ 6
 22 ; system call interrupt number
 23 sys_call   equ 0x80
 24 ; constants for standard file descriptors
 25 stdin      equ 0
 26 stdout     equ 1
 27 stderr     equ 2
 28 ; constant for open mode
 29 O_RDONLY   equ 0
 30 ; size in bytes of the scratch buffer; output writes one counter column of this width
 31 scratch_size equ 12
 32 ; option flags: one bit per selectable counter, combined in [options]
 33 opt_c      equ 1     ; set by option character 'c' (character count)
 34 opt_w      equ 2     ; set by option character 'w' (word count)
 35 opt_l      equ 4     ; set by option character 'l' (line count)
 36 ; byte offsets of the individual dword counters inside counts and totals
 37 line_counter equ 0
 38 word_counter equ 4
 39 char_counter equ 8
 40 buf_size equ 2048    ; 2 KB buffer to speed things up
 41 err_option      db "wc: invalid option: "   ; not zero-terminated, length follows
 42 err_option_len  equ $-err_option
 43 err_open      db "wc: cannot open file: "   ; not zero-terminated, length follows
 44 err_open_len  equ $-err_open
 45 newline        db 10         ; '\n'
 46 name_total     db "total", 0 ; zero-terminated string
 47 name_empty     db 0          ; empty zero-terminated string
 48 err_read      db "wc: error reading file: " ; not zero-terminated, length follows
 49 err_read_len  equ $-err_read
 50 
 51 section .bss ; dword variables first, so they stay aligned if the section start is 4-byte aligned
 52 exit_code   resd 1 ; stores the exit code (only ever accessed as a dword)
 53 counts  resd 3 ; lines, words, bytes of the current file (dwords at offsets 0, 4, 8)
 54 totals  resd 3 ; total lines, words, bytes (same layout as counts)
 55 scratch resb scratch_size ; scratch space, esp. for output functions
 56 buffer resb buf_size ; file buffer
 57 options resb 1 ; option flags
 58 print_total resb 1 ; stores whether a total should be printed
 59 whitespace  resb 1 ; stores if we are currently parsing whitespace
 60 
 61 section .text
 62         global _start
 63 _start: ; entry point: parse -c/-w/-l options, then count each file argument (or stdin)
 64 ; the arguments are consumed by popping them off the initial stack: argc, argv[0], ..., 0
 65         xor eax, eax
 66         mov [exit_code], eax  ; clear exit code
 67         pop ebx         ; argc - not needed because argv ends with 0
 68         pop ebx         ; argv[0] - not used
 69         xor dl, dl    ; during the option loop, this holds the option flags
 70 .opt_arg_loop:
 71         pop ebx         ; next argument
 72         test ebx, ebx   ; end of arguments reached?
 73         jz .opt_end
 74         mov al, [ebx]   ; read first character
 75         cmp al, '-'     ; '-' indicates either option or stdin "filename"
 76         jne .opt_end    ; no option -> first filename
 77         mov al, [ebx+1]
 78         test al, al
 79         jz .opt_end     ; a lone "-" is the stdin "filename", not an option
 80 ; otherwise option characters follow; .opt_loop steps ebx past the '-' and
 81 ; then past every option character it has evaluated
 82 .opt_loop:
 83         inc ebx
 84         mov al, [ebx]
 85         test al, al
 86         jz .opt_arg_loop
 87         cmp al, 'c'
 88         je .set_char_flag
 89         cmp al, 'w'
 90         je .set_word_flag
 91         cmp al, 'l'
 92         je .set_line_flag
 93 ; if we get here, we have an invalid option, thus give an error and exit
 94 ; first save option char + linefeed in scratch
 95         mov [scratch], al
 96         mov al, 10            ; ASCII 10 = '\n'
 97         mov [scratch+1], al
 98 ; now, print the constant part of the error message
 99         mov eax, sys_write
100         mov ebx, stderr
101         mov ecx, err_option
102         mov edx, err_option_len
103         int sys_call
104 ; print the part stored in scratch
105         mov eax, sys_write
106         mov ebx, stderr
107         mov ecx, scratch
108         mov edx, 2        ; 1 option character and '\n'
109         int sys_call
110 ; return an error code
111         mov ebx, 1
112         mov [exit_code], ebx
113         jmp .end
114 .set_char_flag:
115         or dl, opt_c
116         jmp .opt_loop
117 .set_word_flag:
118         or dl, opt_w
119         jmp .opt_loop
120 .set_line_flag:
121         or dl, opt_l
122         jmp .opt_loop
123 .opt_end:
124 ; if none of the options are given, we want them all
125         test dl, dl
126         jnz .options_set
127         mov dl, opt_c | opt_w | opt_l
128 .options_set:
129         mov [options], dl
130 ; clear the totals
131         xor eax, eax
132         mov edi, totals
133         cld
134         stosd
135         stosd
136         stosd
137         mov [print_total], al
138         test ebx, ebx  ; if ebx is 0, no file names were given
139         jz .only_stdin
140 ; otherwise ebx points to the first file name; the rest are still on the stack
141 .file_loop:
142 ; first, initialize the per-file counters
143         xor eax, eax
144         mov edi, counts
145         cld
146         stosd
147         stosd
148         stosd
149 ; test if we have to read stdin
150         mov al, [ebx]
151         cmp al, '-'
152         jne .real_file
153         mov al, [ebx+1]
154         test al, al
155         jnz .real_file
156 ; count words on stdin
157         mov eax, stdin     ; word_count expects the file descriptor in eax
158         call word_count    ; word_count signals error with carry flag
159         jnc .print_counts  ; if no error occured, print the counts
160         jmp .next_file     ; otherwise just continue with the next file
161 .real_file:
162         ; a real file first has to be opened
163         push ebx           ; must survive the call to sys_open
164         mov eax, sys_open
165         mov ecx, O_RDONLY
166         int sys_call       ; note: mode (in edx) is ignored for O_RDONLY
167         pop ebx            ; restore
168         test eax, eax      ; was the open successful?
169         js .open_failed    ; if not, report an error and skip that file
170         push eax           ; save the file descriptor
171         call word_count    ; the file descriptor already is in eax
172         sbb eax, eax       ; this effectively stores the carry into eax
173         xchg ebx, [esp]    ; restore the file descriptor into ebx
174                            ; while saving the file name on the stack
175         push eax           ; save error state of word count, too
176         mov eax, sys_close ; close the file
177         int sys_call
178         pop eax
179         pop ebx
180         test eax, eax      ; did word_count report an error?
181         jnz .next_file     ; if so, skip to the next file
182 .print_counts:
183         mov esi, counts    ; output expects the counts to output in esi
184         call output
185 ; add the counts to the totals
186         mov esi, counts
187         mov edi, totals
188         cld
189         lodsd
190         add eax, [edi]
191         stosd
192         lodsd
193         add eax, [edi]
194         stosd
195         lodsd
196         add eax, [edi]
197         stosd
198 .next_file:
199         pop ebx            ; get the next file name
200         test ebx, ebx
201         jz .summarize       ; if ebx is 0, we are ready
202         mov al, 1          ; more than 1 file -> print totals
203         mov [print_total], al
204         jmp .file_loop
205 .open_failed:
206         push ebx           ; save file name
207 ; print message start
208         mov eax, sys_write
209         mov ebx, stderr
210         mov ecx, err_open
211         mov edx, err_open_len
212         int sys_call
213 ; print file name
214         pop ecx            ; print_asciiz expects filename in ecx, not ebx
215         mov ebx, stderr
216         call print_asciiz
217 ; print '\n' (filename need not be preserved any more)
218         mov eax, sys_write
219         mov ebx, stderr
220         mov ecx, newline
221         mov edx, 1
222         int sys_call
223 ; set exit code to 1
224         mov eax, 1
225         mov [exit_code], eax
226 ; process next file
227         jmp .next_file
228 .summarize:
229         mov al, [print_total] ; is printing of totals needed?
230         test al, al
231         jz .end               ; if not, we are ready
232         mov ebx, name_total
233         mov esi, totals
234         call output
235         jmp .end
236 .only_stdin:                  ; special case: no filenames given
237         mov ebx, name_empty   ; the "file name" is just the empty string
238         mov eax, stdin
239         call word_count       ; word_count signals error with carry flag
240         jc .end               ; read error (already reported): print no counts, as for files
241         mov esi, counts
242         call output
243 ; fall-through to end
244 .end:
245         mov eax, sys_exit
246         mov ebx, [exit_code]
247         int sys_call
248 
249 ; procedure word_count
250 ; purpose:
251 ;   read a file up to EOF, adding its characters, words and lines to [counts]
252 ; input:
253 ;   eax: file descriptor
254 ;   ebx: file name (ASCIIZ, only used in the error message)
255 ; output:
256 ;   carry clear on success; on a read error: message on stderr, [exit_code] = 1, carry set
257 ; preserved registers:
258 ;   ebx (eax, ecx, edx and esi are overwritten)
259 word_count:
260         mov dl, 1
261         mov [whitespace], dl ; initially we are in whitespace mode
262         push ebx             ; keep the file name on the stack until we return
263         mov ebx, eax         ; ebx = file descriptor for sys_read
264 .read_loop:
265         push ebx
266         mov eax, sys_read
267         mov ecx, buffer
268         mov edx, buf_size
269         int sys_call
270         pop ebx
271         test eax, eax ; test the full 32-bit result; ax alone breaks once buf_size >= 32768
272         js .error    ; if negative, an error occured
273         jz .end      ; if zero, we've hit EOF
274         add [counts + char_counter], eax ; update char count
275         mov ecx, eax
276         mov esi, buffer
277         cld
278 ; during the evaluation loop, dl holds if we are in whitespace mode
279         mov dl, [whitespace]
280 .eval_loop:
281         lodsb
282         cmp al, ' '
283         jz .whitespace
284         cmp al, 9    ; '\t'
285         jz .whitespace
286         cmp al, 10   ; '\n'
287         jz .newline
288 ; if we got here, we have a non-whitespace character
289         test dl, dl
290         jz .nowhite
291         inc dword [counts + word_counter] ; first character after whitespace starts a word
292 .nowhite:
293         mov dl, 0
294         jmp .next
295 .newline:
296 ; Increment line counter, then proceed just as with any other whitespace
297         inc dword [counts + line_counter]
298 .whitespace:
299         mov dl, 1      ; we are now in whitespace mode
300 ; fall-through to .next
301 .next:
302         dec ecx        ; decrement the character counter
303         jnz .eval_loop ; if there are unprocessed characters, continue
304         mov [whitespace], dl ; store current whitespace mode
305         jmp .read_loop
306 .end:
307         pop ebx        ; restore file name
308         clc            ; to indicate no error, clear the carry flag.
309         ret
310 .error:
311 ; print error message
312         mov eax, sys_write
313         mov ebx, stderr
314         mov ecx, err_read
315         mov edx, err_read_len
316         int sys_call          ; print static part of the error message
317         mov ebx, stderr
318         mov ecx, [esp]        ; get the file name
319         call print_asciiz     ; and print it
320         mov eax, sys_write
321         mov ebx, stderr
322         mov ecx, newline
323         mov edx, 1
324         int sys_call          ; print newline
325 ; set exit code to 1
326         mov eax, 1
327         mov [exit_code], eax
328 ; restore file name
329         pop ebx
330 ; set carry to indicate error
331         stc
332         ret
333 ; procedure output
334 ; purpose:
335 ;   print one result line on stdout: the selected counters, then the name
336 ; input:
337 ;   ebx: address of the zero-terminated file name
338 ;   esi: address of three dword counters in the order lines, words, chars
339 ; preserved registers:
340 ;   none
341 ; every column is formatted in the shared scratch buffer
342 output:
343         push    ebx                     ; the name is printed last
344         mov     dl, opt_l
345         call    .emit_column
346         mov     dl, opt_w
347         call    .emit_column
348         mov     dl, opt_c
349         call    .emit_column
350         pop     ecx                     ; name -> ecx for print_asciiz
351         mov     ebx, stdout
352         call    print_asciiz
353         mov     edx, 1
354         mov     ecx, newline
355         mov     ebx, stdout
356         mov     eax, sys_write
357         int     sys_call
358         ret
359 .emit_column: ; dl = option bit guarding this column; always consumes one counter
360         cld
361         lodsd
362         test    [options], dl
363         jz      .done
364 ; blank the whole scratch buffer, keeping the counter safe in ebx meanwhile
365         mov     ebx, eax
366         mov     al, ' '
367         mov     ecx, scratch_size
368         mov     edi, scratch
369         rep stosb
370         mov     eax, ebx
371 ; write the decimal digits right to left, leaving the last byte a space
372         mov     edi, scratch + scratch_size - 1
373         mov     ecx, 10                 ; divisor, unchanged by the loop
374 .next_digit:
375         dec     edi
376         xor     edx, edx                ; div divides edx:eax, so the high half must be 0
377         div     ecx
378         add     dl, '0'
379         mov     [edi], dl
380         test    eax, eax
381         jnz     .next_digit
382 ; send the complete fixed-width column
383         mov     edx, scratch_size
384         mov     ecx, scratch
385         mov     ebx, stdout
386         mov     eax, sys_write
387         int     sys_call
388 .done:
389         ret
390 ; procedure print_asciiz
391 ; purpose:
392 ;   write a zero-terminated string (without its terminator) to a file
393 ; input:
394 ;   ebx: file descriptor
395 ;   ecx: address of the zero-terminated string
396 ; preserved registers:
397 ;   none
398 print_asciiz:
399         cld
400         mov     esi, ecx
401 ; walk forward until the terminating zero byte has been consumed
402 .scan:
403         lodsb
404         test    al, al
405         jnz     .scan
406 ; esi is now one past the terminator, so
407 ; length = (esi - 1) - start
408         lea     edx, [esi - 1]
409         sub     edx, ecx
410 ; ebx (descriptor) and ecx (string) are still as passed in
411         mov     eax, sys_write
412         int     sys_call
413 ; the result of sys_write is left in eax and is not examined here
414         ret
415 


hijacker
hijacker
hijacker
hijacker