AFFECTIVESILICON

x86-64 Assembly Language Projects

Revision 1.0 2024-01-31

These little projects were written while getting to grips with x86-64 assembly language on Linux. They are not really intended to be production programs, but just little challenges. They were compiled with nasm on a recent Linux kernel. The -g nasm flag was used so that the program can be easily inspected in gdb.

Exiting a program

In assembly, a program doesn't simply stop after its last instruction; the CPU keeps executing whatever bytes follow until the program segfaults. To end a program cleanly, the kernel has to be asked to terminate it using the appropriate syscall. A table of syscalls is available here.


; Build:  nasm -g -f elf64 exit.asm
; Link:   ld -o exit exit.o

SYS_EXIT    equ 60                  ; Linux x86-64 syscall number for exit (0x3c)

global _start

section .text

; Program entry point: immediately asks the kernel to end the process
; with exit status 0.
_start:
    xor     edi, edi                ; rdi = 0, the exit status
    mov     eax, SYS_EXIT           ; writing eax also clears the top half of rax
    syscall                         ; hand over to the kernel
				
				

Hello World

The classic "Hello, World" program. An opportunity to define some data and write to STDOUT using the appropriate kernel syscall.


; Build:  nasm -g -f elf64 hello_world.asm
; Link:   ld -o hello_world hello_world.o

SYS_WRITE   equ 1                   ; Linux x86-64 syscall numbers
SYS_EXIT    equ 60
STDOUT      equ 1                   ; file descriptor of standard output

section .data

    hello:          db      'Hello, world!', 10             ; 10 is the newline char
    helloLen:       equ     $-hello

section .text

    global _start

; Program entry point: write the greeting to stdout, then exit with status 0.
_start:

    ; write(STDOUT, hello, helloLen)
    mov     edx, helloLen           ; number of bytes to write
    lea     rsi, [rel hello]        ; address of the text
    mov     edi, STDOUT
    mov     eax, SYS_WRITE
    syscall

    ; exit(0)
    xor     edi, edi
    mov     eax, SYS_EXIT
    syscall
				
				

Showing Hello World a specified number of times

In Linux, the kernel puts pointers to parameters passed to a program onto the stack. Here a parameter for the number of times "Hello, World!" should be printed on the screen is required. This would be straightforward except there is an immediate problem: the parameter appears as a text string and has to be converted to a number for the loop. A program usage description also has to be shown so the user knows what to do.


; Build:  nasm -g -f elf64 hello_world_param.asm
; Link:   ld -o hello_world_param hello_world_param.o
; Run:    ./hello_world_param 15

SYS_WRITE   equ 1                   ; Linux x86-64 syscall numbers
SYS_EXIT    equ 60
STDOUT      equ 1

section .data

    hello:          db      'Hello, world!', 10
    helloLen:       equ     $-hello

    usage:          db      'Enter how many times to say "Hello, world!"', 10
    usageLen:       equ     $-usage

section .text

    global _start

; Program entry point.
; Stack as read here: [rsp] = argument count (including the program name),
; [rsp+8] = pointer to the program name, [rsp+16] = pointer to first parameter.
; Register roles: rsi = cursor into the parameter string,
;                 rbx = parsed number, then the remaining-greetings counter.
_start:

    cmp     qword [rsp], 2          ; need exactly: program name + one parameter
    jne     show_usage
    mov     rsi, [rsp + 16]         ; the parameter text, expected to be decimal digits

    ; Convert the digit string to a number: total = total*10 + digit.
    ; Only plain decimal digits are accepted; anything else shows the usage.
    xor     ebx, ebx                ; total = 0

.next_digit:
    movzx   eax, byte [rsi]
    sub     eax, '0'                ; map '0'..'9' onto 0..9
    cmp     eax, 9
    ja      show_usage              ; one unsigned test rejects both < '0' and > '9'
    imul    rbx, rbx, 10
    add     rbx, rax
    inc     rsi
    cmp     byte [rsi], 0           ; parameter string is null-terminated
    jne     .next_digit

    test    rbx, rbx
    jz      exit                    ; asked for zero greetings: print nothing

.say_hello:
    ; write(STDOUT, hello, helloLen)
    ; The counter lives in rbx because syscall overwrites rcx.
    mov     eax, SYS_WRITE
    mov     edi, STDOUT
    lea     rsi, [rel hello]
    mov     edx, helloLen
    syscall
    dec     rbx
    jnz     .say_hello
    jmp     exit                    ; skip the usage text

show_usage:

    ; write(STDOUT, usage, usageLen), then fall through to exit
    mov     eax, SYS_WRITE
    mov     edi, STDOUT
    lea     rsi, [rel usage]
    mov     edx, usageLen
    syscall

exit:
    mov     eax, SYS_EXIT           ; exit(0)
    xor     edi, edi
    syscall
				
				

Showing Hello World a random number of times

In Linux it is possible to get random numbers from the /dev/random and /dev/urandom files. This creates an opportunity to try opening a file and reading from it. This program does that to display "Hello, World" a random number of times. Opening and reading from files demands extra work, checking for returned error codes.


; hello_world_rand
; display a random number of hello worlds (up to 255)
; nasm -g -f elf64 hello_world_rand.asm
; ld -o hello_world_rand hello_world_rand.o
; e.g. "./hello_world_rand"

section .data

    hello:          db      'Hello, world!', 0x0a   
    helloLen:       equ     $-hello

    errMsg:         db      'Could not open file', 0x0a
    errMsgLen:      equ     $-errMsg

    ; this little program doesn't need to draw from /dev/random
    randSrc:        db      '/dev/urandom', 0x0
    randNum:        db      0       ; receives the random byte; stays 0 if the read fails

section .text

    global _start

; Program entry point: read one byte from /dev/urandom and print the greeting
; that many times (0-255). Prints errMsg if the file cannot be opened.
_start:

    ; open the source of 'randomness'
    mov rax, 2              ; 'open'
    mov rdi, randSrc        ; pointer to filename
    mov rsi, 0              ; flags: 0 is O_RDONLY
    mov rdx, 0              
    syscall

    ; A raw syscall reports failure as a negative errno in rax. Treat every
    ; negative value as a failed open (not only ENOENT and EACCES), otherwise
    ; the error code would be used as a file descriptor below.
    test rax, rax
    js open_error

    mov rbx, rax            ; save the file descriptor (syscall preserves rbx)

    ; read a byte
    mov rax, 0              ; 'read'
    mov rdi, rbx            ; file descriptor
    mov rsi, randNum        ; memory location to read to
    mov rdx, 1              ; read 1 byte
    syscall                 ; execute

    ; close it
    mov rax, 3              ; 'close'
    mov rdi, rbx            ; file descriptor
    syscall

    ; put the random number into the loop counter
    movzx rcx, byte [randNum]   
    cmp rcx, 0
    je exit                 ; if rcx is zero just exit without showing anything.

hello_loop:

    mov rax, 1              ; 'write'
    mov rdi, 1              ; to STDOUT
    mov rsi, hello          ; pointer to char buffer
    mov rdx, helloLen       ; length of string to write
    push rcx                ; preserve counter: the syscall instruction overwrites rcx
    syscall                     
    pop rcx                 ; restore counter
    dec rcx                 ; and loop
    jnz hello_loop
    jmp exit

open_error:

    ; display a simple message if could not open

    mov rax, 1
    mov rdi, 1
    mov rsi, errMsg
    mov rdx, errMsgLen
    syscall

exit:

    mov rax, 0x3c           ; exit the program 
    mov rdi, 0
    syscall
				
				

XOR Cipher for a stream of data

Looking further at reading and writing data with files, it would be interesting to write a tool that does something with data coming from STDIN and then passes it on to STDOUT. The code below performs an exclusive-OR of the input from STDIN with a cipher key and writes the result straight to STDOUT. This function was chosen more for its simplicity than anything else, as it was just an exercise. XOR is a symmetric operation, so the encoded data produced can be passed back through the program to regenerate the original input. This shouldn't be used for any kind of real enciphering as the key is too small; however, it could be lengthened, as could the input and output buffers, and the inner loop could be optimised to work on e.g. quadwords where available.


; xor_cipher.asm
; nasm -g -f elf64 xor_cipher.asm
; ld -o xor_cipher xor_cipher.o
; cat some_text | ./xor_cipher > some_text.enc
; cat some_text.enc | ./xor_cipher > some_text.unenc

section .data

    ; apparently trailing colons optional in nasm labels
    key     dq      0xcafefeeddeadbeef      ; some key to xor with
    key_len equ     $-key                   ; key length in bytes
    input   dq      0x0                     ; space for read buffer
    output  dq      0x0                     ; space for writeout buffer
    buf_len equ     8                       ; buffer length

section .text

    global _start

; Program entry point: copy stdin to stdout, XORing every byte with the key.
; Register roles:
;   rbx = offset of the next key byte; carried across reads so that a short
;         read in the middle of the stream does not restart the key (which
;         would stop the output from decoding back to the input)
;   rcx = index into the input/output buffers for the current chunk
;   rdx = number of bytes in the current chunk
_start:

    xor ebx, ebx            ; start at the first key byte

read_more:

    mov rax, 0              ; 'read'
    mov rdi, 0              ; from stdin
    mov rsi, input          ; into input buffer 
    mov rdx, buf_len        ; buf_len bytes 
    syscall
    ; rax has the number of bytes actually read, 0 at end of input,
    ; or a negative errno on failure
    cmp rax, 0
    jle exit                ; stop on end of input and on read errors alike

    ; xor the input buffer with the key and put in the output buffer

    mov rdx, rax            ; save the number of bytes read
    mov rcx, 0              ; loop counter
    
    ; process xor byte-by-byte in case we received fewer bytes than buffer permits
    ; rcx is both the loop counter and index into the buffers

xor_loop:

    mov al, [input+rcx]     ; char from input buffer
    xor al, [key+rbx]       ; xor with the current key byte
    mov [output+rcx], al    ; put result into output buffer

    inc rbx                 ; advance through the key, wrapping at its end
    cmp rbx, key_len
    jb key_ok
    xor ebx, ebx
key_ok:

    inc rcx                 ; handle the loop
    cmp rcx, rdx
    jb xor_loop             ; strictly below: exactly rdx bytes are processed,
                            ; so nothing beyond the buffers is touched

    ; done

    mov rax, 1              ; 'write'
    mov rdi, 1              ; to stdout
    mov rsi, output         ; from the output buffer
                            ; rdx still holds the byte count returned by read above
    syscall

    jmp read_more           ; loop to get more input from stdin

exit:

    mov rax, 0x3c           ; exit the program 
    mov rdi, 0
    syscall
				
				

Read Bitmap File Header

Bitmap (.bmp) files are a very well documented image file format. This program reads and displays some information from a bitmap file header. This time code for converting from a number to a string is required. Also, more error checking is done around opening and reading from the file.


; nasm -g -f elf64 bmp_read.asm
; ld -o bmp_read bmp_read.o
; e.g. "./bmp_read ./image.bmp"

section .data

	; error messages

	usage:          db      'Enter bitmap file name.', 0x0a
	usageLen:       equ     $-usage

	errorFNF:       db      'File not found.', 0x0a
	errorFNFLen:    equ     $-errorFNF

	errorDenied:    db      'Permission denied.', 0x0a
	errorDeniedLen: equ     $-errorDenied

	errorRead:      db      'Error reading file.', 0x0a
	errorReadLen:   equ     $-errorRead

	errorFormat:    db      "Doesn't look like a BMP file.", 0x0a
	errorFormatLen: equ     $-errorFormat

	; output buffers. Numbers from the BMP header will be converted to text string
	; and inserted in here for display.
	;
	; Each number field is 11 bytes: display_line copies up to 10 digits (the
	; size of the scratch buffer) and then appends a newline. With only 10
	; bytes, a 10-digit value (any 32-bit field of 1000000000 or more) made the
	; newline overwrite the first character of the following message.

	outWidth:	    db 'The image width (pixels) is: '
	outWidth_len:	equ $-outWidth
	outWidth_s:	    times 11 db 0               ; number string + newline

	outHeight:	    db 'The image height (pixels) is: '
	outHeight_len:	equ $-outHeight
	outHeight_s:	times 11 db 0

	outbpp:		    db 'The bits per pixel is: '
	outbpp_len:	    equ $-outbpp
	outbpp_s:	    times 11 db 0

	outXRes:	    db 'The X resolution is: '
	outXRes_len:	equ $-outXRes
	outXRes_s:	    times 11 db 0

	outYRes:	    db 'The Y resolution is: '
	outYRes_len:	equ $-outYRes
	outYRes_s:	    times 11 db 0

	; a temp buffer for conversion between number and its string representation
	; (filled backwards from scratchend; 10 bytes holds any 32-bit value)

	scratch:	    times 10 db 0
	scratchLen:	    equ $-scratch
	scratchend:	    db 0

section .bss

	; In-memory copy of the bitmap file header, 54 bytes in total.
	; The program reads the start of the file straight into BMP_ident, so the
	; fields below must stay contiguous and in this exact order.

	BMP_ident:          resw        1       ; 'BM'
	BMP_file_size:      resd        1       ; size of file
	BMP_res1:           resw        1       ; 'reserved'
	BMP_res2:           resw        1       ; 'reserved'
	BMP_img_offset:     resd        1       ; offset to image data
	BMP_header_size:    resd        1       ; header size
	BMP_width:          resd        1       ; image width
	BMP_height:         resd        1       ; image height
	BMP_planes:         resw        1       ; number of planes
	BMP_bpp:            resw        1       ; bits per pixel
	BMP_compression:    resd        1       ; compression
	BMP_img_size:       resd        1       ; image size
	BMP_x_res:          resd        1       ; x resolution
	BMP_y_res:          resd        1       ; y resolution
	BMP_num_cols:       resd        1       ; number of colours
	BMP_imp_cols:       resd        1       ; important colours?

section .text

global _start

; Linux x86-64 syscall numbers and values used by the main routine
SYS_READ            equ 0
SYS_OPEN            equ 2
SYS_CLOSE           equ 3
SYS_EXIT            equ 60
O_RDONLY            equ 0
OPEN_NOT_FOUND      equ -2          ; value returned by open when the file is missing
OPEN_DENIED         equ -13         ; value returned by open when permission is denied
BMP_HEADER_BYTES    equ 54          ; size of the header read from the file

; Program entry point: open the bitmap named by the single parameter, read its
; header, and print width, height, bits per pixel and X/Y resolution.
; Stack as read here: [rsp] = argument count (including the program name),
; [rsp+16] = pointer to the first parameter.
; r8 holds the file descriptor between open and close.
_start:

	cmp qword [rsp], 2              ; need exactly: program name + file name
	jne show_usage
	mov rdi, [rsp + 16]             ; pointer to the bitmap file name

	; open(filename, O_RDONLY, 0)
	mov esi, O_RDONLY
	xor edx, edx
	mov eax, SYS_OPEN
	syscall

	cmp rax, OPEN_NOT_FOUND
	je fnf_error
	cmp rax, OPEN_DENIED
	je denied_error

	mov r8, rax                     ; keep the file descriptor

	; read(fd, BMP_ident, 54): fills the whole header structure in one go
	mov rdi, r8
	lea rsi, [rel BMP_ident]
	mov edx, BMP_HEADER_BYTES
	mov eax, SYS_READ
	syscall

	cmp rax, BMP_HEADER_BYTES       ; anything other than a full header is an error
	jne read_error

	; close(fd)
	mov rdi, r8
	mov eax, SYS_CLOSE
	syscall

	; the file must start with the magic identifier 'BM'
	cmp word [BMP_ident], 'BM'
	jne format_error

	; From here on the header is taken at face value.
	; display_line expects: rax = number, r9 = message text,
	; r10 = message length, r11 = where the number string goes.

	; image width
	mov eax, [BMP_width]            ; a 32-bit load clears the top half of rax
	lea r9, [rel outWidth]
	mov r10d, outWidth_len
	lea r11, [rel outWidth_s]
	call display_line

	; image height
	mov eax, [BMP_height]
	lea r9, [rel outHeight]
	mov r10d, outHeight_len
	lea r11, [rel outHeight_s]
	call display_line

	; bits per pixel (a 16-bit field)
	movzx eax, word [BMP_bpp]
	lea r9, [rel outbpp]
	mov r10d, outbpp_len
	lea r11, [rel outbpp_s]
	call display_line

	; X resolution
	mov eax, [BMP_x_res]
	lea r9, [rel outXRes]
	mov r10d, outXRes_len
	lea r11, [rel outXRes_s]
	call display_line

	; Y resolution
	mov eax, [BMP_y_res]
	lea r9, [rel outYRes]
	mov r10d, outYRes_len
	lea r11, [rel outYRes_s]
	call display_line

	; all done: fall through to exit
exit:

	mov eax, SYS_EXIT               ; exit(0)
	xor edi, edi
	syscall

;-----------------------------------------------------------------------
; Error exits. Each entry point selects a message (rsi = text,
; rdx = length) and hands over to write_and_exit, which prints it to
; stdout and leaves the program through exit.
;-----------------------------------------------------------------------

show_usage:
	lea rsi, [rel usage]
	mov edx, usageLen
	jmp write_and_exit

fnf_error:
	lea rsi, [rel errorFNF]
	mov edx, errorFNFLen
	jmp write_and_exit

denied_error:
	lea rsi, [rel errorDenied]
	mov edx, errorDeniedLen
	jmp write_and_exit

read_error:
	lea rsi, [rel errorRead]
	mov edx, errorReadLen
	jmp write_and_exit

format_error:
	lea rsi, [rel errorFormat]
	mov edx, errorFormatLen
	; write_and_exit follows directly, so no jump is needed here

write_and_exit:
	; expects rsi = message, rdx = message length
	mov edi, 1                      ; stdout
	mov eax, 1                      ; 'write'
	syscall
	jmp exit

;-----------------------------------------------------------------------
; display_line: print a message followed by a decimal number and newline.
; In:    rax = unsigned number to show
;        r9  = start of the message text
;        r10 = length of the message text
;        r11 = where the number string is stored (directly after the text)
; Uses:  the scratch buffer, filled backwards from scratchend
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r11 (and rcx/r11 by the syscall)
;-----------------------------------------------------------------------
display_line:
	; 1. Produce the digits, least significant first, walking backwards
	;    through the scratch buffer.
	lea rsi, [rel scratchend]       ; rsi = cursor, starts just past the buffer
	xor ecx, ecx                    ; rcx = number of digits produced
	mov edi, 10                     ; divisor
.emit_digit:
	xor edx, edx                    ; dividend is rdx:rax, so clear the top half
	div rdi                         ; rax = quotient, rdx = remainder (0..9)
	add dl, '0'                     ; remainder -> ascii digit
	dec rsi
	mov [rsi], dl
	inc rcx
	cmp rcx, scratchLen             ; stop once the scratch buffer is full
	je .digits_done
	test rax, rax                   ; otherwise continue until nothing is left
	jnz .emit_digit
.digits_done:
	; 2. rsi points at the first digit and rcx is the digit count: copy the
	;    digits to their place after the message and terminate with newline.
	mov r8, rcx                     ; keep the count, rep movsb consumes rcx
	mov rdi, r11
	rep movsb
	mov byte [rdi], 0x0a

	; 3. write(stdout, message, message length + digits + newline)
	lea rdx, [r10 + r8 + 1]
	mov rsi, r9
	mov edi, 1                      ; stdout
	mov eax, 1                      ; 'write'
	syscall
	ret
				
				

Number Guessing Game

Using the techniques developed in the previous programs, a simple game can be created. This program pulls a random number (0-255) and asks the player to guess it in a certain number of tries, responding "higher" or "lower" to help the player. When run in a terminal, the kernel seems to connect STDIN to the keyboard, allowing player input. Input errors are not handled particularly well, the game simply quits.


; guess a number game
; nasm -g -f elf64 guess_number.asm
; ld -o guess_number guess_number.o
; e.g. "./guess_number" or "./guess_number 20" to specify the number of tries

section .data

	; --- messages -----------------------------------------------------
	; greeting and sorry are each followed by a small field (*_s) that
	; receives a number string and newline before the line is printed.

	greeting:       db 'Hello! I have thought of a number between 0 and 255. Try to guess it. You have this number of attempts: '
	greeting_len:   equ $-greeting
	greeting_s:     times 10 db 0

	equal_str:      db 'Correct! You guessed the number.', 10
	equal_str_len:  equ $-equal_str

	sorry:          db "Sorry, you didn't guess the number within the number of goes.", 10, "The number was: "
	sorry_len:      equ $-sorry
	sorry_s:        times 4 db 0

	higher_str:     db 'Higher!', 10
	higher_str_len: equ $-higher_str

	lower_str:      db 'Lower!', 10
	lower_str_len:  equ $-lower_str

	; --- working buffers ----------------------------------------------

	input_buffer:   times 10 db 0       ; the player's typed guess
	buf_len:        equ $-input_buffer
	null_char:      db 0                ; zero byte directly after the input buffer

	scratch:        times 10 db 0       ; number-to-string conversion area,
	scratchLen:     equ $-scratch       ; filled backwards from scratchend
	scratchend:     db 0

	; --- game state ---------------------------------------------------

	randSrc:        db '/dev/urandom', 0
	num_to_guess:   db 0                ; will be filled in from /dev/urandom
	num_of_goes:    db 10               ; default the number of goes to 10

section .text

	global _start

; Program entry point for the guessing game.
; Optionally takes the number of goes as a parameter (default 10), picks a
; random byte from /dev/urandom as the number to guess, then loops reading
; guesses until the player is correct or the goes run out.
_start:

	; check if the player has passed the number of goes they want as a parameter.
	pop rax                     ; number of params (including the program name)
	cmp rax, 2
	jne begin 	; if not use the default and start the game

	pop rsi                     ; program name, not needed
	pop rsi                     ; the requested number of goes, as text

	call string_to_num          ; rcx = parsed value (quits on a non-digit)

	; num_of_goes is a single byte. Cap the request at 255 instead of
	; storing only the low byte, which turned e.g. 256 into 0.
	cmp rcx, 255
	jbe store_goes
	mov rcx, 255
store_goes:
	mov [num_of_goes], cl 

begin:

	; come up with number for the player to guess
    ; read a byte from /dev/urandom

	mov rax, 2              ; 'open'
	mov rdi, randSrc        ; pointer to filename
	mov rsi, 0              ; O_RDONLY
	mov rdx, 0              
	syscall

	; A raw syscall reports failure as a negative errno in rax. Quit on any
	; failure, not only ENOENT/EACCES, so an error code is never used as a
	; file descriptor.
	test rax, rax
	js exit

	mov rbx, rax            ; save the file descriptor (syscall preserves rbx)

	; read a byte
	mov rax, 0              ; 'read'
	mov rdi, rbx            ; file descriptor
	mov rsi, num_to_guess   ; memory location to read to
	mov rdx, 1              ; read 1 byte
	syscall

	; close it
	mov rax, 3              ; 'close'
	mov rdi, rbx            ; file descriptor
	syscall
	
	; greet the player and tell them how many goes they have
	movzx rax, byte [num_of_goes]		 
	mov r9, greeting	
	mov r10, greeting_len
	mov r11, greeting_s
	call display_line

	; With zero goes there is nothing to play. Without this check the
	; decrement in loop_around wrapped 0 round to 255 and the player got
	; 256 attempts.
	cmp byte [num_of_goes], 0
	je no_more_goes

player_input:	; mainloop

	; player enters a number
	call read_string

	; convert it from a string to a number in rcx
	mov rsi, input_buffer 
	call string_to_num

	; compare the guess to the chosen number 
	movzx rbx, byte [num_to_guess]
	cmp rcx, rbx
	je got_it		    ; if it's equal player has guessed correctly
	jl higher		    ; tell them to guess a higher number
	jg lower		    ; tell them to guess a lower number

loop_around:

	; higher/lower come back here after printing their hint
	sub byte [num_of_goes], 1
	jnz player_input

no_more_goes:

	; player has not guessed the number. Tell them what it is and exit
	movzx rax, byte [num_to_guess]		 
	mov r9, sorry		
	mov r10, sorry_len
	mov r11, sorry_s
	call display_line

exit:

	mov rax, 0x3c           ; exit the program 
	mov rdi, 0
	syscall

;;;;;;;;
	

; Outcome handlers for a guess. Each selects a message (rsi = text,
; rdx = length) and prints it with write_out.

got_it:
	; correct guess: congratulate the player and end the game
	lea rsi, [rel equal_str]
	mov edx, equal_str_len
	call write_out
	jmp exit

higher:
	; guess was too low
	lea rsi, [rel higher_str]
	mov edx, higher_str_len
	jmp hint_and_continue

lower:
	; guess was too high
	lea rsi, [rel lower_str]
	mov edx, lower_str_len
	; hint_and_continue follows directly

hint_and_continue:
	call write_out
	jmp loop_around             ; back to the main loop to count the go

; write_out: write rdx bytes starting at rsi to stdout.
write_out:
	mov edi, 1                  ; stdout
	mov eax, 1                  ; 'write'
	syscall
	ret

;;;;;;;;

; read_string: read one line from stdin into input_buffer, one char at a time.
; The terminating newline is replaced with a zero byte so the buffer holds a
; null-terminated string. The game quits (jumps to exit) if input ends, if
; reading fails, or if buf_len chars arrive without a newline.
; Clobbers rax, rcx, rdx, rsi, rdi (and r11 through the syscall).
read_string:
	; player is going to enter something in the terminal
	mov rcx, 0		; count number of chars entered
get_char:
	; read a char into the buffer
	mov rax, 0		; read
	mov rdi, 0		; from stdin
	mov rdx, 1		; 1 char
	mov rsi, input_buffer	; calculate the current offset into input buffer
	add rsi, rcx		; fill it up one char at a time until newline entered
	push rsi		; preserve the pointer
	push rcx		; and the counter (the syscall instruction overwrites rcx)
	syscall
	pop rcx			; restore
	pop rsi
	; rax is 1 on success, 0 at end of input, or a negative errno on failure.
	; Quit in both of the latter cases: previously a failed read was counted
	; as a char, so whatever was left in the buffer got parsed as the guess.
	cmp rax, 0
	jle exit
	inc rcx			; increment counter
	movzx rax, byte [rsi]		; check for newline entered
	cmp rax, 0x0a
	je done_read		; break out of loop when user hits return 
	cmp rcx, buf_len
	jge exit;		    ; let's not read beyond the end of the buffer
	jmp get_char		; continue
done_read:
	mov byte [rsi], 0	; overwrite the newline with the string terminator
	ret

;;;;;;;;

; string_to_num: convert a null-terminated string of decimal digits.
; In:    rsi = pointer to the string
; Out:   rcx = the number
; Clobb: rbx, rsi
; Any character outside '0'..'9' (including an empty string) ends the
; program via exit.
string_to_num:
	xor ecx, ecx                ; running total
.next_digit:
	movzx ebx, byte [rsi]
	sub ebx, '0'                ; map '0'..'9' onto 0..9
	cmp ebx, 9
	ja exit                     ; one unsigned test rejects both < '0' and > '9'
	imul rcx, rcx, 10           ; total = total*10 + digit
	add rcx, rbx
	inc rsi
	cmp byte [rsi], 0           ; stop at the terminating zero byte
	jne .next_digit
	ret

;;;;;;;;

; display_line: print a message followed by a decimal number and newline.
; In:    rax = unsigned number to show
;        r9  = start of the message text
;        r10 = length of the message text
;        r11 = where the number string is stored (directly after the text)
; Uses:  the scratch buffer, filled backwards from scratchend
; Clobb: rax, rcx, rdx, rsi, rdi, r8, r11 (and rcx/r11 by the syscall)
display_line:
	; 1. Produce the digits, least significant first, walking backwards
	;    through the scratch buffer.
	lea rsi, [rel scratchend]       ; rsi = cursor, starts just past the buffer
	xor ecx, ecx                    ; rcx = number of digits produced
	mov edi, 10                     ; divisor
.emit_digit:
	xor edx, edx                    ; dividend is rdx:rax, so clear the top half
	div rdi                         ; rax = quotient, rdx = remainder (0..9)
	add dl, '0'                     ; remainder -> ascii digit
	dec rsi
	mov [rsi], dl
	inc rcx
	cmp rcx, scratchLen             ; stop once the scratch buffer is full
	je .digits_done
	test rax, rax                   ; otherwise continue until nothing is left
	jnz .emit_digit
.digits_done:
	; 2. rsi points at the first digit and rcx is the digit count: copy the
	;    digits to their place after the message and terminate with newline.
	mov r8, rcx                     ; keep the count, rep movsb consumes rcx
	mov rdi, r11
	rep movsb
	mov byte [rdi], 0x0a

	; 3. write(stdout, message, message length + digits + newline)
	lea rdx, [r10 + r8 + 1]
	mov rsi, r9
	mov edi, 1                      ; stdout
	mov eax, 1                      ; 'write'
	syscall
	ret

				
				

Write Bitmap File

This little program creates a bitmap file of the specified height and width, filling it with the specified colour. It makes use of the mmap syscall to allocate some memory. This goes straight in with 32 bits per pixel uncompressed, so if large heights and widths are passed to it, a large file will be created.


; create a bitmap image of the specified size and colour
; nasm -g -f elf64 bmp_write.asm
; ld -o bmp_write bmp_write.o
; e.g. "./bmp_write 32 24 00ff9933 ./test.bmp"

section .data

	; ------------------------------------------------------------------
	; Bitmap file header, written to the output file exactly as laid out
	; here. The zeroed fields (file size, image offset, header size,
	; width, height, image size) are filled in by the code at run time.
	; The fields must stay contiguous and in this order.
	; ------------------------------------------------------------------

	; first part of the header
	BMP_ident:          db  'BM'
	BMP_file_size:      dd  0
	BMP_res1:           dw  0
	BMP_res2:           dw  0
	BMP_img_offset:     dd  0
	BMP_initial_len:    equ $-BMP_ident         ; length of the part above

	; second part of the header
	BMP_header_size:    dd  0
	BMP_width:          dd  0
	BMP_height:         dd  0
	BMP_planes:         dw  1
	BMP_bpp:            dw  32
	BMP_compression:    dd  0
	BMP_img_size:       dd  0
	BMP_x_res:          dd  2835
	BMP_y_res:          dd  2835
	BMP_num_cols:       dd  0
	BMP_imp_cols:       dd  0
	BMP_header_len:     equ $-BMP_ident         ; length of the whole header

	; ------------------------------------------------------------------
	; Messages
	; ------------------------------------------------------------------

	; shown when the number of parameters is wrong
	usage_str:          db  'Please pass width, height, colour (ARGB) and output file name as parameters.', 10
	                    db  'E.g. "./bmp_write 1024 768 00ff0066 ./image.bmp"', 10
	usage_len:          equ $-usage_str

	number_error_str:   db  'Error in height/width parameters.', 10
	number_error_len:   equ $-number_error_str

	colour_error_str:   db  'Error in colour parameter (must be hex string).', 10
	colour_error_len:   equ $-colour_error_str

	map_f_str:          db  'mmap failed.', 10
	map_f_len:          equ $-map_f_str

	file_error_str:     db  'Error writing to output file.', 10
	file_error_len:     equ $-file_error_str

section .bss

	; working values for the bitmap that are not part of the file header
	BMP_pixel_size:     resb    4       ; 32-bit: size of the bitmap in pixels
	BMP_data_ptr:       resb    8       ; 64-bit: pointer to memory from mmap, for image data
	pixel_colour:       resb    4       ; 32-bit: ARGB in four bytes

	; 64-bit pointers to the parameter strings passed into the program
	width_str_ptr:      resb    8
	height_str_ptr:     resb    8
	colour_str_ptr:     resb    8
	out_file_ptr:       resb    8

section .text

global _start

; Largest pixel count for which (pixels * 4 + header length) still fits in the
; 32-bit size fields of the bitmap header.
MAX_PIXELS	equ	(0xffffffff - BMP_header_len) / 4

; Program entry point: create a 32 bpp bitmap file of the requested width and
; height, filled with one colour.
; Parameters on the stack: width, height, colour (hex ARGB), output file name.
; r8 holds the output file descriptor once the file is open.
_start:

	pop rax			    ; number of params (including the program name)
	cmp rax, 5 
	jne usage

	pop rsi				; skip past program name

	; get pointers to the strings entered for width, height, colour
	pop rsi				; width
	mov [width_str_ptr], rsi
	pop rsi				; height
	mov [height_str_ptr], rsi
	pop rsi				; colour
	mov [colour_str_ptr], rsi
	pop rsi				; output file name
	mov [out_file_ptr], rsi

	; convert width string from parameters into a number
	mov rsi, [width_str_ptr]
	call string_to_num
	mov [BMP_width], ecx
	
	; convert height string from parameters into a number
	mov rsi, [height_str_ptr]
	call string_to_num
	mov [BMP_height], ecx

	; convert colour string into a 4-byte value 
	mov rsi, [colour_str_ptr]
	call hex_string_to_num
	mov [pixel_colour], ecx

	; perform some calculations for the bitmap header and image size
	mov dword [BMP_img_offset], BMP_header_len 	; image data is right after header
	mov dword [BMP_header_size], BMP_header_len-BMP_initial_len
	mov eax, [BMP_width]
	mov ecx, [BMP_height]
	mul ecx 			        ; edx:eax = width*height, the image size in pixels
	jc number_error			    ; product does not fit in 32 bits
	test eax, eax
	jz number_error			    ; a zero width or height leaves nothing to map or draw
	cmp eax, MAX_PIXELS
	ja number_error			    ; byte size plus header would overflow the 32-bit fields
	mov [BMP_pixel_size], eax	
	shl eax, 2			        ; image size in bytes is image size * 4 (32 bpp)
	mov [BMP_img_size], eax
	add eax, BMP_header_len	    ; file size is image size in bytes plus header size
	mov [BMP_file_size], eax	

	; setup for mmap. Use this syscall to allocate some memory to write the
    ; pixel data to before copying that out to the destination file
	mov rax, 9			        ; mmap is syscall 9
	mov rdi, 0 			        ; let the kernel choose where the memory starts
	mov esi, [BMP_img_size] 	; the size in bytes that we want
	mov rdx, 3 			        ; memory protection: PROT_READ | PROT_WRITE
	mov r10, 34 			    ; flags: MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1 			        ; no file descriptor
	mov r9, 0 			        ; no offset
	syscall	

	; The raw syscall does not return -1 on failure: it returns a negative
	; errno, i.e. a value in [-4095, -1]. Anything else is the address of
	; the new mapping.
	cmp rax, -4095
	jae map_failed
	mov [BMP_data_ptr], rax		; save address

	; because we are filling with one colour, we can use rep stos
	mov rdi, rax
	mov eax, [pixel_colour] 	; 1 pixel: argb
	mov ecx, [BMP_pixel_size] 
	rep stosd

	; open the specified file for writing. 
	; Create and truncate the file if necessary and apply some basic permissions
	mov rax, 2
	mov rdi, [out_file_ptr]
	mov rsi, 0o1101 		    ; flags: O_TRUNC | O_CREAT | O_WRONLY
	mov rdx, 0o0664			    ; permissions: -rw-rw-r--
	syscall

	; any negative result is a failed open: report it instead of exiting
	; silently or carrying on with an error code as the file descriptor
	test rax, rax
	js file_write_error

	mov r8, rax			        ; file descriptor

	; write out the file
	; header first
	mov rsi, BMP_ident
	mov rdx, BMP_header_len 
	mov rax, 1
	mov rdi, r8 
	syscall

	; check to make sure the entire header was written
	cmp rax, BMP_header_len
	jne file_write_error

	; write out the image data. A single write may accept fewer bytes than
	; requested, so keep going until nothing remains.
	; rsi = next byte to write, rdx = bytes remaining (both survive the syscall)
	mov rsi, [BMP_data_ptr]
	mov edx, [BMP_img_size]
write_pixels:
	mov rax, 1
	mov rdi, r8
	syscall
	test rax, rax
	jle file_write_error		; negative errno, or no progress at all
	add rsi, rax
	sub rdx, rax
	jnz write_pixels

	; close the file
	mov rax, 3
	mov rdi, r8
	syscall

    ; done
exit:

	mov rax, 0x3c
	mov rdi, 0
	syscall

;;;;;;;;;

; Error exits. Each entry point selects a message (rsi = text, rdx = length);
; report_and_exit prints it to stdout and leaves the program through exit.

usage:
	lea rsi, [rel usage_str]
	mov edx, usage_len
	jmp report_and_exit

map_failed:
	lea rsi, [rel map_f_str]
	mov edx, map_f_len
	jmp report_and_exit

file_write_error:
	lea rsi, [rel file_error_str]
	mov edx, file_error_len
	jmp report_and_exit

number_error:
	lea rsi, [rel number_error_str]
	mov edx, number_error_len
	jmp report_and_exit

colour_error:
	lea rsi, [rel colour_error_str]
	mov edx, colour_error_len
	; report_and_exit follows directly

report_and_exit:
	call write_out
	jmp exit

; write_out: write rdx bytes starting at rsi to stdout.
write_out:
	mov edi, 1                  ; stdout
	mov eax, 1                  ; 'write'
	syscall
	ret

;;;;;;;;

; string_to_num: convert a null-terminated string of decimal digits.
; In:    rsi = pointer to the string
; Out:   rcx = the number
; Clobb: rbx, rsi
; Any character outside '0'..'9' (including an empty string) is reported
; through number_error, which ends the program.
string_to_num:
	xor ecx, ecx                ; running total
.next_digit:
	movzx ebx, byte [rsi]
	sub ebx, '0'                ; map '0'..'9' onto 0..9
	cmp ebx, 9
	ja number_error             ; one unsigned test rejects both < '0' and > '9'
	imul rcx, rcx, 10           ; total = total*10 + digit
	add rcx, rbx
	inc rsi
	cmp byte [rsi], 0           ; stop at the terminating zero byte
	jne .next_digit
	ret

; hex_string_to_num: convert a null-terminated string of hex digits.
; In:    rsi = pointer to the string ('0'-'9', 'a'-'f' or 'A'-'F')
; Out:   rcx = the number
; Clobb: rax, rsi
; Any other character (including an empty string) is reported through
; colour_error, which ends the program.
hex_string_to_num:
	xor ecx, ecx                ; running total
.next_nibble:
	movzx eax, byte [rsi]
	cmp al, '0'
	jb colour_error
	cmp al, '9'
	jbe .decimal_digit
	and al, 0b11011111          ; clear bit 5: folds lowercase letters to uppercase
	cmp al, 'A'
	jb colour_error
	cmp al, 'F'
	ja colour_error
	sub al, 'A' - 10            ; 'A'..'F' -> 10..15
	jmp .accumulate
.decimal_digit:
	sub al, '0'                 ; '0'..'9' -> 0..9
.accumulate:
	shl rcx, 4                  ; total = total*16 + nibble
	add rcx, rax
	inc rsi
	cmp byte [rsi], 0           ; stop at the terminating zero byte
	jne .next_nibble
	ret

				
				

Conway's Game of Life

The Game of Life has simple rules, a binary cell state, and works on a grid, so it is ideal for an assembly language project. There are two main difficulties — loading a pattern file, and visualising the output. In the implementation below the previous bitmap writing code is used to save the board state at each tick. Although the cell state is binary, a byte per cell is used for convenience, and this is carried through to the bitmap output which is 8 bpp.

Loading a pattern file is even more involved. What would take just a couple of lines in a higher-level language becomes its own project in assembly. The implementation here reads .cells files, a repository of which can be found here.

Some care should be taken if run with a large pattern file for lots of ticks, as the program will instantly fill up a hard drive with lots of uncompressed bitmap files.


; Conway's Game of Life
; nasm -g -f elf64 game_of_life.asm
; ld -o game_of_life game_of_life.o
; in usage takes .cells file and number of ticks to run for, and produces bitmaps for output. e.g.:-
; ./game_of_life 232p7h3v0puffer.cells 50 

section .data

	; various error messages
	; each string is followed by an assemble-time length ($ - start of string)
	; which the error handlers load into rdx for the write syscall
	usage_str		    db	    'Please enter pattern filename (.cells format) and number of iterations', 0x0a
	usage_len		    equ	    $-usage_str

	mmap_error_str		db	    'mmap failed.', 0x0a
	mmap_error_len		equ	    $-mmap_error_str

	file_error_str		db	    'Error writing to output file.', 0x0a
	file_error_len		equ	    $-file_error_str

	number_error_str	db	    'Error in number of iterations.', 0x0a
	number_error_len	equ	    $-number_error_str

	FNF_error_str       db      'File not found.', 0x0a
	FNF_error_len    	equ     $-FNF_error_str

	denied_error_str    db      'Permission denied.', 0x0a
	denied_error_len 	equ     $-denied_error_str

	; Write the board to a bitmap every iteration/'tick'
	; 62 is the total length of the header including the colour table:
	; 14 bytes of file header + 40 bytes of DIB header + 8 bytes of colour table.
	; The fields below must stay contiguous and in this order, because
	; snapshot_buffer writes all 62 bytes starting at BMP_ident in one go.
	; the width and height are calculated based on the specified input file
	; BMP header
	BMP_ident:		    db	'BM'
	BMP_file_size:		dd	0	    ;62+(row_length*total_rows), filled in at PT_d3
	BMP_res1:		    dw	0
	BMP_res2:		    dw	0
	BMP_img_offset:		dd	62      ; pixel data follows directly after the 62 header bytes
	; DIB header
	BMP_header_size:	dd	40		
	BMP_width:		    dd	0 	    ; row_length, filled in at PT_d3
	BMP_height:		    dd	0 	    ; total_rows, filled in at PT_d3
	BMP_planes:		    dw	1
	BMP_bpp:		    dw	8	    ; one byte per cell
	BMP_compression:	dd	0
	BMP_img_size:		dd	0	    ; row_length*total_rows, filled in at PT_d3
	BMP_x_res:		    dd	2835
	BMP_y_res:		    dd	2835
	BMP_num_cols:		dd	2		; number of colours in the colour table	
	BMP_imp_cols:		dd	0
	; Colour table. Only two entries as there are only two cell states
	; entry 0 is used by cell value 0, entry 1 by cell value 1
	dead_colour:		db	0xff, 0xff, 0xff, 0xff	; white
	live_colour:		db	0x00, 0x00, 0x00, 0x00	; black

	; output filename: 'GoL_' + 8 decimal digits + '.bmp' + NUL, stored
	; contiguously so that out_file can be passed to open as one string
	out_file:		    db	'GoL_'	
	file_num:		    db	'00000000'	; this will be overwritten according to the tick number
	file_num_len		equ	$-file_num
	file_ext:		    db	'.bmp', 0x00
	
    ; ticks
	iteration		    dq	1       ; current tick, starts at 1; also used for the filename
	num_iterations		dq	0       ; number of ticks requested on the command line

section .bss

	; pointers to passed parameters
	pattern_file_ptr	resq	1       ; argv[1]: name of the .cells file
	ticks_ptr		    resq	1       ; argv[2]: number of ticks, as text

	; for reading in the pattern (.cells) file
	PT_file_desc		resq	1       ; result of the open syscall on the pattern file
	PT_char_buf		    resb	1       ; the file is read one byte at a time into here

	pattern_start		resq	1       ; number of comment bytes counted in the first pass; used as the lseek offset
	pattern_cols		resq	1       ; length of the longest data row
	pattern_rows		resq	1       ; number of data rows
	pattern_size		resq	1       ; pattern_cols * pattern_rows
	pattern_ptr		    resq	1       ; mmap'd buffer holding the pattern, one byte per cell

	; buffers for simulation
	buffer_ptr		    resq	1       ; start of the mmap'd area holding both boards
	row_length		    resq	1       ; board width in cells (= bytes)
	total_rows		    resq	1       ; board height in rows
	buffer_size		    resq	1       ; row_length * total_rows, the size of one board
	src_ptr			    resq	1       ; board read during the current tick
	dst_ptr			    resq	1       ; board written during the current tick (swapped with src_ptr each tick)

section .text

global _start

_start:

	; process input parameters
	; on entry the top of the stack is argc, followed by the argv pointers
	pop rax			
	cmp rax, 3 			    ; program name + pattern file + ticks
	jne usage

	pop rsi				    ; skip past program name

	; get pointers to the strings entered for input pattern and number of ticks
	pop rsi				    ; filename
	mov [pattern_file_ptr], rsi
	pop rsi				    ; ticks
	mov [ticks_ptr], rsi

	; convert ticks passed into the program, into a number
	; (string_to_num jumps to number_error itself if the text is not a number)
	mov rsi, [ticks_ptr]
	call string_to_num
	mov [num_iterations], rcx
	
	; load the .cells file
	mov rdi, [pattern_file_ptr]
	mov rax, 2              ; 'open'
	mov rsi, 0              ; flags: O_RDONLY
	mov rdx, 0              
	syscall

	; check for open errors. A raw syscall reports failure as -errno in rax.
	cmp rax, -2             
	je fnf_error            ; error: file not found

	cmp rax, -13
	je denied_error         ; error: permission denied

	; Any other negative result is a failure as well. Previously these
	; fell through and the loader carried on with an invalid descriptor.
	; There is no dedicated message for them, so show the usage text.
	test rax, rax
	js usage
    
PT_proceed:

	; First pass over the .cells file: work out the board dimensions.
	; Entered with rax = file descriptor returned by open.
	;
	; use a simple state machine to process the .cells file.
	; these files can have comments as well as data.
	; There are two passes through the file. In the first pass,
	; try to determine the dimensions of the board in the file.
	; Use those dimensions to allocate some memory, then rewind
	; the file and read the file again to get the actual board 
    ; data into the allocated memory.
	; This file loader makes a lot of assumptions about the 
	; format of the input file (e.g. the comment (!) char is at 
    ; the beginning of a line) but it seems to work so far
	;
	; States held in r9:
	;   0 = at the start of a line, nothing decided yet
	;   1 = inside a comment line (started with '!')
	;   2 = inside a row of cell data

	mov [PT_file_desc], rax    	; file descriptor
	mov r9, 0			        ; state. Start in state=0
	mov r10, 0			        ; columns (longest row seen so far)
	mov r11, 0			        ; rows
	mov r12, 0			        ; temp for current row column counter
	mov r13, 0			        ; offset into file where the board data begins

PT_read_loop:
	; read one byte into PT_char_buf
	push r9				        ; save vars across syscall (the syscall instruction overwrites r11 and rcx)
	push r10
	push r11
	push r12
	push r13
	xor eax, eax            	; read
	mov rdi, [PT_file_desc]    	; from .cells file
	mov rsi, PT_char_buf		; memory location to read to
	mov rdx, 1			        ; read 1 char from the file
	syscall
	pop r13				        ; restore vars
	pop r12
	pop r11
	pop r10
	pop r9

	cmp rax, 1             		; check for EOF (a read error also ends the pass here)
	jne PT_d1

	; current state determines how to process the char just read
	cmp r9, 0		            ; state 0 is a kind of holding state
	je PT_state_0
	cmp r9, 1		            ; state 1 means reading a comment
	je PT_state_1
	; assume state 2 - reading the cell data block
PT_state_2:
	; check for end of line
	cmp byte [PT_char_buf], 0x0a
	je PT_state_2_0
	; skip past 0x0d if present
	cmp byte [PT_char_buf], 0x0d
	je PT_read_loop
	; assume char_buf has valid cell data
	inc r12			            ; keep track of the number of columns
	jmp PT_read_loop
PT_state_2_0:
	; end of a data row (an empty line arrives here too and counts as a row)
	inc r11			            ; increment row counter
	mov r9, 0		            ; reset state
	cmp r12, r10		        ; keep track of the maximum row length in r10
	cmova r10, r12		        ; since .cells files can have a variable row length
	mov r12, 0
	jmp PT_read_loop
PT_state_0: 
	; check to see if it's the start of a comment
	cmp byte [PT_char_buf], '!'	; start of a comment
	je PT_state_0_0
	mov r9, 2			        ; assume we've entered a cell data row, set state=2
	jmp PT_state_2			    ; process the data cell just read
PT_state_0_0:
	mov r9, 1			        ; set state=1, 'reading comment' state
	inc r13			            ; the '!' counts towards the data start offset
	jmp PT_read_loop
PT_state_1:	
	; reading a comment. Just pull in bytes until the newline
	; every comment byte, including its newline, is added to r13
	inc r13
	cmp byte [PT_char_buf], 0x0a
	jne PT_read_loop
	mov r9, 0		; reset to state=0
	jmp PT_read_loop
PT_d1:
	; final row might not have a newline at the end.
	; Catch up with any final data cells
	cmp r12, 0	
	je PT_d2
	cmp r12, r10
	cmova r10, r12
	inc r11
PT_d2:
	; at this point, r10 should be the number of data columns, r11 the number of data rows
	; save these
	mov [pattern_cols], r10
	mov [pattern_rows], r11
	mov [pattern_start], r13	; only correct if all comment lines come before the data
	mov rax, r10
	mul r11			            ; rax = cols * rows (rdx is overwritten with the high half)
	mov [pattern_size], rax

	; now have to allocate memory and read the file again to extract the data
	mov rax, 9			        ; mmap is syscall 9
	mov rdi, 0 			        ; let the kernel choose where the memory starts
	mov rsi, [pattern_size]		; the size in bytes that we want (all 64 bits of it)
	mov rdx, 3 			        ; memory protection: PROT_READ | PROT_WRITE
	mov r10, 34 			    ; flags: MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1 			        ; no file descriptor
	mov r9, 0 			        ; no offset
	syscall	

	; The raw syscall does not return -1 on failure: it returns -errno,
	; a value in [-4095, -1]. Comparing against -1 alone misses most
	; failures, e.g. the one caused by a zero-length request for an
	; empty pattern. Anything in that range, seen as unsigned, is >= -4095.
	cmp rax, -4095
	jae mmap_failed
	mov [pattern_ptr], rax		; save address

	; seek back in the file
	mov rax, 8			        ; sys_lseek
	mov rdi, [PT_file_desc]		; file descriptor
	mov rsi, [pattern_start]	; offset into the file where data begins
	mov rdx, 0			        ; SEEK_SET
	syscall

	; Second pass: copy the cell data into the pattern buffer.
	; rdi = start of the current row in the buffer, rcx = column within it.
	mov rdi, [pattern_ptr]
	xor ecx, ecx
PT_pattern_in:
	; fetch the next byte of the file into PT_char_buf.
	; rcx and rdi are needed afterwards, so keep them on the stack
	; while the syscall runs.
	push rcx
	push rdi
	mov rsi, PT_char_buf	    ; destination of the read
	mov rdi, [PT_file_desc]	    ; the .cells file
	mov edx, 1		            ; one byte at a time
	xor eax, eax                ; syscall 0 = read
	syscall
	pop rdi
	pop rcx

	cmp rax, 1		            ; anything other than one byte read ends the pass
	jne PT_d3

	mov al, [PT_char_buf]	    ; al = the character just read
	cmp al, 0x0d		        ; carriage returns are skipped
	je PT_pattern_in
	cmp al, 0x0a
	jne PT_set_cell

	; newline: move on to the start of the next row in the buffer
	xor ecx, ecx
	add rdi, [pattern_cols]
	jmp PT_pattern_in

PT_set_cell:
	; a cell is live (1) if the character is 'O' or '*' (sometimes used
	; in .cells files apparently); any other character gives a dead cell (0)
	cmp al, 'O'
	sete dl
	cmp al, '*'
	sete al
	or al, dl
	mov [rdi+rcx], al

	inc rcx			            ; next column
	jmp PT_pattern_in

PT_d3:
	; file has been read into memory.
	; determine size of final buffers for simulation,
	; and setup bitmap header
	; Make the simulation buffer twice the dimension of the pattern read in.
	; This is a bit arbitrary, would perhaps be better to ask the user for the board dimensions

	; the pattern file is not needed any more, so release its descriptor
	mov rax, 3			        ; close
	mov rdi, [PT_file_desc]
	syscall

	mov rax, [pattern_cols]
	sal rax, 1			        ; buffer is double the initial pattern width
	add rax, 3			        ; round UP to a multiple of 4 for the bitmap rows.
	and al, 11111100b		    ; (rounding down gave a width of 0 for a one-column pattern)
	mov [row_length], rax
	mov [BMP_width], eax

	mov rbx, [pattern_rows]
	sal rbx, 1			        ; buffer is double the height of the initial pattern
	mov [total_rows], rbx
	mov [BMP_height], ebx

	mul rbx				        ; compute the total size, rows*cols (also overwrites rdx)
	mov [buffer_size], rax
	mov [BMP_img_size], eax
	mov rsi, rax			    ; keep the size for the mmap call below
	add rax, 62			        ; BMP file size including header
	mov [BMP_file_size], eax

	; allocate memory for the simulation buffers
	; 2 * row_length * total_rows
	mov rax, 9			        ; mmap is syscall 9
	mov rdi, 0 			        ; let the kernel choose where the memory starts
	sal rsi, 1			        ; rsi = buffer_size doubled, as we want two buffers
	mov rdx, 3 			        ; memory protection: PROT_READ | PROT_WRITE
	mov r10, 34 			    ; flags: MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1 			        ; no file descriptor
	mov r9, 0 			        ; no offset
	syscall	

	; the raw syscall reports failure as -errno, i.e. a value in
	; [-4095, -1], not as -1. Treated as unsigned, all of those are >= -4095.
	cmp rax, -4095
	jae mmap_failed
	mov [buffer_ptr], rax		; save address

	; zero the buffers
	mov rdi, [buffer_ptr]
	mov rax, 0x00
	mov rcx, [buffer_size]
	sal rcx, 1			        ; both buffers at once
	rep stosb

	; initialize the buffer pointers: the first half is the source board,
	; the second half the destination board
	mov rax, [buffer_ptr]
	mov [src_ptr], rax
	add rax, [buffer_size]
	mov [dst_ptr], rax

	; initialize the source buffer with the pattern from the .cells file 
	; bitmaps are flipped vertically in the file format. So to get the pattern to look the
	; right way up, it is written from the bottom of the buffer to the top.
	; try to put the pattern in the middle of the allocated buffer
	; compute horizontal offset
	mov r8, [row_length]
	sub r8, [pattern_cols]
	sar r8, 1			            ; r8 = free columns to the left of the pattern

	; compute vertical offset
	mov r9, [total_rows]
	sub r9, [pattern_rows]
	sar r9, 1			            ; r9 = free rows above the pattern (as displayed)
	mov rax, [total_rows]			
	sub rax, r9
	dec rax				            ; rows are numbered 0..total_rows-1, so the row that is
						            ; r9 rows below the top is total_rows-1-r9. Without this
						            ; the pattern started one row too high: on the border row
						            ; that is never processed, or past the end of the buffer
						            ; for a one-row pattern.
	mul qword [row_length]			; convert from row offset to byte offset (overwrites rdx)

	mov rsi, [pattern_ptr] 			; the source pattern
	mov rbx, [src_ptr]			    ; write it to the first buffer
	add rbx, rax				    ; add vertical offset		
	mov rdx, [pattern_rows]			; the number of rows in the pattern

pattern_init_loop:		
	mov rdi, rbx			 
	add rdi, r8				        ; add horizontal offset each row
	mov rcx, [pattern_cols]
	rep movsb				        ; copy a row; rsi moves on to the next pattern row
	sub rbx, [row_length]			; move 'up' a row
	dec rdx					        ; row counter
	jnz pattern_init_loop

	; run the pattern with the Game of Life rules

life_loop:
	; One tick of the simulation.
	; First save the current board as a .bmp. snapshot_buffer hands back
	; rsi still pointing at the source board.
	mov rsi, [src_ptr]
	call snapshot_buffer

	; The outermost ring of cells is never computed and therefore stays
	; dead: the board is supposed to be infinite, which cannot be done in
	; this memory representation. So start one row in.
	;   r10 -> row above the current cell
	;   rsi -> row of the current cell
	;   r11 -> row below the current cell
	;   rdi -> cell to write in the destination board
	mov rdi, [dst_ptr]
	mov rax, [row_length]
	mov r10, rsi
	lea r11, [rsi + rax*2]
	add rsi, rax
	add rdi, rax
	mov rdx, [total_rows]
	sub rdx, 2		        ; first and last row are border

next_row_loop:

	; step over the border cell at the start of the row
	inc rsi
	inc r10
	inc r11
	inc rdi

	mov rcx, [row_length]
	sub rcx, 2		        ; first and last cell of the row are border

in_row_loop:

	; bl = number of live neighbours of the current cell (0..8)
	movzx ebx, byte [r10 - 1]	; above left
	add bl, [r10]			    ; above
	add bl, [r10 + 1]		    ; above right
	add bl, [rsi - 1]		    ; left
	add bl, [rsi + 1]		    ; right
	add bl, [r11 - 1]		    ; below left
	add bl, [r11]			    ; below
	add bl, [r11 + 1]		    ; below right

	; new state = (neighbours == 3) or (cell is live and neighbours == 2)
	cmp byte [rsi], 1
	sete al				        ; al = cell is live now
	cmp bl, 2
	sete r8b
	and al, r8b			        ; al = live cell that survives with 2 neighbours
	cmp bl, 3
	sete r8b
	or al, r8b			        ; 3 neighbours: live regardless of the old state
	mov [rdi], al

	; on to the next cell
	inc rsi
	inc r10
	inc r11
	inc rdi

	dec rcx
	jnz in_row_loop

	; step over the border cell at the end of the row
	inc rsi
	inc r10
	inc r11
	inc rdi

	dec rdx
	jnz next_row_loop

	; tick finished: the board just written becomes the source and vice versa
	mov rax, [src_ptr]
	mov rbx, [dst_ptr]
	mov [src_ptr], rbx
	mov [dst_ptr], rax

	; count the tick and go round again until the requested number is done
	mov rax, [iteration]
	inc rax
	mov [iteration], rax
	cmp rax, [num_iterations]
	jle life_loop

exit:
	; sys_exit(0): terminate the process, reporting status 0 to the parent
	xor edi, edi			; rdi = exit status (0)
	mov eax, 60			; rax = 60 (0x3c), the exit syscall number
	syscall

;;;;;;;;;;
; some error messages

; Error / usage handlers.
; Each handler loads its message (rsi = text, rdx = length), prints it
; with write_out and then leaves the program through exit. They are
; jumped to, not called, and never return.

usage:
	mov rsi, usage_str
	mov rdx, usage_len
	call write_out
	jmp exit

mmap_failed:
	mov rsi, mmap_error_str
	mov rdx, mmap_error_len
	call write_out
	jmp exit

fnf_error:
	; This used to `jmp write_out`. write_out ends in `ret`, and as this
	; handler is reached by a jump from _start there is no return address
	; on the stack: the ret popped the NULL that terminates argv and the
	; program crashed after printing the message. Call it and exit instead.
	mov rsi, FNF_error_str
	mov rdx, FNF_error_len
	call write_out
	jmp exit

denied_error:
	; same correction as fnf_error
	mov rsi, denied_error_str
	mov rdx, denied_error_len
	call write_out
	jmp exit

file_write_error:
	mov rsi, file_error_str
	mov rdx, file_error_len
	call write_out
	jmp exit

number_error:
	mov rsi, number_error_str
	mov rdx, number_error_len
	call write_out
	jmp exit

; write_out: write(1, rsi, rdx)
; In:    rsi = pointer to the text, rdx = number of bytes
; Out:   rax = result of the write syscall
write_out:
	mov edi, 1			; rdi = file descriptor 1 (stdout)
	mov eax, edi			; rax = 1, the write syscall number
	syscall
	ret

;;;;;;;;;

;-----------------------------------------------------------------------
; snapshot_buffer: save one board as an 8 bpp .bmp file
; In:    rsi = start of the board to write ([buffer_size] bytes)
; Out:   rsi = unchanged (the caller relies on this)
; Clobb: rax, rcx, rdx, rdi, r8, r11, flags, plus whatever
;        update_filename changes
; Error: jumps to file_write_error (prints a message and exits) if the
;        file cannot be opened or is not written completely
;-----------------------------------------------------------------------
snapshot_buffer:
	; save to bitmap
	; rsi should point to the start of the buffer to write
	push rsi

	; make sure the filename is correct (it contains the tick number)
	call update_filename

	; open the specified file for writing. 
	; Create and truncate the file if necessary and apply some basic permissions
	mov rax, 2
	mov rdi, out_file
	mov rsi, 1101o ; flags: O_TRUNC | O_CREAT | O_WRONLY
	mov rdx, 0664o	; permissions: -rw-rw-r--
	syscall

	; A negative result is -errno. Previously only -13 (permission denied)
	; was noticed, and that case exited without any message; report every
	; failure to open the output file instead.
	test rax, rax
	js file_write_error

	mov r8, rax			; file descriptor (r8 is not changed by the syscalls below)

	; write out the file
	; header first
	mov rsi, BMP_ident
	mov rdx, 62 		; file header + DIB header + colour table
	mov rax, 1
	mov rdi, r8 
	syscall

	; check to make sure the entire header was written
	cmp rax, 62
	jne file_write_error

	; write out the image data
	pop rsi				; the board pointer saved on entry
	mov rdx, [buffer_size]
	mov rax, 1
	mov rdi, r8
	syscall

	; check to make sure all the image data was written
	cmp rax, [buffer_size]
	jne file_write_error

	; close the file
	mov rax, 3
	mov rdi, r8
	syscall

	; done
	ret

; update_filename: write [iteration] as decimal digits into the file_num
; field of the output filename. Digits are produced least significant
; first and stored backwards, ending just before file_ext. At most
; file_num_len digits are written; positions further left are untouched.
; Clobb: rax, rcx, rdx, rdi, r8, flags
update_filename:
	mov rax, [iteration]		; value still to be converted
	mov r8d, 10			        ; divisor
	mov rdi, file_ext		    ; one past the last digit position
	xor ecx, ecx			    ; digits written so far
itoa_inner:
	dec rdi				        ; next digit position, moving left
	xor edx, edx			    ; div takes rdx:rax as the dividend
	div r8				        ; rax = quotient, rdx = remainder (0..9)
	add dl, '0'			        ; remainder -> ASCII digit
	mov [rdi], dl
	inc rcx
	cmp rcx, file_num_len		; field full?
	je itoa_done			    ; then stop, dropping any higher digits
	test rax, rax			    ; anything left to convert?
	jnz itoa_inner
itoa_done:
	ret


;-----------------------------------------------------------------------
; string_to_num: convert a NUL-terminated string of decimal digits
; In:    rsi = pointer to the string
; Out:   rcx = value, rsi = pointer to the last digit consumed
; Clobb: rbx, flags
; Error: jumps to number_error (which prints a message and exits) when
;        the string is empty, contains a non-digit character, or the
;        value does not fit in a signed 64-bit integer. The tick count
;        is compared as a signed number, so a value that wrapped around
;        would previously have been accepted with a meaningless result.
;-----------------------------------------------------------------------
string_to_num:
	xor ecx, ecx			; rcx will be the final number
atoi_loop:
	movzx rbx, byte [rsi]		; get the char pointed to by rsi
	cmp rbx, 0x30			; below '0' (this also rejects an empty string)
	jl number_error
	cmp rbx, 0x39			; above '9'
	jg number_error
	sub rbx, 0x30			; ASCII digit -> 0..9
	add rcx, rbx			; accumulate number in rcx
	jo number_error			; value no longer fits: reject instead of wrapping
	movzx rbx, byte [rsi+1]		; peek at the next char
	cmp rbx, 0			; string should be null-terminated
	je done_string			; if it's null we're done converting
	imul rcx, 10			; make room for the next digit
	jo number_error			; imul sets OF when the product was truncated
	inc rsi				; advance to the next char
	jmp atoi_loop
done_string:
	; rcx is the number
	ret


				
				

Game of Life in a terminal

An alternative to saving the frames to bitmap files is to use the terminal as a framebuffer to display the pattern as it runs. This is possible because terminals can often be controlled using ANSI escape sequences. The version of the program below uses ANSI escape sequences to clear the terminal and display the latest frame of the evolving pattern. The downside to this is that it can't handle large patterns. The sys_nanosleep syscall is used to create a small delay between each frame.


; Game of Life in a terminal
; Open the specified pattern in a .cells file and run 
; it in the terminal for a specified number of ticks
; nasm -f elf64 game_of_life_terminal.asm
; ld -o game_of_life_terminal game_of_life_terminal.o
; e.g.:- ./game_of_life_terminal ./106p135.cells 100

section .data
	
	; terminal sizes can vary greatly. These might need adjusting for other terminals.
	; They are assemble-time constants: the board is row_length x total_rows
	; cells, and the three buffers in .bss are sized from them.
	row_length      	equ     130	
	total_rows		    equ   	50 

	; ANSI sequence to clear and reset the terminal
	; ESC[2J followed by ESC[H
	cls_code		    db	    0x1b, '[2J', 0x1b, '[H'
	cls_len			    equ	    $-cls_code

	; various error messages
	; each string is followed by an assemble-time length ($ - start of string)
	usage_str		    db	    'Please enter pattern filename (.cells format) and number of iterations', 0x0a
	usage_len		    equ	    $-usage_str

	mmap_error_str		db	    'mmap failed.', 0x0a
	mmap_error_len		equ	    $-mmap_error_str

	FNF_error_str       db      'File not found.', 0x0a
	FNF_error_len    	equ     $-FNF_error_str

	denied_error_str    db      'Permission denied.', 0x0a
	denied_error_len 	equ     $-denied_error_str

	number_error_str	db	    'Error in number of iterations.', 0x0a
	number_error_len	equ	    $-number_error_str

	size_error_str		db	    'Pattern is too big.', 0x0a
	size_error_len		equ	    $-size_error_str

	; for display in the terminal, a dead cell becomes a space 
	; and a live cell an 'O'. The table is indexed by the cell value (0 or 1).
	conversion_table	db	    ' ', 'O'
	
	iteration		    dq	    1       ; current tick, starts at 1
	num_iterations		dq	    0	    ; number of ticks requested on the command line

	; for nanosleep
	; struct timespec: the two quadwords must stay adjacent and in this
	; order, as the address of tv_sec is what gets passed to the syscall
	tv_sec			    dq	    0
	tv_nsec			    dq	    250000000	; 0.25 seconds in nanoseconds

section .bss

	; pointers to passed parameters
	pattern_file_ptr	resq	1       ; argv[1]: name of the .cells file
	ticks_ptr		    resq	1       ; argv[2]: number of ticks, as text

	; for reading in the pattern (.cells) file
	PT_file_desc		resq	1       ; result of the open syscall on the pattern file
	PT_char_buf		    resb	1       ; the file is read one byte at a time into here

	pattern_start		resq	1       ; number of comment bytes counted in the first pass; used as the lseek offset
	pattern_cols		resq	1       ; length of the longest data row
	pattern_rows		resq	1       ; number of data rows
	pattern_size		resq	1       ; pattern_cols * pattern_rows
	pattern_ptr		    resq	1       ; mmap'd buffer holding the pattern, one byte per cell

	; The three buffers must stay adjacent and in this order: _start clears
	; all of them with a single fill that starts at buffer_1.
	buffer_1		    resb	row_length*total_rows   ; board, initially the source
	buffer_2		    resb	row_length*total_rows   ; board, initially the destination
	buffer_3		    resb	row_length*total_rows   ; text version of a board, written to the terminal

	src_ptr			    resq	1       ; board read during the current tick
	dst_ptr			    resq	1       ; board written during the current tick (swapped with src_ptr each tick)

section .text

global _start

_start:

	; process input parameters
	; on entry the top of the stack is argc, followed by the argv pointers
	pop rax			
	cmp rax, 3 			; program name + pattern file + ticks
	jne usage

	pop rsi				; skip past program name

	; get pointers to the strings entered for input pattern and number of ticks
	pop rsi				; filename
	mov [pattern_file_ptr], rsi
	pop rsi				; ticks
	mov [ticks_ptr], rsi

	; convert ticks passed into the program, into a number
	; (string_to_num jumps to number_error itself if the text is not a number)
	mov rsi, [ticks_ptr]
	call string_to_num
	mov [num_iterations], rcx
	
	; read in the pattern
	call read_pattern

	; a file without any data rows cannot be displayed. The copy loop
	; below counts rows down to zero, so a row count of 0 would wrap
	; around and keep it spinning; show the usage text instead.
	cmp qword [pattern_rows], 0
	je usage

	; check to make sure the pattern isn't too big for the terminal.
	; The outermost ring of cells is a border that the simulation never
	; recomputes, so the pattern has to fit inside it: at most
	; row_length-2 columns and total_rows-2 rows. The previous limits
	; allowed one more of each, which put pattern cells on the border
	; where they were never updated.
	cmp qword [pattern_cols], row_length-2
	ja size_error
	cmp qword [pattern_rows], total_rows-2
	ja size_error

	; zero and set up the buffers (all three are adjacent in .bss)
	mov rdi, buffer_1
	mov rax, 0
	mov rcx, 3*row_length*total_rows
	rep stosb

	mov qword [src_ptr], buffer_1
	mov qword [dst_ptr], buffer_2

	; initialize the source buffer with the pattern
	; try to center the pattern in the terminal
	; calculate horizontal offset
	mov r8, row_length
	sub r8, [pattern_cols]
	sar r8, 1				        ; r8 = free columns to the left of the pattern

	; calculate vertical offset
	mov rax, total_rows
	sub rax, [pattern_rows]
	sar rax, 1				        ; free rows above the pattern
	mov r9, row_length
	mul r9					        ; convert rows to a byte offset (overwrites rdx)

	mov rsi, [pattern_ptr] 			; the source pattern
	mov rbx, [src_ptr]			    ; write it to the first buffer
	add rbx, rax				    ; space at the top
	mov rdx, [pattern_rows]			; the number of rows in the pattern

pattern_init_loop:				    ; use rbx to index into a row to keep rdi clean
	mov rdi, rbx			 
	add rdi, r8				        ; horizontal offset
	mov rcx, [pattern_cols]
	rep movsb				        ; copy a row; rsi moves on to the next pattern row
	add rbx, row_length		
	dec rdx					        ; next row
	jnz pattern_init_loop

	; run the pattern with the Game of Life rules
life_loop:
	; One tick of the simulation.
	; First show the current board. snapshot_buffer hands back rsi still
	; pointing at the source board.
	mov rsi, [src_ptr]
	call snapshot_buffer

	; rsi -> source board, rdi -> destination board,
	; rax = byte index of the current cell, the same in both boards.
	; The outermost ring of cells is a border that is never computed.
	mov rdi, [dst_ptr]
	mov eax, row_length	            ; start of the second row
	mov edx, total_rows - 2	        ; rows to do: all but the first and last

next_row_loop:

	inc rax			                ; step over the border cell at the start of the row

	mov ecx, row_length - 2	        ; cells to do: all but the first and last

in_row_loop:

	; bl = number of live neighbours of the current cell (0..8)
	lea r8, [rsi + rax]		        ; r8 -> current cell in the source board
	movzx ebx, byte [r8 - row_length - 1]	; above left
	add bl, [r8 - row_length]		; above
	add bl, [r8 - row_length + 1]	; above right
	add bl, [r8 - 1]			    ; left
	add bl, [r8 + 1]			    ; right
	add bl, [r8 + row_length - 1]	; below left
	add bl, [r8 + row_length]		; below
	add bl, [r8 + row_length + 1]	; below right

	; new state = (neighbours == 3) or (cell is live and neighbours == 2)
	cmp byte [r8], 1
	sete r9b			            ; r9b = cell is live now
	cmp bl, 2
	sete r10b
	and r9b, r10b			        ; live cell that survives with 2 neighbours
	cmp bl, 3
	sete r10b
	or r9b, r10b			        ; 3 neighbours: live regardless of the old state
	mov [rdi + rax], r9b

	inc rax			                ; next cell along

	dec rcx
	jnz in_row_loop

	inc rax			                ; step over the border cell at the end of the row

	dec rdx
	jnz next_row_loop

	; tick finished: the board just written becomes the source and vice versa
	mov [src_ptr], rdi
	mov [dst_ptr], rsi

	; count the tick and go round again until the requested number is done
	mov r9, [iteration]
	inc r9
	mov [iteration], r9
	cmp r9, [num_iterations]
	jle life_loop

	; finished
exit:
	; sys_exit(0): terminate the process, reporting status 0 to the parent
	xor edi, edi			; rdi = exit status (0)
	mov eax, 60			; rax = 60 (0x3c), the exit syscall number
	syscall

;;;;;;;;;
; some messages

; Error / usage handlers.
; Each handler loads its message (rsi = text, rdx = length), prints it
; with write_out and then leaves the program through exit. They are
; jumped to, not called, and never return.

usage:
	mov rsi, usage_str
	mov rdx, usage_len
	call write_out
	jmp exit

size_error:
	mov rsi, size_error_str
	mov rdx, size_error_len
	call write_out
	jmp exit

mmap_failed:
	mov rsi, mmap_error_str
	mov rdx, mmap_error_len
	call write_out
	jmp exit

fnf_error:
	; This used to `jmp write_out`. The handler is reached by a jump from
	; inside read_pattern, so the `ret` at the end of write_out returned
	; to read_pattern's caller and the program carried on as if a pattern
	; had been loaded. Print the message and exit instead.
	mov rsi, FNF_error_str
	mov rdx, FNF_error_len
	call write_out
	jmp exit

denied_error:
	; same correction as fnf_error
	mov rsi, denied_error_str
	mov rdx, denied_error_len
	call write_out
	jmp exit

number_error:
	mov rsi, number_error_str
	mov rdx, number_error_len
	call write_out
	jmp exit

; write_out: write(1, rsi, rdx)
; In:    rsi = pointer to the text, rdx = number of bytes
; Out:   rax = result of the write syscall
write_out:
	mov edi, 1			; rdi = file descriptor 1 (stdout)
	mov eax, edi			; rax = 1, the write syscall number
	syscall
	ret


;;;;;;;;;;

; snapshot_buffer: show one board in the terminal, then pause briefly
; In:    rsi = board to show (row_length*total_rows bytes, values 0 or 1)
; Out:   rsi = unchanged (the caller relies on this)
; Clobb: rax, rbx, rcx, rdx, rdi, r11, flags
snapshot_buffer:
	push rsi

	; Build the text version of the board in buffer_3:
	; 0 becomes a space, 1 becomes 'O', and the last column of every
	; row is replaced by a newline.
	mov rbx, conversion_table
	mov rdi, buffer_3
	mov edx, total_rows		; rows still to convert
do_row:
	mov ecx, row_length-1		; cells to convert in this row
do_cells:
	movzx eax, byte [rsi]		; cell state
	mov al, [rbx + rax]		; look up its display character
	mov [rdi], al
	inc rsi
	inc rdi
	dec rcx
	jnz do_cells
	; the final cell of the row is skipped and a newline stored in its place
	mov byte [rdi], 0x0a
	inc rsi
	inc rdi
	dec rdx
	jnz do_row

	; send out this frame.
	; first clear the screen with the ANSI control sequence
	mov eax, 1			; write
	mov edi, 1			; to stdout
	mov rsi, cls_code
	mov edx, cls_len
	syscall

	; then write out the stringified board
	mov eax, 1			; write
	mov edi, 1			; to stdout
	mov rsi, buffer_3
	mov edx, row_length*total_rows
	syscall

	; use nanosleep to pause for a moment so it doesn't all flash by instantly
	mov eax, 0x23			; nanosleep
	mov rdi, tv_sec			; the timespec made of tv_sec and tv_nsec
	xor esi, esi			; no 'remaining time' result wanted
	syscall

	pop rsi
	ret
	

;-----------------------------------------------------------------------
; read_pattern: load the .cells file named by [pattern_file_ptr]
; Out:   [pattern_cols]  = length of the longest data row
;        [pattern_rows]  = number of data rows
;        [pattern_size]  = pattern_cols * pattern_rows
;        [pattern_start] = number of comment bytes skipped
;        [pattern_ptr]   = mmap'd buffer, one byte per cell (1 = live)
; Clobb: rax, rcx, rdx, rsi, rdi, r8-r13, flags
; Error: does not return; jumps to fnf_error, denied_error, usage or
;        mmap_failed, each of which prints a message and exits
;
; Changes from the earlier version:
;  - every failure of open is caught, not only "not found" and
;    "permission denied"
;  - mmap failure is detected properly (the raw syscall returns
;    -errno, not -1), which also covers an empty pattern
;  - the full 64-bit size is passed to mmap
;  - the file is closed once it has been read
;-----------------------------------------------------------------------
read_pattern:
	; load the .cells file
	mov rdi, [pattern_file_ptr]
	mov rax, 2              ; 'open'
	mov rsi, 0              ; flags: O_RDONLY
	mov rdx, 0              
	syscall

	; check for open errors. A raw syscall reports failure as -errno in rax.
	cmp rax, -2             
	je fnf_error            ; error: file not found

	cmp rax, -13
	je denied_error         ; error: permission denied

	; any other negative result is a failure too. There is no dedicated
	; message for these, so show the usage text.
	test rax, rax
	js usage
    
PT_proceed:

	; use a simple state machine to process the .cells file.
	; these files can have comments as well as data.
	; There are two passes through the file. In the first pass,
	; try to determine the dimensions of the board in the file.
	; Use those dimensions to allocate some memory, then rewind
	; the file and read the actual board data into the allocated
	; memory.
	; This file loader makes a lot of assumptions about the 
	; format of the input file (e.g. the ! char is at the 
	; beginning of a line) but it seems to work so far
	;
	; States held in r9:
	;   0 = at the start of a line, nothing decided yet
	;   1 = inside a comment line (started with '!')
	;   2 = inside a row of cell data

	mov [PT_file_desc], rax    	; file descriptor
	mov r9, 0			; state. Start in state=0
	mov r10, 0			; columns (longest row seen so far)
	mov r11, 0			; rows
	mov r12, 0			; temp for current row column counter
	mov r13, 0			; offset into file where the board data begins

PT_read_loop:
	push r9				        ; save vars across syscall (the syscall instruction overwrites r11 and rcx)
	push r10
	push r11
	push r12
	push r13
	xor eax, eax            	; read
	mov rdi, [PT_file_desc]    	; from .cells file
	mov rsi, PT_char_buf		; memory location to read to
	mov rdx, 1			        ; read 1 char from the file
	syscall
	pop r13				        ; restore vars
	pop r12
	pop r11
	pop r10
	pop r9

	cmp rax, 1             		; check for EOF (a read error also ends the pass here)
	jne PT_d1

	; current state determines how to process the char just read
	cmp r9, 0		    ; state 0 is a kind of holding state
	je PT_state_0
	cmp r9, 1		    ; state 1 means reading a comment
	je PT_state_1
	; assume state 2 - reading the cell data block
PT_state_2:
	; check for end of line
	cmp byte [PT_char_buf], 0x0a
	je PT_state_2_0
	; skip past 0x0d if present
	cmp byte [PT_char_buf], 0x0d
	je PT_read_loop
	; assume char_buf has valid cell data
	inc r12			    ; keep track of the number of columns
	jmp PT_read_loop
PT_state_2_0:
	; end of a data row (an empty line arrives here too and counts as a row)
	inc r11			    ; increment row counter
	mov r9, 0		    ; reset state
	cmp r12, r10		; keep track of the maximum row length in r10
	cmova r10, r12		; since .cells files can have a variable row length
	mov r12, 0
	jmp PT_read_loop
PT_state_0: 
	; check to see if it's the start of a comment
	cmp byte [PT_char_buf], '!'	; start of a comment
	je PT_state_0_0
	mov r9, 2			; assume we've entered a cell data row, set state=2
	jmp PT_state_2			; process the data cell just read
PT_state_0_0:
	mov r9, 1			; set state=1, 'reading comment' state
	inc r13				; the '!' counts towards the data start offset
	jmp PT_read_loop
PT_state_1:	
	; reading a comment. Just pull in bytes until the newline
	; every comment byte, including its newline, is added to r13
	inc r13
	cmp byte [PT_char_buf], 0x0a
	jne PT_read_loop
	mov r9, 0		; reset to state=0
	jmp PT_read_loop
PT_d1:
	; final row might not have a newline at the end.
	; Catch up with any final data cells
	cmp r12, 0	
	je PT_d2
	cmp r12, r10
	cmova r10, r12
	inc r11
PT_d2:
	; at this point, r10 should be the number of data columns, r11 the number of data rows
	; save these
	mov [pattern_cols], r10
	mov [pattern_rows], r11
	mov [pattern_start], r13	; only correct if all comment lines come before the data
	mov rax, r10
	mul r11				; rax = cols * rows (rdx is overwritten with the high half)
	mov [pattern_size], rax

	; now have to allocate memory and read the file again to extract the data
	mov rax, 9			; mmap is syscall 9
	mov rdi, 0 			; let the kernel choose where the memory starts
	mov rsi, [pattern_size]		; the size in bytes that we want (all 64 bits of it)
	mov rdx, 3 			; memory protection: PROT_READ | PROT_WRITE
	mov r10, 34 			; flags: MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1 			; no file descriptor
	mov r9, 0 			; no offset
	syscall	

	; failure is reported as -errno, a value in [-4095, -1]; treated as
	; unsigned, all of those are >= -4095. A zero-length request (empty
	; pattern) fails this way as well.
	cmp rax, -4095
	jae mmap_failed
	mov [pattern_ptr], rax		; save address

	; seek back in the file
	mov rax, 8			        ; sys_lseek
	mov rdi, [PT_file_desc]		; file descriptor
	mov rsi, [pattern_start]	; offset into the file where data begins
	mov rdx, 0			        ; SEEK_SET
	syscall

	; read the pattern data into the buffer just created
	; rdi = start of the current row in the buffer, rcx = column within it
	xor ecx, ecx		        ; rcx is index into column
	mov rdi, [pattern_ptr]	    ; pointer to the beginning of the memory buffer
PT_pattern_in:
	push rcx			        ; the syscall overwrites rcx, and rdi is needed for the fd
	push rdi
	xor eax, eax                ; read
	mov rdi, [PT_file_desc]	    ; from .cells file
	mov rsi, PT_char_buf	    ; memory location to read to
	mov rdx, 1		            ; read 1 byte
	syscall
	pop rdi
	pop rcx

	cmp rax, 1		            ; check for end of file
	jne PT_d3

	cmp byte [PT_char_buf], 0x0d	; ignore
	je PT_pattern_in
	
	cmp byte [PT_char_buf], 0x0a	; newline: start the next row
	jne PT_set_cell

	add rdi, qword [pattern_cols]
	xor ecx, ecx
	jmp PT_pattern_in

PT_set_cell:
	; set the byte in the destination buffer if 'O' or '*' (sometimes used in .cells files apparently)
	cmp byte [PT_char_buf], 'O'
	sete [rdi+rcx]	
	cmp byte [PT_char_buf], '*'
	sete al
	or [rdi+rcx], al

	inc rcx
	jmp PT_pattern_in

PT_d3:
	; file has been read into memory; the descriptor is no longer needed
	mov rax, 3			        ; close
	mov rdi, [PT_file_desc]
	syscall
	ret


;-----------------------------------------------------------------------
; string_to_num: convert a NUL-terminated string of decimal digits
; In:    rsi = pointer to the string
; Out:   rcx = value, rsi = pointer to the last digit consumed
; Clobb: rbx, flags
; Error: jumps to number_error (which prints a message and exits) when
;        the string is empty, contains a non-digit character, or the
;        value does not fit in a signed 64-bit integer. The tick count
;        is compared as a signed number, so a value that wrapped around
;        would previously have been accepted with a meaningless result.
;-----------------------------------------------------------------------
string_to_num:
	xor ecx, ecx			; rcx will be the final number
atoi_loop:
	movzx rbx, byte [rsi]		; get the char pointed to by rsi
	cmp rbx, 0x30			; below '0' (this also rejects an empty string)
	jl number_error
	cmp rbx, 0x39			; above '9'
	jg number_error
	sub rbx, 0x30			; ASCII digit -> 0..9
	add rcx, rbx			; accumulate number in rcx
	jo number_error			; value no longer fits: reject instead of wrapping
	movzx rbx, byte [rsi+1]		; peek at the next char
	cmp rbx, 0			; string should be null-terminated
	je done_string			; if it's null we're done converting
	imul rcx, 10			; make room for the next digit
	jo number_error			; imul sets OF when the product was truncated
	inc rsi				; advance to the next char
	jmp atoi_loop
done_string:
	; rcx is the number
	ret