π in assembly (spigot algorithm)

Posted by jgrant, 2010-07-22 09:25:30




//   pi_spigot.s - calculates Pi using a spigot algorithm
//                 as an array of n digits in base 10000.
//                 http://mathworld.wolfram.com/SpigotAlgorithm.html
//
//  x86-64/SSE3 for Linux, Intel, GNU assembler, gcc
//
//  assemble: as pi_spigot.s -o pi_spigot.o
//  link:     gcc -o pi_spigot pi_spigot.o
//  example run:      ./pi_spigot 100
//  output: 3.14159265358979323846264338327950288419716939937510582097494459230 ...
//        ... 78164062862089986280348253421170679
//

	.section	.rodata
// printf format strings used only by print().
.Lprint_fmt_head:
	.string	"%d."			# leading limb followed by the decimal point
.Lprint_fmt_group:
	.string	"%04d"			# one zero-padded four-digit group
	.text
//-----------------------------------------------------------------------
// print - write a limb array to stdout as a single line of decimal text
//
// Syntax: GNU as, AT&T operand order.   ABI: System V AMD64.
// In:     rdi = base address of an array of unsigned 16-bit limbs
//         esi = n, limb count, treated as a signed 32-bit integer
// Out:    no return value is set up
// Saved:  rbx, r12, r13 are preserved (they carry state across calls)
//
// Limb 1 is written with "%d.", limbs 2 .. n-2 are each written with
// "%04d", and a newline is written last.  Limb 0 and limb n-1 are never
// printed.
//
// The format strings are addressed RIP-relative and libc is reached via
// the PLT, so the object can be linked into a position-independent
// executable as well as a fixed-address one.
//
// CFI register numbers used below: 3 = rbx, 6 = rbp, 7 = rsp,
// 12 = r12, 13 = r13.
//-----------------------------------------------------------------------
.globl print
	.type	print, @function
print:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	subq	$8, %rsp		# pad so rsp is 16-byte aligned at every call
	.cfi_offset 3, -24
	.cfi_offset 12, -32
	.cfi_offset 13, -40

	movq	%rdi, %rbx		# rbx  = limb array base
	leal	-1(%rsi), %r13d		# r13d = n - 1, exclusive upper bound for the index

	movzwl	2(%rbx), %esi		# limb 1 is the integer part
	leaq	.Lprint_fmt_head(%rip), %rdi
	xorl	%eax, %eax		# variadic call: no vector registers carry arguments
	call	printf@PLT

	movl	$2, %r12d		# r12d = index of the first fractional limb
	jmp	.Lprint_check

.Lprint_group:
	movslq	%r12d, %rax
	movzwl	(%rbx,%rax,2), %esi	# limb at the current index, zero-extended
	leaq	.Lprint_fmt_group(%rip), %rdi
	xorl	%eax, %eax
	call	printf@PLT
	addl	$1, %r12d

.Lprint_check:
	cmpl	%r12d, %r13d		# keep going while index < n - 1 (signed)
	jg	.Lprint_group

	movl	$10, %edi		# terminate the line with a newline character
	call	putchar@PLT

	addq	$8, %rsp
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
	.size	print, .-print
//-----------------------------------------------------------------------
// main - compute pi in base 10000 with a spigot recurrence and print it
//
// Syntax: GNU as, AT&T operand order.   ABI: System V AMD64.
// In:     edi = argc, rsi = argv
// Out:    eax = 0 on success, 1 when the argument is rejected or the
//         limb array cannot be allocated (a message goes to stderr)
//
// With an argument, argv[1] is parsed by atoi as a digit count d and
// the limb count is n = (d + 3) / 4 + 3.  Without one, n = 253.
//
// The array pi[0 .. n-1] of unsigned 16-bit limbs is zeroed, pi[1] is
// set to 4, and then for i = trunc(13.288 * n) down to 1:
//     1. the whole number is multiplied by i (carries run from limb
//        n-1 toward limb 0),
//     2. the whole number is divided by 2*i + 1 (remainders run from
//        limb 0 toward limb n-1),
//     3. 2 is added to pi[1].
// The result is handed to print(pi, n).
//
// Register roles after argument handling:
//     rbx  = pi (array base)     r12d = n     r13d = i
//     r8d  = magic multiplier for dividing by 10000
//     r9d  = 2*i + 1             ecx / edi = running carry / remainder
//     esi  = limb index j
// No function is called while the two inner loops run, so the scratch
// registers above stay valid there.
//
// Argument validation: d must lie in 0 .. PI_MAX_DIGITS.
//   * A negative d can make n smaller than 2, so that the store to pi[1]
//     lands outside the allocation, or negative, so that the size passed
//     to malloc wraps to a huge value.
//   * The division step forms rem * 10000 + limb in 32 bits with
//     rem <= 2*i and limb <= 9999.  That needs i <= 107373, which holds
//     for n <= 8080, i.e. for d <= 32308.
//
// CFI register numbers used below: 3 = rbx, 6 = rbp, 7 = rsp,
// 12 = r12, 13 = r13.
//-----------------------------------------------------------------------
	.equ	PI_BASE, 10000			# radix of one limb
	.equ	PI_BASE_MAGIC, 1759218605	# ceil(2^44 / 10000), for t / 10000 by multiply
	.equ	PI_DEFAULT_LIMBS, 253		# n used when no argument is given
	.equ	PI_MAX_DIGITS, 32308		# largest d whose 32-bit intermediates cannot overflow

	.section	.rodata
.Lmain_msg_range:
	.string	"pi_spigot: digit count must be in the range 0..32308\n"
.Lmain_msg_nomem:
	.string	"pi_spigot: out of memory\n"
	.align 8
// IEEE-754 double 0x402A9374BC6A7EFA (about 13.288), stored low word
// first: the number of recurrence steps performed per limb.
.Lmain_steps_per_limb:
	.long	3161095930
	.long	1076532084

	.text
.globl main
	.type	main, @function
main:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	subq	$8, %rsp		# pad so rsp is 16-byte aligned at every call
	.cfi_offset 3, -24
	.cfi_offset 12, -32
	.cfi_offset 13, -40

	// ---- choose the limb count n ---------------------------------
	movl	$PI_DEFAULT_LIMBS, %r12d
	cmpl	$1, %edi
	jle	.Lmain_have_n		# no argument: keep the default
	movq	8(%rsi), %rdi		# argv[1]
	call	atoi@PLT
	cmpl	$PI_MAX_DIGITS, %eax
	ja	.Lmain_bad_count	# unsigned test rejects negatives and oversize values
	addl	$3, %eax
	shrl	$2, %eax		# (d + 3) / 4; operand is non-negative here
	leal	3(%rax), %r12d		# n = (d + 3) / 4 + 3
.Lmain_have_n:

	// ---- allocate and initialise pi[] ----------------------------
	movslq	%r12d, %rdi
	addq	%rdi, %rdi		# byte size = n * 2
	call	malloc@PLT
	testq	%rax, %rax
	jz	.Lmain_no_memory
	movq	%rax, %rbx

	movslq	%r12d, %rdx
	addq	%rdx, %rdx		# same byte size again
	xorl	%esi, %esi
	movq	%rbx, %rdi
	call	memset@PLT
	movw	$4, 2(%rbx)		# pi[1] = 4

	// ---- i = trunc(13.288 * n) -----------------------------------
	pxor	%xmm0, %xmm0		# avoid a false dependency on the old xmm0
	cvtsi2sdl	%r12d, %xmm0
	mulsd	.Lmain_steps_per_limb(%rip), %xmm0
	cvttsd2si	%xmm0, %r13d	# n >= 3 here, so i starts positive

	movl	$PI_BASE_MAGIC, %r8d

.Lmain_step:
	// ---- pi *= i, limb n-1 down to limb 0 ------------------------
	xorl	%ecx, %ecx		# t = 0 (running carry)
	leal	-1(%r12), %esi		# j = n - 1
.Lmain_mul_loop:
	movzwl	(%rbx,%rsi,2), %eax
	imull	%r13d, %eax
	addl	%eax, %ecx		# t += pi[j] * i
	movl	%ecx, %eax
	imull	%r8d			# edx:eax = t * magic
	sarl	$12, %edx		# high half >> 12 ...
	movl	%ecx, %eax
	sarl	$31, %eax
	subl	%eax, %edx		# ... minus the sign bit gives edx = t / 10000
	imull	$PI_BASE, %edx, %eax
	subl	%eax, %ecx		# ecx = t % 10000
	movw	%cx, (%rbx,%rsi,2)	# pi[j] = t % 10000
	movl	%edx, %ecx		# t = t / 10000
	subl	$1, %esi
	jns	.Lmain_mul_loop		# continue while j >= 0

	// ---- pi /= 2*i + 1, limb 0 up to limb n-1 --------------------
	leal	1(%r13,%r13), %r9d	# divisor = 2*i + 1
	xorl	%edi, %edi		# remainder carried into the next limb
	xorl	%esi, %esi		# j = 0
.Lmain_div_loop:
	movzwl	(%rbx,%rsi,2), %eax
	imull	$PI_BASE, %edi, %edx
	addl	%edx, %eax		# numerator = pi[j] + rem * 10000
	cltd				# sign-extend eax into edx:eax for idivl
	idivl	%r9d			# eax = quotient, edx = remainder
	movw	%ax, (%rbx,%rsi,2)	# pi[j] = quotient
	movl	%edx, %edi
	addl	$1, %esi
	cmpl	%r12d, %esi
	jl	.Lmain_div_loop		# continue while j < n

	addw	$2, 2(%rbx)		# pi[1] += 2
	subl	$1, %r13d
	jg	.Lmain_step		# continue while the decremented i is still > 0

	// ---- output and cleanup --------------------------------------
	movl	%r12d, %esi
	movq	%rbx, %rdi
	call	print
	movq	%rbx, %rdi
	call	free@PLT
	xorl	%eax, %eax		# exit status 0
	jmp	.Lmain_exit

.Lmain_bad_count:
	leaq	.Lmain_msg_range(%rip), %rdi
	jmp	.Lmain_fail
.Lmain_no_memory:
	leaq	.Lmain_msg_nomem(%rip), %rdi
.Lmain_fail:
	movq	stderr@GOTPCREL(%rip), %rax
	movq	(%rax), %rsi		# rsi = the stderr stream pointer
	call	fputs@PLT
	movl	$1, %eax		# exit status 1

.Lmain_exit:
	addq	$8, %rsp
	popq	%r13
	popq	%r12
	popq	%rbx
	popq	%rbp
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
	.size	main, .-main
	.section	.note.GNU-stack,"",@progbits