X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=kitten%2Farch%2Fx86_64%2Flib%2Fmemcpy.S;fp=kitten%2Farch%2Fx86_64%2Flib%2Fmemcpy.S;h=aa333e432a88353a2c326a04ae42438412216e45;hb=66a1a4c7a9edcd7d8bc207aca093d694a6e6b5b2;hp=0000000000000000000000000000000000000000;hpb=f7cf9c19ecb0a589dd45ae0d2c91814bd3c2acc2;p=palacios-OLD.git diff --git a/kitten/arch/x86_64/lib/memcpy.S b/kitten/arch/x86_64/lib/memcpy.S new file mode 100644 index 0000000..aa333e4 --- /dev/null +++ b/kitten/arch/x86_64/lib/memcpy.S @@ -0,0 +1,121 @@ +/* Copyright 2002 Andi Kleen */ + + #include +/* + * memcpy - Copy a memory block. + * + * Input: + * rdi destination + * rsi source + * rdx count + * + * Output: + * rax original destination + */ + + .globl __memcpy + .globl memcpy + .p2align 4 +__memcpy: +memcpy: + pushq %rbx + movq %rdi,%rax + + movl %edx,%ecx + shrl $6,%ecx + jz .Lhandle_tail + + .p2align 4 +.Lloop_64: + decl %ecx + + movq (%rsi),%r11 + movq 8(%rsi),%r8 + + movq %r11,(%rdi) + movq %r8,1*8(%rdi) + + movq 2*8(%rsi),%r9 + movq 3*8(%rsi),%r10 + + movq %r9,2*8(%rdi) + movq %r10,3*8(%rdi) + + movq 4*8(%rsi),%r11 + movq 5*8(%rsi),%r8 + + movq %r11,4*8(%rdi) + movq %r8,5*8(%rdi) + + movq 6*8(%rsi),%r9 + movq 7*8(%rsi),%r10 + + movq %r9,6*8(%rdi) + movq %r10,7*8(%rdi) + + leaq 64(%rsi),%rsi + leaq 64(%rdi),%rdi + jnz .Lloop_64 + +.Lhandle_tail: + movl %edx,%ecx + andl $63,%ecx + shrl $3,%ecx + jz .Lhandle_7 + .p2align 4 +.Lloop_8: + decl %ecx + movq (%rsi),%r8 + movq %r8,(%rdi) + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + jnz .Lloop_8 + +.Lhandle_7: + movl %edx,%ecx + andl $7,%ecx + jz .Lende + .p2align 4 +.Lloop_1: + movb (%rsi),%r8b + movb %r8b,(%rdi) + incq %rdi + incq %rsi + decl %ecx + jnz .Lloop_1 + +.Lende: + popq %rbx + ret +.Lfinal: + + /* Some CPUs run faster using the string copy instructions. + It is also a lot simpler. Use this when possible */ + + .section .altinstructions,"a" + .align 8 + .quad memcpy + .quad memcpy_c + .byte X86_FEATURE_REP_GOOD + .byte .Lfinal-memcpy + .byte memcpy_c_end-memcpy_c + .previous + + .section .altinstr_replacement,"ax" + /* rdi destination + * rsi source + * rdx count + */ +memcpy_c: + movq %rdi,%rax + movl %edx,%ecx + shrl $3,%ecx + andl $7,%edx + rep + movsq + movl %edx,%ecx + rep + movsb + ret +memcpy_c_end: + .previous