Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'devel'
[palacios.git] / kitten / arch / x86_64 / lib / memcpy.S
diff --git a/kitten/arch/x86_64/lib/memcpy.S b/kitten/arch/x86_64/lib/memcpy.S
new file mode 100644 (file)
index 0000000..aa333e4
--- /dev/null
@@ -0,0 +1,121 @@
+/* Copyright 2002 Andi Kleen */
+       
+       #include <arch/cpufeature.h>            
+/*
+ * memcpy - Copy a memory block.
+ *
+ * Input:      
+ * rdi destination
+ * rsi source
+ * rdx count
+ * 
+ * Output:
+ * rax original destination
+ */    
+
+       .globl __memcpy
+       .globl memcpy
+       .p2align 4
+__memcpy:
+memcpy:                
+       pushq %rbx
+       movq %rdi,%rax
+
+       movl %edx,%ecx
+       shrl $6,%ecx
+       jz .Lhandle_tail
+
+       .p2align 4
+.Lloop_64:
+       decl %ecx
+
+       movq (%rsi),%r11
+       movq 8(%rsi),%r8
+
+       movq %r11,(%rdi)
+       movq %r8,1*8(%rdi)
+
+       movq 2*8(%rsi),%r9
+       movq 3*8(%rsi),%r10
+
+       movq %r9,2*8(%rdi)
+       movq %r10,3*8(%rdi)
+
+       movq 4*8(%rsi),%r11
+       movq 5*8(%rsi),%r8
+
+       movq %r11,4*8(%rdi)
+       movq %r8,5*8(%rdi)
+
+       movq 6*8(%rsi),%r9
+       movq 7*8(%rsi),%r10
+
+       movq %r9,6*8(%rdi)
+       movq %r10,7*8(%rdi)
+
+       leaq 64(%rsi),%rsi
+       leaq 64(%rdi),%rdi
+       jnz  .Lloop_64
+
+.Lhandle_tail:
+       movl %edx,%ecx
+       andl $63,%ecx
+       shrl $3,%ecx
+       jz   .Lhandle_7
+       .p2align 4
+.Lloop_8:
+       decl %ecx
+       movq (%rsi),%r8
+       movq %r8,(%rdi)
+       leaq 8(%rdi),%rdi
+       leaq 8(%rsi),%rsi
+       jnz  .Lloop_8
+
+.Lhandle_7:
+       movl %edx,%ecx
+       andl $7,%ecx
+       jz .Lende
+       .p2align 4
+.Lloop_1:
+       movb (%rsi),%r8b
+       movb %r8b,(%rdi)
+       incq %rdi
+       incq %rsi
+       decl %ecx
+       jnz .Lloop_1
+
+.Lende:
+       popq %rbx
+       ret
+.Lfinal:
+
+       /* Some CPUs run faster using the string copy instructions.
+          It is also a lot simpler. Use this when possible */
+
+       .section .altinstructions,"a"
+       .align 8
+       .quad  memcpy
+       .quad  memcpy_c
+       .byte  X86_FEATURE_REP_GOOD
+       .byte  .Lfinal-memcpy
+       .byte  memcpy_c_end-memcpy_c
+       .previous
+
+       .section .altinstr_replacement,"ax"
+ /* rdi        destination
+  * rsi source
+  * rdx count
+  */
+memcpy_c:
+       movq %rdi,%rax
+       movl %edx,%ecx
+       shrl $3,%ecx
+       andl $7,%edx    
+       rep 
+       movsq 
+       movl %edx,%ecx
+       rep
+       movsb
+       ret
+memcpy_c_end:
+       .previous