/*
 * Copy `size` UBYTE elements from `src` to `dest`, one element per
 * loop iteration.
 *
 * dest: destination buffer; must have room for `size` elements.
 * src:  source buffer; must hold at least `size` elements.
 * size: number of elements to copy. A value of 0 copies nothing,
 *       because the count is tested before the first store.
 *
 * Only the local copies of the pointers are advanced; nothing is returned.
 */
void memcpy_fast( UBYTE *dest, UBYTE *src, ULONG size){
	while (size--) {
		*dest++ = *src++;
	}
}
_memcpy_fast movem.l l60,-(a7) move.l (8+l62,a7),a3 move.l (4+l62,a7),a2 move.l (12+l62,a7),d1 bra l58 l57 move.l a3,a0 addq.l #1,a3 move.l a2,a1 addq.l #1,a2 move.b (a0),(a1) l58 move.l d1,d0 subq.l #1,d1 tst.l d0 bne l57 l59 l56 l60 reg a2/a3 movem.l (a7)+,a2/a3 l62 equ 8 rts
l57 move.l (24+l62,a7),(4+l62,a7) addq.l #1,(24+l62,a7) move.l (20+l62,a7),(8+l62,a7) addq.l #1,(20+l62,a7) move.l (4+l62,a7),a0 move.l (8+l62,a7),a1 move.b (a0),(a1) move.l (28+l62,a7),(12+l62,a7) subq.l #1,(28+l62,a7) tst.l (12+l62,a7) bne l57
/*
 * Copy loop with every step written as its own statement: test the
 * remaining count, store one element, advance both pointers, then
 * decrement the count. Nothing is copied when size is 0 on entry.
 */
while (size) { *dest = *src; src++; dest++; size--; }
l57 move.b (a1)+,(a0)+ subq.l #1,d0 l58 tst.l d0 bne l57
l57 move.l (8+l62,a7),a0 move.l (4+l62,a7),a1 move.b (a0),(a1) addq.l #1,(8+l62,a7) addq.l #1,(4+l62,a7) subq.l #1,(12+l62,a7) bne l57
l60 move.b (a1)+,(a0)+ subq.l #1,d0 bne l60
/*
 * NOTE(review): these read as three alternative spellings of the same
 * copy loop rather than code meant to run back to back -- the first loop
 * already leaves size decremented past zero when it exits. Confirm
 * against the surrounding discussion.
 */

/* Form 1: post-decrement in the condition; the body is skipped when size is 0. */
while(size--) { *dst++ = *src++; }

/* Form 2: explicit test, copy, pointer advance and decrement as separate statements. */
while(size) { *dst = *src; dst++; src++; size--; }

/*
 * Form 3: do/while with pre-decrement. The body always runs at least
 * once, so a size of 0 on entry is NOT treated as "copy nothing".
 */
do { *dst++ = *src++; } while(--size);
.L3: move.l (%a1)+,(%a0)+ dbra %d0,.L3 clr.w %d0 subq.l #1,%d0 jcc .L3 .L9: move.l (%a1)+,(%a0)+ subq.l #1,%d0 jne .L9 .L14: move.l (%a1)+,(%a0)+ subq.l #1,%d0 jne .L14
.L3:
	move.l (%a1)+,(%a0)+	# this loop executes at most 65536 times
	dbra %d0,.L3
	clr.w %d0		# this part executes (size / 65536) times, so it does not count
	subq.l #1,%d0
	jcc .L3
@sanjyuubi, post #1
#include <exec/types.h>

/*
 * Three spellings of the same copy of n LONG elements from src to dst.
 * Note the argument order: source first, destination second.
 * NOTE(review): the variants appear to exist so their generated code can
 * be compared, so the loop forms are left exactly as written.
 */

/*
 * Variant 0: indexed access with a counting loop.
 * Copies nothing when n <= 0, since `i < n` fails on the first test.
 */
void memcopy_0(LONG *src, LONG *dst, WORD n)
{
	WORD i;
	for (i = 0; i < n; i++) {
		dst[i] = src[i];
	}
}

/*
 * Variant 1: same counting loop, but the data is moved through
 * post-incremented pointers instead of an index.
 * Copies nothing when n <= 0.
 * `__regargs` is a compiler-specific qualifier -- presumably it requests
 * register argument passing; confirm in the compiler documentation.
 */
__regargs void memcopy_1(LONG *src, LONG *dst, WORD n)
{
	WORD i;
	for (i = 0; i < n; i++) {
		*dst++ = *src++;
	}
}

/*
 * Variant 2: count-down loop with no separate index.
 * The condition only tests n for non-zero, so unlike the two variants
 * above a negative n is NOT treated as "copy nothing".
 */
__regargs void memcopy_2(LONG *src, LONG *dst, WORD n)
{
	while (n--) {
		*dst++ = *src++;
	}
}
@cahir, post #2
#include <sys/types.h>

/*
 * Compare the first n bytes of two memory areas, treating each byte as
 * an unsigned char.
 *
 * Returns 0 when all n bytes match (including n == 0). Otherwise returns
 * the byte from s1 minus the byte from s2 at the first position where
 * they differ, so the sign tells which area sorts first.
 */
int memcmp(const void *s1, const void *s2, size_t n)
{
	const unsigned char *lhs = s1;
	const unsigned char *rhs = s2;
	size_t pos;

	for (pos = 0; pos < n; pos++) {
		if (lhs[pos] != rhs[pos]) {
			return lhs[pos] - rhs[pos];
		}
	}

	return 0;
}
/*
 * Find the first (least significant) set bit in mask.
 *
 * Returns the 1-based position of that bit, or 0 when mask is 0.
 */
int ffs(int mask)
{
	/*
	 * 1-based position of the lowest set bit within a nibble.
	 * Entry 0 is only reached when mask == 0: by then the offset has
	 * accumulated 16 + 8 + 4 = 28, and -28 brings the result back to 0.
	 */
	static const int nibble_pos[16] = {
		-28, 1, 2, 1, 3, 1, 2, 1,
		  4, 1, 2, 1, 3, 1, 2, 1
	};
	unsigned bits = mask;
	int offset = 0;
	int width;

	/* Skip all-zero low halves of 16, 8 and then 4 bits. */
	for (width = 16; width >= 4; width >>= 1) {
		if ((bits & ((1u << width) - 1)) == 0) {
			offset += width;
			bits >>= width;
		}
	}

	return offset + nibble_pos[bits & 0xf];
}
#include <sys/types.h>

/*
 * Copy the string src into dst, which has room for siz bytes.
 *
 * At most siz - 1 characters are copied and, when siz is non-zero, dst
 * is always NUL-terminated. When siz is 0, dst is not touched.
 *
 * Returns the length of src (not counting its NUL), so a return value
 * >= siz tells the caller that the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t siz)
{
	const char *in = src;
	size_t room;

	/* Copy while more than the terminator's slot is left in dst. */
	for (room = siz; room > 1; room--) {
		char ch = *in++;

		*dst++ = ch;
		if (ch == '\0') {
			/* Whole string fitted; `in` is one past the NUL. */
			return (size_t)(in - src - 1);
		}
	}

	/* Out of room: terminate dst if it has any space at all ... */
	if (siz != 0) {
		*dst = '\0';
	}

	/* ... and walk the rest of src only to measure it. */
	while (*in != '\0') {
		in++;
	}

	return (size_t)(in - src);
}
#include <stddef.h>

/*
 * Split off the next token from *stringp.
 *
 * Scans *stringp for the first character that appears in delim (the
 * terminating NUL of the string counts as a match). A delimiter found
 * inside the string is overwritten with NUL and *stringp is set to the
 * character after it; if the end of the string is reached instead,
 * *stringp is set to NULL.
 *
 * Returns the original value of *stringp (the start of the token), or
 * NULL when *stringp was already NULL. Adjacent delimiters yield empty
 * tokens.
 */
char *strsep(char **stringp, const char *delim)
{
	char *token = *stringp;
	char *cur;

	if (token == NULL) {
		return NULL;
	}

	for (cur = token; ; cur++) {
		const char *probe = delim;

		/* Test *cur against every delimiter, including delim's NUL. */
		for (;;) {
			char dc = *probe++;

			if (dc == *cur) {
				if (*cur == '\0') {
					/* End of input: no further tokens. */
					*stringp = NULL;
				} else {
					*cur = '\0';
					*stringp = cur + 1;
				}
				return token;
			}
			if (dc == '\0') {
				break;
			}
		}
	}
}
@cahir, post #3
idnt "test.c" opt 0 opt NQLPSMRBT section "CODE",code public _memcmp cnop 0,4 _memcmp movem.l l11,-(a7) move.l (12+l13,a7),d0 beq l10 move.l (4+l13,a7),a3 move.l (8+l13,a7),a2 l9 move.l a3,a1 addq.l #1,a3 move.l a2,a0 addq.l #1,a2 move.l d0,(12+l13,a7) move.b (a1),d1 cmp.b (a0),d1 beq l8 moveq #0,d1 move.b -(a3),d1 moveq #0,d0 move.b -(a2),d0 neg.l d0 add.l d1,d0 bra l1 l8 subq.l #1,d0 bne l9 l10 moveq #0,d0 l1 l11 reg a2/a3 movem.l (a7)+,a2/a3 l13 equ 8 rts ; stacksize=8 opt 0 opt NQLPSMRBT public _ffs cnop 0,4 _ffs movem.l l22,-(a7) move.l (4+l24,a7),d1 moveq #0,d2 move.l #65535,d0 and.l d1,d0 bne l17 add.l #16,d2 moveq #16,d0 lsr.l d0,d1 l17 move.l #255,d0 and.l d1,d0 bne l19 addq.l #8,d2 lsr.l #8,d1 l19 moveq #15,d0 and.l d1,d0 bne l21 addq.l #4,d2 lsr.l #4,d1 l21 moveq #15,d0 and.l d1,d0 lsl.l #2,d0 lea l15,a0 add.l d0,a0 move.l d2,d0 add.l (a0),d0 l22 reg d2 movem.l (a7)+,d2 l24 equ 4 rts ; stacksize=4 cnop 0,4 l15 dc.l -28 dc.l 1 dc.l 2 dc.l 1 dc.l 3 dc.l 1 dc.l 2 dc.l 1 dc.l 4 dc.l 1 dc.l 2 dc.l 1 dc.l 3 dc.l 1 dc.l 2 dc.l 1 opt 0 opt NQLPSMRBT public _strlcpy cnop 0,4 _strlcpy movem.l l44,-(a7) move.l (12+l46,a7),d1 move.l (4+l46,a7),a3 move.l (8+l46,a7),a2 move.l d1,d0 beq l42 subq.l #1,d0 beq l42 l40 move.l a2,a1 addq.l #1,a2 move.l a3,a0 addq.l #1,a3 move.b (a1),(a0) beq l42 subq.l #1,d0 bne l40 l42 tst.l d0 bne l43 tst.l d1 beq l36 move.b #0,(a3) l36 move.l a2,a0 addq.l #1,a2 tst.b (a0) beq l43 l41 move.l a2,a0 addq.l #1,a2 tst.b (a0) bne l41 l43 move.l a2,d0 sub.l (8+l46,a7),d0 subq.l #1,d0 l44 reg a2/a3 movem.l (a7)+,a2/a3 l46 equ 8 rts ; stacksize=8 opt 0 opt NQLPSMRBT public _strsep cnop 0,4 _strsep movem.l l66,-(a7) move.l (8+l68,a7),a5 move.l (4+l68,a7),a3 move.l (a3),a1 move.l a1,d2 bne l49 moveq #0,d0 bra l64 l49 move.l a1,a4 l62 move.l a1,a0 addq.l #1,a1 move.b (a0),d0 move.l a5,a2 l63 move.l a2,a0 addq.l #1,a2 move.b (a0),d1 cmp.b d1,d0 bne l58 tst.b d0 bne l60 move.l #0,a1 bra l61 l60 move.b #0,(-1,a1) l61 move.l a1,(a3) move.l a4,d0 bra l64 
l58 tst.b d1 bne l63 bra l62 l64 l66 reg a2/a3/a4/a5/d2 movem.l (a7)+,a2/a3/a4/a5/d2 l68 equ 20 rts ; stacksize=20
@sanjyuubi, post #4
_memcmp: movel d2,sp@- movel sp@(16),d1 jeq L3 movel sp@(8),a0 movel sp@(12),a1 moveq #0,d2 moveq #0,d0 L8: cmpmb a1@+,a0@+ jeq L6 moveb a0@(-1),d2 moveb a1@(-1),d0 subl d0,d2 movel d2,d0 jra L9 L6: subql #1,d1 jne L8 L3: moveq #0,d0 L9: movel sp@+,d2 rts
_t.3: .long -28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 _ffs: movel d2,sp@- movel sp@(8),d1 subl a1,a1 tstw d1 jne L3 movew #16,a1 clrw d1 swap d1 L3: tstb d1 jne L4 addql #8,a1 lsrl #8,d1 L4: movel d1,d0 moveq #15,d2 andl d2,d0 jne L5 addql #4,a1 lsrl #4,d1 L5: moveq #15,d0 andl d0,d1 lsll #2,d1 lea (_t.3),a0 movel a1,d0 addl a0@(d1:l),d0 movel sp@+,d2 rts
_strlcpy: movel d2,sp@- movel sp@(12),d2 movel sp@(16),d1 movel sp@(8),a1 movel d2,a0 movel d1,d0 jeq L12 L7: subql #1,d0 jeq L13 moveb a0@+,a1@ tstb a1@+ jne L7 jra L8 L13: tstl d1 jeq L12 clrb a1@ L12: tstb a0@+ jne L12 L8: subl d2,a0 movel a0,d0 subql #1,d0 movel sp@+,d2 rts
_strsep: moveml d2/d3/a2,sp@- movel sp@(16),a2 movel sp@(20),d3 movel a2@,a0 cmpw #0,a0 jne L3 moveq #0,d0 jra L14 L3: movel a0,d2 L4: moveb a0@+,d0 movel d3,a1 L13: moveb a1@+,d1 cmpb d1,d0 jne L9 tstb d1 jne L11 subl a0,a0 jra L12 L11: clrb a0@(-1) L12: movel a0,a2@ movel d2,d0 jra L14 L9: tstb d1 jne L13 jra L4 L14: moveml sp@+,d2/d3/a2 rts
@cahir, post #5
@XoR, post #7
@sanjyuubi, post #8
/*
 * Copy `size` bytes from `src` to `dest`, moving one ULONG per iteration.
 *
 * dest: destination buffer.
 * src:  source buffer.
 * size: treated as a byte count (the original divided it by 4 to obtain
 *       the number of ULONG units). A value of 0 copies nothing.
 *
 * Whole ULONG units are copied first. Any remaining
 * `size % sizeof(ULONG)` bytes are then copied one at a time, so a size
 * that is not a multiple of the unit width no longer loses its tail.
 * Nothing is returned.
 */
void memcpy_fast( ULONG *dest, ULONG *src, ULONG size){
	ULONG units = size / sizeof *dest;
	ULONG tail = size % sizeof *dest;
	unsigned char *tail_dest;
	const unsigned char *tail_src;

	while (units--) {
		*dest++ = *src++;
	}

	/* Finish the bytes that do not fill a whole ULONG. */
	tail_dest = (unsigned char *)dest;
	tail_src = (const unsigned char *)src;
	while (tail--) {
		*tail_dest++ = *tail_src++;
	}
}
memcpy_fast: link.w %fp,#0 move.l 12(%fp),%a0 move.l 16(%fp),%d1 lsr.l #2,%d1 move.l %d1,%d0 subq.l #1,%d0 tst.l %d1 jeq .L5 move.l 8(%fp),%a1 .L7: move.l (%a0)+,(%a1)+ dbra %d0,.L7 clr.w %d0 subq.l #1,%d0 jcc .L7 .L5: unlk %fp rts
@cahir, post #5
#NO_APP .file "test.c" .text .align 2 .globl memcmp .type memcmp, @function memcmp: move.l %d2,-(%sp) move.l 8(%sp),%a1 move.l 12(%sp),%a0 move.l 16(%sp),%d0 jeq .L5 .L4: move.b (%a1)+,%d1 move.b (%a0)+,%d2 cmp.b %d1,%d2 jne .L9 subq.l #1,%d0 jne .L4 move.l (%sp)+,%d2 rts .L5: moveq #0,%d0 move.l (%sp)+,%d2 rts .L9: moveq #0,%d0 move.b %d1,%d0 and.l #255,%d2 sub.l %d2,%d0 move.l (%sp)+,%d2 rts .size memcmp, .-memcmp .align 2 .globl ffs .type ffs, @function ffs: move.l %d2,-(%sp) move.l 8(%sp),%d2 tst.w %d2 jne .L14 clr.w %d2 swap %d2 moveq #16,%d0 .L11: tst.b %d2 jne .L12 addq.l #8,%d0 lsr.l #8,%d2 .L12: moveq #15,%d1 and.l %d2,%d1 jne .L13 addq.l #4,%d0 move.l %d2,%d1 lsr.l #4,%d1 moveq #15,%d2 and.l %d2,%d1 .L13: add.l %d1,%d1 add.l %d1,%d1 lea t.1549,%a0 add.l (%a0,%d1.l),%d0 move.l (%sp)+,%d2 rts .L14: moveq #0,%d0 jra .L11 .size ffs, .-ffs .align 2 .globl strlcpy .type strlcpy, @function strlcpy: move.l %a2,-(%sp) move.l 12(%sp),%d1 move.l 16(%sp),%a0 cmp.w #0,%a0 jeq .L24 move.l 8(%sp),%a1 lea -1(%a1,%a0.l),%a2 move.l %d1,%a0 .L19: cmp.l %a2,%a1 jeq .L35 move.b (%a0)+,%d0 move.b %d0,(%a1)+ jne .L19 .L20: sub.l %d1,%a0 move.l %a0,%d0 subq.l #1,%d0 move.l (%sp)+,%a2 rts .L24: move.l %d1,%a0 .L23: tst.b (%a0)+ jeq .L20 .L34: tst.b (%a0)+ jne .L23 jra .L20 .L35: clr.b (%a2) tst.b (%a0)+ jne .L34 jra .L20 .size strlcpy, .-strlcpy .align 2 .globl strsep .type strsep, @function strsep: movem.l #14368,-(%sp) move.l 20(%sp),%a2 move.l 24(%sp),%d4 move.l (%a2),%d0 jeq .L42 move.l %d0,%a1 move.l %a1,%d3 addq.l #1,%d3 move.b (%a1),%d2 move.l %d4,%a0 .L40: move.b (%a0)+,%d1 cmp.b %d2,%d1 jeq .L47 tst.b %d1 jne .L40 move.l %d3,%a1 move.l %a1,%d3 addq.l #1,%d3 move.b (%a1),%d2 move.l %d4,%a0 jra .L40 .L47: tst.b %d2 jeq .L43 clr.b (%a1) move.l %d3,(%a2) .L37: movem.l (%sp)+,#1052 rts .L43: moveq #0,%d3 move.l %d3,(%a2) jra .L37 .L42: moveq #0,%d0 movem.l (%sp)+,#1052 rts .size strsep, .-strsep .section .rodata .align 2 .type t.1549, @object .size t.1549, 64 t.1549: .long 
-28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .ident "GCC: (Tranaptic-2015/08/21-08:49:49) 5.2.0"
@sanjyuubi, post #12
@asman, post #11
@teh_KaiN, post #14
@cahir, post #18
_memcmp: movel d2,sp@- movel sp@(16),d1 jeq L2 movel sp@(8),a0 movel sp@(12),a1 moveq #0,d2 moveq #0,d0 .even L3: cmpmb a1@+,a0@+ jne L8 subql #1,d1 jne L3 L2: moveq #0,d0 jra L1 .even L8: moveb a0@(-1),d2 moveb a1@(-1),d0 subl d0,d2 movel d2,d0 .even L1: movel sp@+,d2 rts
t.0: .long -28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .even .globl _ffs _ffs: movel d2,sp@- movel sp@(8),d1 moveq #0,d2 tstw d1 jne L10 moveb #16,d2 clrw d1 swap d1 L10: tstb d1 jne L11 addql #8,d2 lsrl #8,d1 L11: moveq #15,d0 andl d1,d0 jne L12 addql #4,d2 movel d1,d0 lsrl #4,d0 moveq #15,d1 andl d1,d0 L12: lsll #2,d0 lea (t.0),a0 movel a0@(d0:l),d0 addl d2,d0 movel sp@+,d2 rts
_strlcpy: movel d3,sp@- movel d2,sp@- movel sp@(16),d3 movel sp@(20),d2 movel sp@(12),a1 movel d3,a0 movel d2,d1 jeq L15 .even L25: subql #1,d1 jeq L15 moveb a0@+,d0 moveb d0,a1@+ jne L25 .even L15: tstl d1 jne L20 tstl d2 jeq L22 clrb a1@ .even L22: tstb a0@+ jne L22 L20: subl d3,a0 movel a0,d0 subql #1,d0 movel sp@+,d2 movel sp@+,d3 rts
_strsep: moveml #0x3020,sp@- movel sp@(16),a2 movel sp@(20),d2 movel a2@,a1 moveq #0,d0 cmpw #0,a1 jeq L27 movel a1,d3 .even L29: moveb a1@+,d0 movel d2,a0 moveb a0@+,d1 cmpb d1,d0 jeq L39 .even L33: tstb d1 jeq L29 moveb a0@+,d1 cmpb d1,d0 jne L33 L39: tstb d1 jne L35 subl a1,a1 movel a1,a2@ movel d3,d0 jra L27 .even L35: clrb a1@(-1) movel a1,a2@ movel d3,d0 L27: moveml sp@+,#0x40c rts
@sanjyuubi, post #27
A da radę tym normalnie wygenerować plik wykonywalny dla AOS2.0+?
@teh_KaiN, post #14
Może w końcu skończy się cały bulszit o wyższości jednego kompilatora nad drugim i znajdzie się niekwestionowany zwycięzca.