void memcpy_fast( UBYTE *dest, UBYTE *src, ULONG size){
while (size--) {
*dest++ = *src++;
}_memcpy_fast movem.l l60,-(a7) move.l (8+l62,a7),a3 move.l (4+l62,a7),a2 move.l (12+l62,a7),d1 bra l58 l57 move.l a3,a0 addq.l #1,a3 move.l a2,a1 addq.l #1,a2 move.b (a0),(a1) l58 move.l d1,d0 subq.l #1,d1 tst.l d0 bne l57 l59 l56 l60 reg a2/a3 movem.l (a7)+,a2/a3 l62 equ 8 rts
l57 move.l (24+l62,a7),(4+l62,a7) addq.l #1,(24+l62,a7) move.l (20+l62,a7),(8+l62,a7) addq.l #1,(20+l62,a7) move.l (4+l62,a7),a0 move.l (8+l62,a7),a1 move.b (a0),(a1) move.l (28+l62,a7),(12+l62,a7) subq.l #1,(28+l62,a7) tst.l (12+l62,a7) bne l57
while (size) {
*dest = *src;
src++; dest++;
size--;
}l57 move.b (a1)+,(a0)+ subq.l #1,d0 l58 tst.l d0 bne l57
l57 move.l (8+l62,a7),a0 move.l (4+l62,a7),a1 move.b (a0),(a1) addq.l #1,(8+l62,a7) addq.l #1,(4+l62,a7) subq.l #1,(12+l62,a7) bne l57
l60 move.b (a1)+,(a0)+ subq.l #1,d0 bne l60
while(size--) {
*dst++ = *src++;
}
while(size) {
*dst = *src;
dst++; src++; size--;
}
do {
*dst++ = *src++;
} while(--size);.L3: move.l (%a1)+,(%a0)+ dbra %d0,.L3 clr.w %d0 subq.l #1,%d0 jcc .L3 .L9: move.l (%a1)+,(%a0)+ subq.l #1,%d0 jne .L9 .L14: move.l (%a1)+,(%a0)+ subq.l #1,%d0 jne .L14
.L3: move.l (%a1)+,(%a0)+ # ta pętla wykona się max. 65536 razy dbra %d0,.L3 clr.w %d0 # ta część wykona się (size / 65536) razy, więc się nie liczy subq.l #1,%d0 jcc .L3
@sanjyuubi, post #1
#include <exec/types.h>
/*
 * Copy the first n longwords of src into dst using array indexing.
 *
 * src - source array, read at indices 0 .. n-1
 * dst - destination array, written at indices 0 .. n-1
 * n   - element count; nothing is copied when n is zero or negative
 */
void memcopy_0(LONG *src, LONG *dst, WORD n) {
    WORD idx = 0;

    while (idx < n) {
        dst[idx] = src[idx];
        idx++;
    }
}
/*
 * Copy n longwords from src to dst by stepping both pointers forward,
 * with a separate up-counter bounding the loop.
 *
 * src - source pointer, advanced locally one element per iteration
 * dst - destination pointer, advanced locally one element per iteration
 * n   - element count; nothing is copied when n is zero or negative
 *
 * NOTE(review): __regargs is a compiler-specific qualifier; presumably it
 * requests register argument passing - confirm against the compiler manual.
 */
__regargs void memcopy_1(LONG *src, LONG *dst, WORD n) {
    WORD copied = 0;

    while (copied < n) {
        *dst = *src;
        dst++;
        src++;
        copied++;
    }
}
/*
 * Copy n longwords from src to dst, using n itself as a down-counter.
 *
 * src - source pointer, advanced locally one element per iteration
 * dst - destination pointer, advanced locally one element per iteration
 * n   - element count; the loop stops only when the counter is exactly
 *       zero, so a negative n is NOT treated as "copy nothing" here
 */
__regargs void memcopy_2(LONG *src, LONG *dst, WORD n) {
    for (; n != 0; n--) {
        *dst++ = *src++;
    }
}
@cahir, post #2
#include <sys/types.h>
/*
 * Compare the first n bytes of s1 and s2, treating them as unsigned chars.
 *
 * Returns 0 when n is 0 or all n bytes are equal. Otherwise returns the
 * byte of s1 minus the byte of s2 at the first position where they differ,
 * so the sign tells which buffer sorts first.
 */
int memcmp(const void *s1, const void *s2, size_t n) {
    if (n != 0) {
        const unsigned char *p1 = s1, *p2 = s2;

        do {
            /* Both pointers have already moved past the mismatch, so step
             * each one back before taking the difference. */
            if (*p1++ != *p2++)
                return (*--p1 - *--p2);
        } while (--n != 0);
    }
    return 0;
}

/*
 * Find the lowest set bit of mask.
 *
 * Returns the 1-based position of the least significant set bit
 * (1 for bit 0), or 0 when mask is 0. The search narrows by 16, 8 and
 * 4 bits and finishes with a 16-entry nibble lookup.
 */
int ffs(int mask) {
    int bit;
    unsigned r = mask;
    /*
     * t[x] is the 1-based position of the lowest set bit in nibble x.
     * t[0] is -28: a zero mask skips 16 + 8 + 4 = 28 bits below, and the
     * table entry cancels that so the result is 0.
     */
    static const int t[16] = { -28, 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1 };

    bit = 0;
    if ((r & 0xffff) == 0) {
        bit += 16;
        r >>= 16;
    }
    if ((r & 0xff) == 0) {
        bit += 8;
        r >>= 8;
    }
    if ((r & 0xf) == 0) {
        bit += 4;
        r >>= 4;
    }
    return (bit + t[r & 0xf]);
}
#include <sys/types.h>
/*
 * Copy src into dst, which has room for siz bytes in total.
 *
 * At most siz - 1 characters are copied and, when siz is non-zero, dst is
 * always NUL-terminated. With siz == 0 nothing is written to dst.
 *
 * Returns the length of src (not counting its NUL); a result >= siz means
 * the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t siz) {
    char *d = dst;
    const char *s = src;
    size_t n = siz;

    /* Copy as many bytes as will fit */
    if (n != 0) {
        while (--n != 0) {
            if ((*d++ = *s++) == '\0')
                break;
        }
    }

    /* Not enough room in dst, add NUL and traverse rest of src */
    if (n == 0) {
        if (siz != 0)
            *d = '\0'; /* NUL-terminate dst */
        while (*s++)
            ;
    }

    /* s now sits one past src's NUL on every path */
    return (s - src - 1); /* count does not include NUL */
}
#include <stddef.h>
/*
 * Split the next token off the string *stringp.
 *
 * The token ends at the first character that occurs in delim, or at the
 * end of the string. A delimiter that ends the token is overwritten with
 * NUL and *stringp is moved to the character after it; when the end of the
 * string is reached instead, *stringp is set to NULL.
 *
 * Returns the start of the token (the original *stringp), or NULL when
 * *stringp is already NULL.
 */
char *strsep(char **stringp, const char *delim) {
    char *token = *stringp;
    char *cursor;

    if (token == NULL)
        return NULL;

    for (cursor = token; ; cursor++) {
        const char *d = delim;
        char cur = *cursor;
        char dc;

        /* The scan includes delim's own NUL, so the end of the input
         * string counts as a match as well. */
        do {
            dc = *d++;
            if (dc == cur) {
                if (cur == '\0') {
                    *stringp = NULL;
                } else {
                    *cursor = '\0';
                    *stringp = cursor + 1;
                }
                return token;
            }
        } while (dc != '\0');
    }
}
@cahir, post #3
idnt "test.c" opt 0 opt NQLPSMRBT section "CODE",code public _memcmp cnop 0,4 _memcmp movem.l l11,-(a7) move.l (12+l13,a7),d0 beq l10 move.l (4+l13,a7),a3 move.l (8+l13,a7),a2 l9 move.l a3,a1 addq.l #1,a3 move.l a2,a0 addq.l #1,a2 move.l d0,(12+l13,a7) move.b (a1),d1 cmp.b (a0),d1 beq l8 moveq #0,d1 move.b -(a3),d1 moveq #0,d0 move.b -(a2),d0 neg.l d0 add.l d1,d0 bra l1 l8 subq.l #1,d0 bne l9 l10 moveq #0,d0 l1 l11 reg a2/a3 movem.l (a7)+,a2/a3 l13 equ 8 rts ; stacksize=8 opt 0 opt NQLPSMRBT public _ffs cnop 0,4 _ffs movem.l l22,-(a7) move.l (4+l24,a7),d1 moveq #0,d2 move.l #65535,d0 and.l d1,d0 bne l17 add.l #16,d2 moveq #16,d0 lsr.l d0,d1 l17 move.l #255,d0 and.l d1,d0 bne l19 addq.l #8,d2 lsr.l #8,d1 l19 moveq #15,d0 and.l d1,d0 bne l21 addq.l #4,d2 lsr.l #4,d1 l21 moveq #15,d0 and.l d1,d0 lsl.l #2,d0 lea l15,a0 add.l d0,a0 move.l d2,d0 add.l (a0),d0 l22 reg d2 movem.l (a7)+,d2 l24 equ 4 rts ; stacksize=4 cnop 0,4 l15 dc.l -28 dc.l 1 dc.l 2 dc.l 1 dc.l 3 dc.l 1 dc.l 2 dc.l 1 dc.l 4 dc.l 1 dc.l 2 dc.l 1 dc.l 3 dc.l 1 dc.l 2 dc.l 1 opt 0 opt NQLPSMRBT public _strlcpy cnop 0,4 _strlcpy movem.l l44,-(a7) move.l (12+l46,a7),d1 move.l (4+l46,a7),a3 move.l (8+l46,a7),a2 move.l d1,d0 beq l42 subq.l #1,d0 beq l42 l40 move.l a2,a1 addq.l #1,a2 move.l a3,a0 addq.l #1,a3 move.b (a1),(a0) beq l42 subq.l #1,d0 bne l40 l42 tst.l d0 bne l43 tst.l d1 beq l36 move.b #0,(a3) l36 move.l a2,a0 addq.l #1,a2 tst.b (a0) beq l43 l41 move.l a2,a0 addq.l #1,a2 tst.b (a0) bne l41 l43 move.l a2,d0 sub.l (8+l46,a7),d0 subq.l #1,d0 l44 reg a2/a3 movem.l (a7)+,a2/a3 l46 equ 8 rts ; stacksize=8 opt 0 opt NQLPSMRBT public _strsep cnop 0,4 _strsep movem.l l66,-(a7) move.l (8+l68,a7),a5 move.l (4+l68,a7),a3 move.l (a3),a1 move.l a1,d2 bne l49 moveq #0,d0 bra l64 l49 move.l a1,a4 l62 move.l a1,a0 addq.l #1,a1 move.b (a0),d0 move.l a5,a2 l63 move.l a2,a0 addq.l #1,a2 move.b (a0),d1 cmp.b d1,d0 bne l58 tst.b d0 bne l60 move.l #0,a1 bra l61 l60 move.b #0,(-1,a1) l61 move.l a1,(a3) move.l a4,d0 bra l64 
l58 tst.b d1 bne l63 bra l62 l64 l66 reg a2/a3/a4/a5/d2 movem.l (a7)+,a2/a3/a4/a5/d2 l68 equ 20 rts ; stacksize=20
@sanjyuubi, post #4
_memcmp: movel d2,sp@- movel sp@(16),d1 jeq L3 movel sp@(8),a0 movel sp@(12),a1 moveq #0,d2 moveq #0,d0 L8: cmpmb a1@+,a0@+ jeq L6 moveb a0@(-1),d2 moveb a1@(-1),d0 subl d0,d2 movel d2,d0 jra L9 L6: subql #1,d1 jne L8 L3: moveq #0,d0 L9: movel sp@+,d2 rts
_t.3: .long -28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 _ffs: movel d2,sp@- movel sp@(8),d1 subl a1,a1 tstw d1 jne L3 movew #16,a1 clrw d1 swap d1 L3: tstb d1 jne L4 addql #8,a1 lsrl #8,d1 L4: movel d1,d0 moveq #15,d2 andl d2,d0 jne L5 addql #4,a1 lsrl #4,d1 L5: moveq #15,d0 andl d0,d1 lsll #2,d1 lea (_t.3),a0 movel a1,d0 addl a0@(d1:l),d0 movel sp@+,d2 rts
_strlcpy: movel d2,sp@- movel sp@(12),d2 movel sp@(16),d1 movel sp@(8),a1 movel d2,a0 movel d1,d0 jeq L12 L7: subql #1,d0 jeq L13 moveb a0@+,a1@ tstb a1@+ jne L7 jra L8 L13: tstl d1 jeq L12 clrb a1@ L12: tstb a0@+ jne L12 L8: subl d2,a0 movel a0,d0 subql #1,d0 movel sp@+,d2 rts
_strsep: moveml d2/d3/a2,sp@- movel sp@(16),a2 movel sp@(20),d3 movel a2@,a0 cmpw #0,a0 jne L3 moveq #0,d0 jra L14 L3: movel a0,d2 L4: moveb a0@+,d0 movel d3,a1 L13: moveb a1@+,d1 cmpb d1,d0 jne L9 tstb d1 jne L11 subl a0,a0 jra L12 L11: clrb a0@(-1) L12: movel a0,a2@ movel d2,d0 jra L14 L9: tstb d1 jne L13 jra L4 L14: moveml sp@+,d2/d3/a2 rts
@cahir, post #5
@XoR, post #7
@sanjyuubi, post #8
void memcpy_fast( ULONG *dest, ULONG *src, ULONG size){
size=size/4;
while (size--) {
*dest++ = *src++;
}memcpy_fast: link.w %fp,#0 move.l 12(%fp),%a0 move.l 16(%fp),%d1 lsr.l #2,%d1 move.l %d1,%d0 subq.l #1,%d0 tst.l %d1 jeq .L5 move.l 8(%fp),%a1 .L7: move.l (%a0)+,(%a1)+ dbra %d0,.L7 clr.w %d0 subq.l #1,%d0 jcc .L7 .L5: unlk %fp rts
@cahir, post #5
#NO_APP .file "test.c" .text .align 2 .globl memcmp .type memcmp, @function memcmp: move.l %d2,-(%sp) move.l 8(%sp),%a1 move.l 12(%sp),%a0 move.l 16(%sp),%d0 jeq .L5 .L4: move.b (%a1)+,%d1 move.b (%a0)+,%d2 cmp.b %d1,%d2 jne .L9 subq.l #1,%d0 jne .L4 move.l (%sp)+,%d2 rts .L5: moveq #0,%d0 move.l (%sp)+,%d2 rts .L9: moveq #0,%d0 move.b %d1,%d0 and.l #255,%d2 sub.l %d2,%d0 move.l (%sp)+,%d2 rts .size memcmp, .-memcmp .align 2 .globl ffs .type ffs, @function ffs: move.l %d2,-(%sp) move.l 8(%sp),%d2 tst.w %d2 jne .L14 clr.w %d2 swap %d2 moveq #16,%d0 .L11: tst.b %d2 jne .L12 addq.l #8,%d0 lsr.l #8,%d2 .L12: moveq #15,%d1 and.l %d2,%d1 jne .L13 addq.l #4,%d0 move.l %d2,%d1 lsr.l #4,%d1 moveq #15,%d2 and.l %d2,%d1 .L13: add.l %d1,%d1 add.l %d1,%d1 lea t.1549,%a0 add.l (%a0,%d1.l),%d0 move.l (%sp)+,%d2 rts .L14: moveq #0,%d0 jra .L11 .size ffs, .-ffs .align 2 .globl strlcpy .type strlcpy, @function strlcpy: move.l %a2,-(%sp) move.l 12(%sp),%d1 move.l 16(%sp),%a0 cmp.w #0,%a0 jeq .L24 move.l 8(%sp),%a1 lea -1(%a1,%a0.l),%a2 move.l %d1,%a0 .L19: cmp.l %a2,%a1 jeq .L35 move.b (%a0)+,%d0 move.b %d0,(%a1)+ jne .L19 .L20: sub.l %d1,%a0 move.l %a0,%d0 subq.l #1,%d0 move.l (%sp)+,%a2 rts .L24: move.l %d1,%a0 .L23: tst.b (%a0)+ jeq .L20 .L34: tst.b (%a0)+ jne .L23 jra .L20 .L35: clr.b (%a2) tst.b (%a0)+ jne .L34 jra .L20 .size strlcpy, .-strlcpy .align 2 .globl strsep .type strsep, @function strsep: movem.l #14368,-(%sp) move.l 20(%sp),%a2 move.l 24(%sp),%d4 move.l (%a2),%d0 jeq .L42 move.l %d0,%a1 move.l %a1,%d3 addq.l #1,%d3 move.b (%a1),%d2 move.l %d4,%a0 .L40: move.b (%a0)+,%d1 cmp.b %d2,%d1 jeq .L47 tst.b %d1 jne .L40 move.l %d3,%a1 move.l %a1,%d3 addq.l #1,%d3 move.b (%a1),%d2 move.l %d4,%a0 jra .L40 .L47: tst.b %d2 jeq .L43 clr.b (%a1) move.l %d3,(%a2) .L37: movem.l (%sp)+,#1052 rts .L43: moveq #0,%d3 move.l %d3,(%a2) jra .L37 .L42: moveq #0,%d0 movem.l (%sp)+,#1052 rts .size strsep, .-strsep .section .rodata .align 2 .type t.1549, @object .size t.1549, 64 t.1549: .long 
-28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .ident "GCC: (Tranaptic-2015/08/21-08:49:49) 5.2.0"
@sanjyuubi, post #12
@asman, post #11
@teh_KaiN, post #14
@cahir, post #18
_memcmp: movel d2,sp@- movel sp@(16),d1 jeq L2 movel sp@(8),a0 movel sp@(12),a1 moveq #0,d2 moveq #0,d0 .even L3: cmpmb a1@+,a0@+ jne L8 subql #1,d1 jne L3 L2: moveq #0,d0 jra L1 .even L8: moveb a0@(-1),d2 moveb a1@(-1),d0 subl d0,d2 movel d2,d0 .even L1: movel sp@+,d2 rts
t.0: .long -28 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .long 4 .long 1 .long 2 .long 1 .long 3 .long 1 .long 2 .long 1 .even .globl _ffs _ffs: movel d2,sp@- movel sp@(8),d1 moveq #0,d2 tstw d1 jne L10 moveb #16,d2 clrw d1 swap d1 L10: tstb d1 jne L11 addql #8,d2 lsrl #8,d1 L11: moveq #15,d0 andl d1,d0 jne L12 addql #4,d2 movel d1,d0 lsrl #4,d0 moveq #15,d1 andl d1,d0 L12: lsll #2,d0 lea (t.0),a0 movel a0@(d0:l),d0 addl d2,d0 movel sp@+,d2 rts
_strlcpy: movel d3,sp@- movel d2,sp@- movel sp@(16),d3 movel sp@(20),d2 movel sp@(12),a1 movel d3,a0 movel d2,d1 jeq L15 .even L25: subql #1,d1 jeq L15 moveb a0@+,d0 moveb d0,a1@+ jne L25 .even L15: tstl d1 jne L20 tstl d2 jeq L22 clrb a1@ .even L22: tstb a0@+ jne L22 L20: subl d3,a0 movel a0,d0 subql #1,d0 movel sp@+,d2 movel sp@+,d3 rts
_strsep: moveml #0x3020,sp@- movel sp@(16),a2 movel sp@(20),d2 movel a2@,a1 moveq #0,d0 cmpw #0,a1 jeq L27 movel a1,d3 .even L29: moveb a1@+,d0 movel d2,a0 moveb a0@+,d1 cmpb d1,d0 jeq L39 .even L33: tstb d1 jeq L29 moveb a0@+,d1 cmpb d1,d0 jne L33 L39: tstb d1 jne L35 subl a1,a1 movel a1,a2@ movel d3,d0 jra L27 .even L35: clrb a1@(-1) movel a1,a2@ movel d3,d0 L27: moveml sp@+,#0x40c rts
@sanjyuubi, post #27
A da radę tym normalnie wygenerować plik wykonywalny dla AOS2.0+?
@teh_KaiN, post #14
Może w końcu skończy się cały bulszit o wyższości jednego kompilatora nad drugim i znajdzie się niekwestionowany zwycięzca.