summaryrefslogtreecommitdiff
path: root/arch/cris/arch-v10/lib/checksumcopy.S
blob: 35cbffb306fd857fa8c072727dd8c8f853146c66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/* $Id: checksumcopy.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
 * A fast checksum+copy routine using movem
 * Copyright (c) 1998, 2001 Axis Communications AB
 *
 * Authors:	Bjorn Wesen
 * 
 * csum_partial_copy_nocheck(const char *src, char *dst,
 *		             int len, unsigned int sum)
 */

	;; Copy r12 bytes from src to dst and, in the same pass, add the copied
	;; data to the incoming sum using end-around carry (Internet checksum
	;; style) arithmetic.
	;;
	;; In:	r10 = src, r11 = dst, r12 = length in bytes, r13 = initial sum
	;; Out:	r10 = resulting sum.  The sum is folded to 16 bits before the
	;;	trailing word/byte additions but not after them, so the value
	;;	returned can be wider than 16 bits.
	;; Clobbers: r9, r11, r12, r13 and the flags.  r0 - r8 are used by the
	;;	movem loop but are saved on the stack and restored again.
	;;
	;; Several branches and both rets below are followed by an instruction
	;; that does real work; the code relies on that instruction executing
	;; in the delay slot of the branch/ret.  Keep the instruction order.
	;;
	;; The numeric labels 1, 2 and 3 are not referenced in this file.
	;; NOTE(review): presumably they are picked up by an exception table
	;; defined outside this file - confirm before moving or removing them.

	.globl	csum_partial_copy_nocheck
csum_partial_copy_nocheck:	
	
	;; r10 - src
	;; r11 - dst
	;; r12 - length
	;; r13 - checksum

	;; check for breakeven length between movem and normal word looping versions
	;; the movem loop always copies and sums a whole 40-byte chunk, so we
	;; also must _NOT_ enter it when length < 40, or we would compute a
	;; checksum over more than the actual length.  The breakeven threshold
	;; used here (80 bytes) covers that case as well.
	
	cmpu.w	80, $r12
	blo	_word_loop	; length < 80 (unsigned): skip the movem loop
	nop

	;; need to save the registers we use below in the movem loop
	;; this overhead is why we have a check above for breakeven length
	;; only r0 - r8 have to be saved, the other ones are clobber-able
	;; according to the ABI
	
	subq	9*4, $sp	; make room for 9 registers
	movem	$r8, [$sp]	; store r0 - r8
	
	;; do a movem copy and checksum

	;; r12 is kept biased by -40 while looping so that the sign of r12
	;; after the subtraction at the bottom tells whether another full
	;; chunk is left.

	subq	10*4, $r12	; update length for the first loop
	
_mloop:	movem	[$r10+],$r9	; read 10 longwords
1:	;; A failing userspace access will have this as PC.
	movem	$r9,[$r11+]	; write 10 longwords

	;; perform dword checksumming on the 10 longwords
	;; each ax makes the add that follows it include the carry produced
	;; by the previous add, so no carry is lost between the additions
	
	add.d	$r0,$r13
	ax
	add.d	$r1,$r13
	ax
	add.d	$r2,$r13
	ax
	add.d	$r3,$r13
	ax
	add.d	$r4,$r13
	ax
	add.d	$r5,$r13
	ax
	add.d	$r6,$r13
	ax
	add.d	$r7,$r13
	ax
	add.d	$r8,$r13
	ax
	add.d	$r9,$r13

	;; fold the carry into the checksum, to avoid having to loop the carry
	;; back into the top
	
	ax
	addq	0,$r13		; checksum += carry from the last add.d
	ax			; do it again, since we might have generated a carry
	addq	0,$r13

	subq	10*4,$r12
	bge	_mloop		; still >= 0: at least one more full chunk
	nop

	addq	10*4,$r12	; compensate for last loop underflowing length

	movem	[$sp+],$r8	; restore regs

	;; Reached either directly from the length check (length < 80) or
	;; after the movem loop (fewer than 40 bytes left).

_word_loop:
	;; only fold if there is anything to fold.

	;; There is no nop after the beq, so the move.d that follows it sits
	;; in the delay slot and is executed even when the branch is taken.
	;; That is harmless: r9 is only a temporary, and the flags are set
	;; again by the cmpq at _no_fold.

	cmpq	0,$r13
	beq	_no_fold

	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
	;; r9 can be used as temporary.
	
	move.d	$r13,$r9
	lsrq	16,$r9		; r9 = checksum >> 16
	and.d	0xffff,$r13	; checksum = checksum & 0xffff
	add.d	$r9,$r13	; checksum += r9
	move.d	$r13,$r9	; do the same again, maybe we got a carry last add
	lsrq	16,$r9
	and.d	0xffff,$r13
	add.d	$r9,$r13
	
_no_fold:
	cmpq	2,$r12
	blt	_no_words	; fewer than 2 bytes left: no whole word to do
	nop
	
	;; copy and checksum the rest of the words
	;; r12 is biased by -2 while looping, in the same way as in the
	;; movem loop above
	
	subq	2,$r12
	
_wloop:	move.w	[$r10+],$r9
2:	;; A failing userspace access will have this as PC.
	addu.w	$r9,$r13	; checksum += zero-extended word
	subq	2,$r12
	bge	_wloop
	move.w	$r9,[$r11+]	; delay slot: store the word on every iteration
	
	addq	2,$r12		; undo the bias; r12 is now 0 or 1
		
_no_words:
	;; see if we have one odd byte more
	cmpq	1,$r12
	beq	_do_byte
	nop
	ret
	move.d	$r13, $r10	; delay slot of ret: return the checksum in r10

_do_byte:	
	;; copy and checksum the last byte
	move.b	[$r10],$r9
3:	;; A failing userspace access will have this as PC.
	addu.b	$r9,$r13	; checksum += zero-extended byte
	move.b	$r9,[$r11]
	ret
	move.d	$r13, $r10	; delay slot of ret: return the checksum in r10