summaryrefslogtreecommitdiff
path: root/drivers/staging/skein/threefish256Block.c
blob: da3b8357e47f79cee8bda74a14b9ab4a43c42525 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#include <linux/string.h>
#include <threefishApi.h>


/*
 * threefishEncrypt256() - encrypt one four-word block.
 * @keyCtx: supplies key[0..4] and tweak[0..2]; only read, never written.
 *          (key[4] and tweak[2] are presumably the extended key-schedule
 *          words prepared by the caller - confirm against the key setup.)
 * @input:  four 64-bit words to encrypt.
 * @output: receives the four 64-bit result words.
 *
 * Structure: 18 subkey additions, each followed by four mix rounds
 * (72 rounds in total), then one closing subkey addition.  All of
 * @input is copied to locals before @output is written.
 */
void threefishEncrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output)
{
	/* Left-rotation amounts, indexed by [round % 8][mix pair]. */
	static const unsigned int rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	u64 v[4], k[5], t[3];
	unsigned int s, i;

	for (i = 0; i < 4; i++)
		v[i] = input[i];
	for (i = 0; i < 5; i++)
		k[i] = keyCtx->key[i];
	for (i = 0; i < 3; i++)
		t[i] = keyCtx->tweak[i];

	for (s = 0; s < 18; s++) {
		/* Add subkey s: key words rotate mod 5, tweak words mod 3. */
		v[0] += k[s % 5];
		v[1] += k[(s + 1) % 5] + t[s % 3];
		v[2] += k[(s + 2) % 5] + t[(s + 1) % 3];
		v[3] += k[(s + 3) % 5] + s;

		for (i = 0; i < 4; i++) {
			/* Even s uses table rows 0-3, odd s uses rows 4-7. */
			const unsigned int *r = rot[4 * (s % 2) + i];
			/*
			 * Even rounds pair (0,1) and (2,3); odd rounds
			 * pair (0,3) and (2,1).
			 */
			unsigned int p = (i & 1) ? 3 : 1;
			unsigned int q = (i & 1) ? 1 : 3;

			v[0] += v[p];
			v[p] = ((v[p] << r[0]) | (v[p] >> (64 - r[0]))) ^ v[0];
			v[2] += v[q];
			v[q] = ((v[q] << r[1]) | (v[q] >> (64 - r[1]))) ^ v[2];
		}
	}

	/* Closing subkey (number 18). */
	output[0] = v[0] + k[3];
	output[1] = v[1] + k[4] + t[0];
	output[2] = v[2] + k[0] + t[1];
	output[3] = v[3] + k[1] + 18;
}

/*
 * threefishDecrypt256() - decrypt one four-word block.
 * @keyCtx: supplies key[0..4] and tweak[0..2]; only read, never written.
 *          (key[4] and tweak[2] are presumably the extended key-schedule
 *          words prepared by the caller - confirm against the key setup.)
 * @input:  four 64-bit words to decrypt.
 * @output: receives the four 64-bit result words.
 *
 * Structure: subtract subkey 18, then for subkeys 17 down to 0 undo four
 * mix rounds (in reverse order, rotating right) and subtract the subkey.
 * All of @input is copied to locals before @output is written.
 */
void threefishDecrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output)
{
	/* Right-rotation amounts, indexed by [round % 8][mix pair]. */
	static const unsigned int rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	u64 v[4], k[5], t[3];
	u64 x;
	unsigned int s, i;

	for (i = 0; i < 4; i++)
		v[i] = input[i];
	for (i = 0; i < 5; i++)
		k[i] = keyCtx->key[i];
	for (i = 0; i < 3; i++)
		t[i] = keyCtx->tweak[i];

	/* Remove the closing subkey (number 18). */
	v[0] -= k[3];
	v[1] -= k[4] + t[0];
	v[2] -= k[0] + t[1];
	v[3] -= k[1] + 18;

	for (s = 18; s-- > 0; ) {
		for (i = 4; i-- > 0; ) {
			/* Even s uses table rows 0-3, odd s uses rows 4-7. */
			const unsigned int *r = rot[4 * (s % 2) + i];
			/*
			 * Even rounds pair (0,1) and (2,3); odd rounds
			 * pair (0,3) and (2,1).
			 */
			unsigned int p = (i & 1) ? 3 : 1;
			unsigned int q = (i & 1) ? 1 : 3;

			x = v[p] ^ v[0];
			v[p] = (x >> r[0]) | (x << (64 - r[0]));
			v[0] -= v[p];
			x = v[q] ^ v[2];
			v[q] = (x >> r[1]) | (x << (64 - r[1]));
			v[2] -= v[q];
		}

		/* Subtract subkey s: key words rotate mod 5, tweak mod 3. */
		v[0] -= k[s % 5];
		v[1] -= k[(s + 1) % 5] + t[s % 3];
		v[2] -= k[(s + 2) % 5] + t[(s + 1) % 3];
		v[3] -= k[(s + 3) % 5] + s;
	}

	for (i = 0; i < 4; i++)
		output[i] = v[i];
}