14 files changed, 713 insertions, 21 deletions
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile
index 4dfe47d3cd91..f1fcbff3da25 100644
--- a/arch/microblaze/lib/Makefile
+++ b/arch/microblaze/lib/Makefile
@@ -11,3 +11,13 @@ lib-y += memcpy.o memmove.o
 endif
 
 lib-y += uaccess_old.o
+
+lib-y += ashldi3.o
+lib-y += ashrdi3.o
+lib-y += divsi3.o
+lib-y += lshrdi3.o
+lib-y += modsi3.o
+lib-y += muldi3.o
+lib-y += mulsi3.o
+lib-y += udivsi3.o
+lib-y += umodsi3.o
diff --git a/arch/microblaze/lib/ashldi3.c b/arch/microblaze/lib/ashldi3.c
new file mode 100644
index 000000000000..beb80f316095
--- /dev/null
+++ b/arch/microblaze/lib/ashldi3.c
@@ -0,0 +1,29 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+long long __ashldi3(long long u, word_type b)
+{
+	DWunion uu, w;
+	word_type bm;
+
+	if (b == 0)
+		return u;
+
+	uu.ll = u;
+	bm = 32 - b;
+
+	if (bm <= 0) {
+		w.s.low = 0;
+		w.s.high = (unsigned int) uu.s.low << -bm;
+	} else {
+		const unsigned int carries = (unsigned int) uu.s.low >> bm;
+
+		w.s.low = (unsigned int) uu.s.low << b;
+		w.s.high = ((unsigned int) uu.s.high << b) | carries;
+	}
+
+	return w.ll;
+}
+
+EXPORT_SYMBOL(__ashldi3);
diff --git a/arch/microblaze/lib/ashrdi3.c b/arch/microblaze/lib/ashrdi3.c
new file mode 100644
index 000000000000..c884a912b660
--- /dev/null
+++ b/arch/microblaze/lib/ashrdi3.c
@@ -0,0 +1,31 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+long long __ashrdi3(long long u, word_type b)
+{
+	DWunion uu, w;
+	word_type bm;
+
+	if (b == 0)
+		return u;
+
+	uu.ll = u;
+	bm = 32 - b;
+
+	if (bm <= 0) {
+		/* w.s.high = 1..1 or 0..0 */
+		w.s.high =
+		    uu.s.high >> 31;
+		w.s.low = uu.s.high >> -bm;
+	} else {
+		const unsigned int carries = (unsigned int) uu.s.high << bm;
+
+		w.s.high = uu.s.high >> b;
+		w.s.low = ((unsigned int) uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
+
+EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/microblaze/lib/divsi3.S b/arch/microblaze/lib/divsi3.S
new file mode 100644
index 000000000000..595b02d6e86b
--- /dev/null
+++ b/arch/microblaze/lib/divsi3.S
@@ -0,0 +1,73 @@
+#include <linux/linkage.h>
+
+/*
+* Divide operation for 32 bit integers.
+*	Input :	Dividend in Reg r5
+*		Divisor in Reg r6
+*	Output: Result in Reg r3
+*/
+	.text
+	.globl	__divsi3
+	.type __divsi3, @function
+	.ent __divsi3
+__divsi3:
+	.frame	r1, 0, r15
+
+	addik	r1, r1, -16
+	swi	r28, r1, 0
+	swi	r29, r1, 4
+	swi	r30, r1, 8
+	swi	r31, r1, 12
+
+	beqi	r6, div_by_zero /* div_by_zero - division error */
+	beqi	r5, result_is_zero /* result is zero */
+	bgeid	r5, r5_pos
+	xor	r28, r5, r6 /* get the sign of the result */
+	rsubi	r5, r5, 0 /* make r5 positive */
+r5_pos:
+	bgei	r6, r6_pos
+	rsubi	r6, r6, 0 /* make r6 positive */
+r6_pos:
+	addik	r30, r0, 0 /* clear mod */
+	addik	r3, r0, 0 /* clear div */
+	addik	r29, r0, 32 /* initialize the loop count */
+
+	/* first part try to find the first '1' in the r5 */
+div0:
+	blti	r5, div2 /* this traps r5 == 0x80000000 */
+div1:
+	add	r5, r5, r5 /* left shift logical r5 */
+	bgtid	r5, div1
+	addik	r29, r29, -1
+div2:
+	/* left shift logical r5 get the '1' into the carry */
+	add	r5, r5, r5
+	addc	r30, r30, r30 /* move that bit into the mod register */
+	rsub	r31, r6, r30 /* try to subtract (r30 a r6) */
+	blti	r31, mod_too_small
+	/* move the r31 to mod since the result was positive */
+	or	r30, r0, r31
+	addik	r3, r3, 1
+mod_too_small:
+	addik	r29, r29, -1
+	beqi	r29, loop_end
+	add	r3, r3, r3 /* shift in the '1' into div */
+	bri	div2 /* div2 */
+loop_end:
+	bgei	r28, return_here
+	brid	return_here
+	rsubi	r3, r3, 0 /* negate the result */
+div_by_zero:
+result_is_zero:
+	or	r3, r0, r0 /* set result to 0 */
+return_here:
+/* restore values of csrs and that of r3 and the divisor and the dividend */
+	lwi	r28, r1, 0
+	lwi	r29, r1, 4
+	lwi	r30, r1, 8
+	lwi	r31, r1, 12
+	rtsd	r15, 8
+	addik	r1, r1, 16
+
+.size __divsi3, . - __divsi3
+.end __divsi3
diff --git a/arch/microblaze/lib/libgcc.h b/arch/microblaze/lib/libgcc.h
new file mode 100644
index 000000000000..05909d58e2fe
--- /dev/null
+++ b/arch/microblaze/lib/libgcc.h
@@ -0,0 +1,25 @@
+#ifndef __ASM_LIBGCC_H
+#define __ASM_LIBGCC_H
+
+#include <asm/byteorder.h>
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+#ifdef __BIG_ENDIAN
+struct DWstruct {
+	int high, low;
+};
+#elif defined(__LITTLE_ENDIAN)
+struct DWstruct {
+	int low, high;
+};
+#else
+#error I feel sick.
+#endif
+
+typedef union {
+	struct DWstruct s;
+	long long ll;
+} DWunion;
+
+#endif /* __ASM_LIBGCC_H */
diff --git a/arch/microblaze/lib/lshrdi3.c b/arch/microblaze/lib/lshrdi3.c
new file mode 100644
index 000000000000..dcf8d6810b7c
--- /dev/null
+++ b/arch/microblaze/lib/lshrdi3.c
@@ -0,0 +1,29 @@
+#include <linux/module.h>
+
+#include "libgcc.h"
+
+long long __lshrdi3(long long u, word_type b)
+{
+	DWunion uu, w;
+	word_type bm;
+
+	if (b == 0)
+		return u;
+
+	uu.ll = u;
+	bm = 32 - b;
+
+	if (bm <= 0) {
+		w.s.high = 0;
+		w.s.low = (unsigned int) uu.s.high >> -bm;
+	} else {
+		const unsigned int carries = (unsigned int) uu.s.high << bm;
+
+		w.s.high = (unsigned int) uu.s.high >> b;
+		w.s.low = ((unsigned int) uu.s.low >> b) | carries;
+	}
+
+	return w.ll;
+}
+
+EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/microblaze/lib/memcpy.c b/arch/microblaze/lib/memcpy.c
index 014bac92bdff..cc495d7d99cc 100644
--- a/arch/microblaze/lib/memcpy.c
+++ b/arch/microblaze/lib/memcpy.c
@@ -33,17 +33,24 @@
 #include <asm/system.h>
 
 #ifdef __HAVE_ARCH_MEMCPY
+#ifndef CONFIG_OPT_LIB_FUNCTION
 void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 {
 	const char *src = v_src;
 	char *dst = v_dst;
-#ifndef CONFIG_OPT_LIB_FUNCTION
+
 	/* Simple, byte oriented memcpy. */
 	while (c--)
 		*dst++ = *src++;
 
 	return v_dst;
-#else
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
+void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
+{
+	const char *src = v_src;
+	char *dst = v_dst;
+
 	/* The following code tries to optimize the copy by using unsigned
 	 * alignment. This will work fine if both source and destination are
 	 * aligned on the same boundary. However, if they are aligned on
@@ -86,7 +93,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x1:	/* Unaligned - Off by 1 */
 			/* Word align the source */
 			i_src = (const void *) ((unsigned)src & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *i_src++ << 8;
 
@@ -95,7 +102,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 				*i_dst++ = buf_hold | value >> 24;
 				buf_hold = value << 8;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*i_src++ & 0xFFFFFF00) >>8;
 
+			for (; c >= 4; c -= 4) {
+				value = *i_src++;
+				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
+				buf_hold = (value & 0xFFFFFF00) >>8;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src -= 3;
@@ -103,7 +119,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x2:	/* Unaligned - Off by 2 */
 			/* Word align the source */
 			i_src = (const void *) ((unsigned)src & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *i_src++ << 16;
 
@@ -112,7 +128,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 				*i_dst++ = buf_hold | value >> 16;
 				buf_hold = value << 16;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*i_src++ & 0xFFFF0000 )>>16;
 
+			for (; c >= 4; c -= 4) {
+				value = *i_src++;
+				*i_dst++ = buf_hold | ((value & 0xFFFF)<<16);
+				buf_hold = (value & 0xFFFF0000) >>16;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src -= 2;
@@ -120,7 +145,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x3:	/* Unaligned - Off by 3 */
 			/* Word align the source */
 			i_src = (const void *) ((unsigned)src & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *i_src++ << 24;
 
@@ -129,7 +154,16 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 				*i_dst++ = buf_hold | value >> 8;
 				buf_hold = value << 24;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*i_src++ & 0xFF000000) >> 24;
 
+			for (; c >= 4; c -= 4) {
+				value = *i_src++;
+				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
+				buf_hold = (value & 0xFF000000) >> 24;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src -= 1;
@@ -150,7 +184,7 @@ void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
 	}
 
 	return v_dst;
-#endif
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memcpy);
 #endif /* __HAVE_ARCH_MEMCPY */
diff --git a/arch/microblaze/lib/memmove.c b/arch/microblaze/lib/memmove.c
index 0929198c5e68..123e3616f2dd 100644
--- a/arch/microblaze/lib/memmove.c
+++ b/arch/microblaze/lib/memmove.c
@@ -31,16 +31,12 @@
 #include <linux/string.h>
 
 #ifdef __HAVE_ARCH_MEMMOVE
+#ifndef CONFIG_OPT_LIB_FUNCTION
 void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 {
 	const char *src = v_src;
 	char *dst = v_dst;
 
-#ifdef CONFIG_OPT_LIB_FUNCTION
-	const uint32_t *i_src;
-	uint32_t *i_dst;
-#endif
-
 	if (!c)
 		return v_dst;
 
@@ -48,7 +44,6 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 	if (v_dst <= v_src)
 		return memcpy(v_dst, v_src, c);
 
-#ifndef CONFIG_OPT_LIB_FUNCTION
 	/* copy backwards, from end to beginning */
 	src += c;
 	dst += c;
@@ -58,7 +53,22 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 		*--dst = *--src;
 
 	return v_dst;
-#else
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
+void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
+{
+	const char *src = v_src;
+	char *dst = v_dst;
+	const uint32_t *i_src;
+	uint32_t *i_dst;
+
+	if (!c)
+		return v_dst;
+
+	/* Use memcpy when source is higher than dest */
+	if (v_dst <= v_src)
+		return memcpy(v_dst, v_src, c);
+
 	/* The following code tries to optimize the copy by using unsigned
 	 * alignment. This will work fine if both source and destination are
 	 * aligned on the same boundary. However, if they are aligned on
@@ -104,7 +114,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x1:	/* Unaligned - Off by 1 */
 			/* Word align the source */
 			i_src = (const void *) (((unsigned)src + 4) & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *--i_src >> 24;
 
@@ -113,7 +123,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 				*--i_dst = buf_hold << 8 | value;
 				buf_hold = value >> 24;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*--i_src & 0xFF) << 24;
 
+			for (; c >= 4; c -= 4) {
+				value = *--i_src;
+				*--i_dst = buf_hold | ((value & 0xFFFFFF00)>>8);
+				buf_hold = (value  & 0xFF) << 24;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src += 1;
@@ -121,7 +140,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x2:	/* Unaligned - Off by 2 */
 			/* Word align the source */
 			i_src = (const void *) (((unsigned)src + 4) & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *--i_src >> 16;
 
@@ -130,7 +149,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 				*--i_dst = buf_hold << 16 | value;
 				buf_hold = value >> 16;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*--i_src & 0xFFFF) << 16;
 
+			for (; c >= 4; c -= 4) {
+				value = *--i_src;
+				*--i_dst = buf_hold | ((value & 0xFFFF0000)>>16);
+				buf_hold = (value & 0xFFFF) << 16;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src += 2;
@@ -138,7 +166,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 		case 0x3:	/* Unaligned - Off by 3 */
 			/* Word align the source */
 			i_src = (const void *) (((unsigned)src + 4) & ~3);
-
+#ifndef __MICROBLAZEEL__
 			/* Load the holding buffer */
 			buf_hold = *--i_src >> 8;
 
@@ -147,7 +175,16 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 				*--i_dst = buf_hold << 24 | value;
 				buf_hold = value >> 8;
 			}
+#else
+			/* Load the holding buffer */
+			buf_hold = (*--i_src & 0xFFFFFF) << 8;
 
+			for (; c >= 4; c -= 4) {
+				value = *--i_src;
+				*--i_dst = buf_hold | ((value & 0xFF000000)>> 24);
+				buf_hold = (value & 0xFFFFFF) << 8;;
+			}
+#endif
 			/* Realign the source */
 			src = (const void *)i_src;
 			src += 3;
@@ -169,7 +206,7 @@ void *memmove(void *v_dst, const void *v_src, __kernel_size_t c)
 		*--dst = *--src;
 	}
 	return v_dst;
-#endif
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memmove);
 #endif /* __HAVE_ARCH_MEMMOVE */
diff --git a/arch/microblaze/lib/memset.c b/arch/microblaze/lib/memset.c
index ecfb663e1fc1..834565d1607e 100644
--- a/arch/microblaze/lib/memset.c
+++ b/arch/microblaze/lib/memset.c
@@ -31,17 +31,30 @@
 #include <linux/string.h>
 
 #ifdef __HAVE_ARCH_MEMSET
+#ifndef CONFIG_OPT_LIB_FUNCTION
+void *memset(void *v_src, int c, __kernel_size_t n)
+{
+	char *src = v_src;
+
+	/* Truncate c to 8 bits */
+	c = (c & 0xFF);
+
+	/* Simple, byte oriented memset or the rest of count. */
+	while (n--)
+		*src++ = c;
+
+	return v_src;
+}
+#else /* CONFIG_OPT_LIB_FUNCTION */
 void *memset(void *v_src, int c, __kernel_size_t n)
 {
 	char *src = v_src;
-#ifdef CONFIG_OPT_LIB_FUNCTION
 	uint32_t *i_src;
 	uint32_t w32 = 0;
-#endif
+
 	/* Truncate c to 8 bits */
 	c = (c & 0xFF);
 
-#ifdef CONFIG_OPT_LIB_FUNCTION
 	if (unlikely(c)) {
 		/* Make a repeating word out of it */
 		w32 = c;
@@ -72,12 +85,13 @@ void *memset(void *v_src, int c, __kernel_size_t n)
 
 		src  = (void *)i_src;
 	}
-#endif
+
 	/* Simple, byte oriented memset or the rest of count. */
 	while (n--)
 		*src++ = c;
 
 	return v_src;
 }
+#endif /* CONFIG_OPT_LIB_FUNCTION */
 EXPORT_SYMBOL(memset);
 #endif /* __HAVE_ARCH_MEMSET */
diff --git a/arch/microblaze/lib/modsi3.S b/arch/microblaze/lib/modsi3.S
new file mode 100644
index 000000000000..84e0bee6e8c7
--- /dev/null
+++ b/arch/microblaze/lib/modsi3.S
@@ -0,0 +1,73 @@
+#include <linux/linkage.h>
+
+/*
+* modulo operation for 32 bit integers.
+*	Input :	op1 in Reg r5
+*		op2 in Reg r6
+*	Output: op1 mod op2 in Reg r3
+*/
+
+	.text
+	.globl	__modsi3
+	.type __modsi3,  @function
+	.ent __modsi3
+
+__modsi3:
+	.frame	r1, 0, r15
+
+	addik	r1, r1, -16
+	swi	r28, r1, 0
+	swi	r29, r1, 4
+	swi	r30, r1, 8
+	swi	r31, r1, 12
+
+	beqi	r6, div_by_zero /* div_by_zero division error */
+	beqi	r5, result_is_zero /* result is zero */
+	bgeid	r5, r5_pos
+	/* get the sign of the result [ depends only on the first arg] */
+	add	r28, r5, r0
+	rsubi	r5, r5, 0	 /* make r5 positive */
+r5_pos:
+	bgei	r6, r6_pos
+	rsubi	r6, r6, 0	 /* make r6 positive */
+r6_pos:
+	addik	r3, r0, 0 /* clear mod */
+	addik	r30, r0, 0 /* clear div */
+	addik	r29, r0, 32 /* initialize the loop count */
+/* first part try to find the first '1' in the r5 */
+div1:
+	add	r5, r5, r5 /* left shift logical r5 */
+	bgeid	r5, div1
+	addik	r29, r29, -1
+div2:
+	/* left shift logical r5 get the '1' into the carry */
+	add	r5, r5, r5
+	addc	r3, r3, r3 /* move that bit into the mod register */
+	rsub	r31, r6, r3 /* try to subtract (r30 a r6) */
+	blti	r31, mod_too_small
+	/* move the r31 to mod since the result was positive */
+	or	r3, r0, r31
+	addik	r30, r30, 1
+mod_too_small:
+	addik	r29, r29, -1
+	beqi	r29, loop_end
+	add	r30, r30, r30 /* shift in the '1' into div */
+	bri	div2 /* div2 */
+loop_end:
+	bgei	r28, return_here
+	brid	return_here
+	rsubi	r3, r3, 0 /* negate the result */
+div_by_zero:
+result_is_zero:
+	or	r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */
+return_here:
+/* restore values of csrs and that of r3 and the divisor and the dividend */
+	lwi	r28, r1, 0
+	lwi	r29, r1, 4
+	lwi	r30, r1, 8
+	lwi	r31, r1, 12
+	rtsd	r15, 8
+	addik	r1, r1, 16
+
+.size __modsi3,  . - __modsi3
+.end __modsi3
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
new file mode 100644
index 000000000000..ceeaa8c407f2
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.S
@@ -0,0 +1,121 @@
+#include <linux/linkage.h>
+
+/*
+ * Multiply operation for 64 bit integers, for devices with hard multiply
+ *	Input :	Operand1[H] in Reg r5
+ *		Operand1[L] in Reg r6
+ *		Operand2[H] in Reg r7
+ *		Operand2[L] in Reg r8
+ *	Output: Result[H] in Reg r3
+ *		Result[L] in Reg r4
+ *
+ * Explaination:
+ *
+ *	Both the input numbers are divided into 16 bit number as follows
+ *		op1 = A B C D
+ *		op2 = E F G H
+ *	result = D * H
+ *		 + (C * H + D * G) << 16
+ *		 + (B * H + C * G + D * F) << 32
+ *		 + (A * H + B * G + C * F + D * E) << 48
+ *
+ *	Only 64 bits of the output are considered
+ */
+
+	.text
+	.globl	__muldi3
+	.type __muldi3, @function
+	.ent __muldi3
+
+__muldi3:
+	addi	r1, r1, -40
+
+/* Save the input operands on the caller's stack */
+	swi	r5, r1, 44
+	swi	r6, r1, 48
+	swi	r7, r1, 52
+	swi	r8, r1, 56
+
+/* Store all the callee saved registers */
+	sw	r20, r1, r0
+	swi	r21, r1, 4
+	swi	r22, r1, 8
+	swi	r23, r1, 12
+	swi	r24, r1, 16
+	swi	r25, r1, 20
+	swi	r26, r1, 24
+	swi	r27, r1, 28
+
+/* Load all the 16 bit values for A thru H */
+	lhui	r20, r1, 44 /* A */
+	lhui	r21, r1, 46 /* B */
+	lhui	r22, r1, 48 /* C */
+	lhui	r23, r1, 50 /* D */
+	lhui	r24, r1, 52 /* E */
+	lhui	r25, r1, 54 /* F */
+	lhui	r26, r1, 56 /* G */
+	lhui	r27, r1, 58 /* H */
+
+/* D * H ==> LSB of the result on stack ==> Store1 */
+	mul	r9, r23, r27
+	swi	r9, r1, 36 /* Pos2 and Pos3 */
+
+/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
+/* Store the carry generated in position 2 for Pos 3 */
+	lhui	r11, r1, 36 /* Pos2 */
+	mul	r9, r22, r27 /* C * H */
+	mul	r10, r23, r26 /* D * G */
+	add	r9, r9, r10
+	addc	r12, r0, r0
+	add	r9, r9, r11
+	addc	r12, r12, r0 /* Store the Carry */
+	shi	r9, r1, 36 /* Store Pos2 */
+	swi	r9, r1, 32
+	lhui	r11, r1, 32
+	shi	r11, r1, 34 /* Store Pos1 */
+
+/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
+	mul	r9, r21, r27 /* B * H */
+	mul	r10, r22, r26 /* C * G */
+	mul	r7, r23, r25 /* D * F */
+	add	r9, r9, r11
+	add	r9, r9, r10
+	add	r9, r9, r7
+	swi	r9, r1, 32 /* Pos0 and Pos1 */
+
+/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
+	lhui	r11, r1, 32 /* Pos0 */
+	mul	r9, r20, r27 /* A * H */
+	mul	r10, r21, r26 /* B * G */
+	mul	r7, r22, r25 /* C * F */
+	mul	r8, r23, r24 /* D * E */
+	add	r9, r9, r11
+	add 	r9, r9, r10
+	add	r9, r9, r7
+	add	r9, r9, r8
+	sext16	r9, r9 /* Sign extend the MSB */
+	shi	r9, r1, 32
+
+/* Move results to r3 and r4 */
+	lhui	r3, r1, 32
+	add	r3, r3, r12
+	shi	r3, r1, 32
+	lwi	r3, r1, 32 /* Hi Part */
+	lwi	r4, r1, 36 /* Lo Part */
+
+/* Restore Callee saved registers */
+	lw	r20, r1, r0
+	lwi	r21, r1, 4
+	lwi	r22, r1, 8
+	lwi	r23, r1, 12
+	lwi	r24, r1, 16
+	lwi	r25, r1, 20
+	lwi	r26, r1, 24
+	lwi	r27, r1, 28
+
+/* Restore Frame and return */
+	rtsd	r15, 8
+	addi	r1, r1, 40
+
+.size __muldi3, . - __muldi3
+.end __muldi3
diff --git a/arch/microblaze/lib/mulsi3.S b/arch/microblaze/lib/mulsi3.S
new file mode 100644
index 000000000000..90bd7b93afe6
--- /dev/null
+++ b/arch/microblaze/lib/mulsi3.S
@@ -0,0 +1,46 @@
+#include <linux/linkage.h>
+
+/*
+ * Multiply operation for 32 bit integers.
+ *	Input :	Operand1 in Reg r5
+ *		Operand2 in Reg r6
+ *	Output: Result [op1 * op2] in Reg r3
+ */
+	.text
+	.globl	__mulsi3
+	.type __mulsi3,  @function
+	.ent __mulsi3
+
+__mulsi3:
+	.frame	r1, 0, r15
+	add	r3, r0, r0
+	beqi	r5, result_is_zero /* multiply by zero */
+	beqi	r6, result_is_zero /* multiply by zero */
+	bgeid	r5, r5_pos
+	xor	r4, r5, r6 /* get the sign of the result */
+	rsubi	r5, r5, 0 /* make r5 positive */
+r5_pos:
+	bgei	r6, r6_pos
+	rsubi	r6, r6, 0 /* make r6 positive */
+r6_pos:
+	bri	l1
+l2:
+	add	r5, r5, r5
+l1:
+	srl	r6, r6
+	addc	r7, r0, r0
+	beqi	r7, l2
+	bneid	r6, l2
+	add	r3, r3, r5
+	blti	r4, negateresult
+	rtsd	r15, 8
+	nop
+negateresult:
+	rtsd	r15, 8
+	rsub	r3, r3, r0
+result_is_zero:
+	rtsd	r15, 8
+	addi	r3, r0, 0
+
+.size __mulsi3,  . - __mulsi3
+.end __mulsi3
diff --git a/arch/microblaze/lib/udivsi3.S b/arch/microblaze/lib/udivsi3.S
new file mode 100644
index 000000000000..64cf57e4bb85
--- /dev/null
+++ b/arch/microblaze/lib/udivsi3.S
@@ -0,0 +1,84 @@
+#include <linux/linkage.h>
+
+/*
+* Unsigned divide operation.
+*	Input :	Divisor in Reg r5
+*		Dividend in Reg r6
+*	Output: Result in Reg r3
+*/
+
+	.text
+	.globl	__udivsi3
+	.type __udivsi3, @function
+	.ent __udivsi3
+
+__udivsi3:
+
+	.frame	r1, 0, r15
+
+	addik	r1, r1, -12
+	swi	r29, r1, 0
+	swi	r30, r1, 4
+	swi	r31, r1, 8
+
+	beqi	r6, div_by_zero /* div_by_zero /* division error */
+	beqid	r5, result_is_zero /* result is zero */
+	addik	r30, r0, 0 /* clear mod */
+	addik	r29, r0, 32 /* initialize the loop count */
+
+/* check if r6 and r5 are equal - if yes, return 1 */
+	rsub	r18, r5, r6
+	beqid	r18, return_here
+	addik	r3, r0, 1
+
+/* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */
+	xor	r18, r5, r6
+	bgeid	r18, 16
+	add	r3, r0, r0 /* we would anyways clear r3 */
+	blti	r6, return_here /* r6[bit 31 = 1] hence is greater */
+	bri	checkr6
+	rsub	r18, r6, r5 /* microblazecmp */
+	blti	r18, return_here
+
+/* if r6 [bit 31] is set, then return result as 1 */
+checkr6:
+	bgti	r6, div0
+	brid	return_here
+	addik	r3, r0, 1
+
+/* first part try to find the first '1' in the r5 */
+div0:
+	blti	r5, div2
+div1:
+	add	r5, r5, r5 /* left shift logical r5 */
+	bgtid	r5, div1
+	addik	r29, r29, -1
+div2:
+/* left shift logical r5 get the '1' into the carry */
+	add	r5, r5, r5
+	addc	r30, r30, r30 /* move that bit into the mod register */
+	rsub	r31, r6, r30 /* try to subtract (r30 a r6) */
+	blti	r31, mod_too_small
+/* move the r31 to mod since the result was positive */
+	or	r30, r0, r31
+	addik	r3, r3, 1
+mod_too_small:
+	addik	r29, r29, -1
+	beqi	r29, loop_end
+	add	r3, r3, r3 /* shift in the '1' into div */
+	bri	div2 /* div2 */
+loop_end:
+	bri	return_here
+div_by_zero:
+result_is_zero:
+	or	r3, r0, r0 /* set result to 0 */
+return_here:
+/* restore values of csrs and that of r3 and the divisor and the dividend */
+	lwi	r29, r1, 0
+	lwi	r30, r1, 4
+	lwi	r31, r1, 8
+	rtsd	r15, 8
+	addik	r1, r1, 12
+
+.size __udivsi3, . - __udivsi3
+.end __udivsi3
diff --git a/arch/microblaze/lib/umodsi3.S b/arch/microblaze/lib/umodsi3.S
new file mode 100644
index 000000000000..17d16bafae58
--- /dev/null
+++ b/arch/microblaze/lib/umodsi3.S
@@ -0,0 +1,86 @@
+#include <linux/linkage.h>
+
+/*
+ * Unsigned modulo operation for 32 bit integers.
+ *	Input :	op1 in Reg r5
+ *		op2 in Reg r6
+ *	Output: op1 mod op2 in Reg r3
+ */
+
+	.text
+	.globl	__umodsi3
+	.type __umodsi3, @function
+	.ent __umodsi3
+
+__umodsi3:
+	.frame	r1, 0, r15
+
+	addik	r1, r1, -12
+	swi	r29, r1, 0
+	swi	r30, r1, 4
+	swi	r31, r1, 8
+
+	beqi	r6, div_by_zero /* div_by_zero - division error */
+	beqid	r5, result_is_zero /* result is zero */
+	addik	r3, r0, 0 /* clear div */
+	addik	r30, r0, 0 /* clear mod */
+	addik	r29, r0, 32 /* initialize the loop count */
+
+/* check if r6 and r5 are equal /* if yes, return 0 */
+	rsub	r18, r5, r6
+	beqi	r18, return_here
+
+/* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */
+	xor	r18, r5, r6
+	bgeid	r18, 16
+	addik	r3, r5, 0
+	blti	r6, return_here
+	bri	$lcheckr6
+	rsub	r18, r5, r6 /* microblazecmp */
+	bgti	r18, return_here
+
+/* if r6 [bit 31] is set, then return result as r5-r6 */
+$lcheckr6:
+	bgtid	r6, div0
+	addik	r3, r0, 0
+	addik	r18, r0, 0x7fffffff
+	and	r5, r5, r18
+	and	r6, r6, r18
+	brid	return_here
+	rsub	r3, r6, r5
+/* first part: try to find the first '1' in the r5 */
+div0:
+	blti	r5, div2
+div1:
+	add	r5, r5, r5 /* left shift logical r5 */
+	bgeid	r5, div1
+	addik	r29, r29, -1
+div2:
+	/* left shift logical r5 get the '1' into the carry */
+	add	r5, r5, r5
+	addc	r3, r3, r3 /* move that bit into the mod register */
+	rsub	r31, r6, r3 /* try to subtract (r3 a r6) */
+	blti	r31, mod_too_small
+	/* move the r31 to mod since the result was positive */
+	or	r3, r0, r31
+	addik	r30, r30, 1
+mod_too_small:
+	addik	r29, r29, -1
+	beqi	r29, loop_end
+	add	r30, r30, r30 /* shift in the '1' into div */
+	bri	div2 /* div2 */
+loop_end:
+	bri	return_here
+div_by_zero:
+result_is_zero:
+	or	r3, r0, r0 /* set result to 0 */
+return_here:
+/* restore values of csrs and that of r3 and the divisor and the dividend */
+	lwi	r29, r1, 0
+	lwi	r30, r1, 4
+	lwi	r31, r1, 8
+	rtsd	r15, 8
+	addik	r1, r1, 12
+
+.size __umodsi3, . - __umodsi3
+.end __umodsi3