Big Integer Arithmetic
Most of computer applications operates on small integers which can be represented by 32 bit integers. However, there are times we’ll need to deal with extremely big integers which can only be represented by hundreds of bits. One of such application is cryptography.
This post gives two simple implementations of the big integer arithmetic. Both implementations use char array to store the big integers. The first one stores the digits of the integer as char, and the second one stores the digits as single digit integer values.
String Representation
We define a big integer as the structure below,
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
For example, given two input integers “111222333444555” and “12345”, we can store them into two BNs a and b, with a.length = 15, a.dg[0] = ‘1’, a.dg[1]=’1’ …, a.dg[14]=’5’, a.sign = 1; b.length=5, b.dg[0]=’1’, b.dg[1]=’2’…a.dg[4]=’5’, b.sign=1.
Addition: We add the two integers digit by digit from right to left. If the sum of the two digits is bigger than ‘9’. Then a carry of 1 is produced for the next position. And the carry is added to the sum of the digits at the next position. We store the digits of the result from left to right and then reverse it after it’s done.
Subtraction: Subtraction is similar to addition. We minus the digits from the two integers from right to left. If the resulted digit is less than 0, we borrow 1 from the next position.
Multiplication: A naive approach is to use addition to handle multiplication. But if both inputs are big, then the algorithm is very slow.
A better approach is to handle a single digit multiplication using addition, then shift the multiplicand by 1 (which is equivalent to x10) and handle the next digit multiplication.
For example, if the two inputs are 12345 and 57, we treat the multiplication as 12345 x 7 + 123450 x 5, then each of the two multiplications can be handled using additions. In this way, the number of additions needed are reduced to the sum of all digits of the multiplier.
Division: Again the naive approach is to use subtraction. A better alternative is trying to subtract the divisor from part of the dividend from right to left. For example, to compute 234 / 11, we start by computing 23 / 11 using subtraction. The quotient is 2 and the remainder is 1. We then compute (1*10 + 4) / 11 using subtraction, the quotient will be 1. So the result is 21.
Below is a C implementation of the algorithms described above,
/**
array implementation of bignum
**/
#include <stdio.h>
#include <string.h>
#define MAXDG 500
#define PLUS 1
#define MINUS -1
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
int cmp(BN a, BN b);
void sub(BN a, BN b, BN *c);
void add(BN a, BN b, BN *c);
void reverse(BN *c);
void rm_tzeros(BN *c);
void rm_lzeros(BN *c);
void init(BN *a);
int cmp(BN a, BN b) {
if (a.sign == PLUS && b.sign == MINUS) {
return 1;
} else if (a.sign == MINUS && b.sign == PLUS) {
return -1;
} else {
//a and b have same sign
if (a.length > b.length) {
return 1*a.sign;
} else if (a.length < b.length) {
return -1*a.sign;
} else {
//a and b have same sign, same length
int i;
for (i = 0; i < a.length; ++i) {
if (a.dg[i] > b.dg[i]) {
return 1*a.sign;
} else if (a.dg[i] < b.dg[i]) {
return -1*a.sign;
}
}
return 0; //equal
}
}
}
void reverse(BN *c) {
char tmp;
int i;
for (i = 0; i < c->length/2; ++i) {
tmp = c->dg[i];
c->dg[i] = c->dg[c->length-i-1];
c->dg[c->length-i-1] = tmp;
}
}
void rm_lzeros(BN *c) {
int i, j;
for (i = 0; i < c->length; ++i) {
if (c->dg[i] != '0') {
break;
}
}
for (j = 0; i < c->length; ++i, ++j) {
c->dg[j] = c->dg[i];
}
c->length = j == 0?1:j;
c->dg[c->length] = '\0';
}
void rm_tzeros(BN *c) {
int i, j = 0;
for (i = c->length-1; i > 0; --i) {
if (c->dg[i] == '0') {
j++;
} else {
break;
}
}
c->length -= j;
c->dg[c->length] = '\0';
}
void sub(BN a, BN b, BN *c) {
int i, j, k;
int borrow = 0;
if (a.sign == MINUS) {
if (b.sign == MINUS) {
sub(b, a, c);
c->sign *= -1;
} else {
b.sign = MINUS;
add(a, b, c);
}
return;
} else {
if (b.sign == MINUS) {
b.sign = PLUS;
add(a, b, c);
return;
}
}
//a.sign == PLUS, b.sign == PLUS
if (cmp(a, b) < 0) {
sub(b, a, c);
c->sign = MINUS;
return;
} else if (cmp(a, b) == 0) {
c->dg[0] = '0';
c->dg[1] = '\0';
c->length = 1;
c->sign = PLUS;
return;
}
//a.sign == PLUS, b.sign == PLUS, a > b
c->sign = PLUS;
for (i = a.length-1, j = b.length-1, k = 0; i >= 0 && j >= 0; --i, --j, ++k) {
c->dg[k] = a.dg[i] - borrow - b.dg[j];
if (c->dg[k] < 0) {
c->dg[k] += 10;
borrow = 1;
} else {
borrow = 0;
}
c->dg[k] += '0';
}
for (; i >= 0; --i, ++k) {
c->dg[k] = a.dg[i] - borrow;
if (c->dg[k] - '0' < 0) {
c->dg[k] += 10;
borrow = 1;
} else {
borrow = 0;
}
}
c->dg[k] = '\0';
c->length = k;
//printf("%s\n", c->dg);
rm_tzeros(c);
//printf("%s\n", c->dg);
reverse(c);
}
void add(BN a, BN b, BN *c) {
int i, j, k;
int carry = 0;
// printf("%d:%d\n", a.length, b.length);
if (a.sign == b.sign) {
c->sign = a.sign;
} else {
if (a.sign == MINUS) {
a.sign = PLUS; //tmp
sub(b, a, c);
} else {
b.sign = PLUS; //tmp
sub(a, b, c);
}
return;
}
for (i = a.length-1, j = b.length-1, k = 0; i >= 0 && j >= 0; --i, --j, ++k) {
c->dg[k] = a.dg[i] -'0' + b.dg[j] - '0' + carry;
//printf("%d %d %c %c\n",k, c->dg[k], a.dg[i], b.dg[j]);
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
// printf("%d %c\n", k, (*c->dg[k]);
}
for (; i >= 0; --i, ++k){
if (carry != 0) {
c->dg[k] = carry + a.dg[i] - '0';
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
} else {
c->dg[k] = a.dg[i];
}
}
for (; j >= 0; --j, ++k){
if (carry != 0) {
c->dg[k] = carry + b.dg[j] - '0';
carry = c->dg[k]/10;
c->dg[k] = c->dg[k]%10 + '0';
} else {
c->dg[k] = b.dg[j];
}
}
if (carry != 0) {
c->dg[k++] = carry + '0';
}
c->dg[k] = '\0';
c->length = k;
//printf("%d %s\n", c->length, c->dg);
reverse(c);
//printf("%d %s\n", c->length, c->dg);
}
void lshift(BN *a, int num) {
int i;
if ((a->length == 1) && (a->dg[0] == '0')) {
return;
}
for (i = 0; i < num; ++i) {
a->dg[i+a->length] = '0';
}
a->length += num;
a->dg[a->length] = '\0';
}
void mul(BN a, BN b, BN *c) {
int i, j;
init(c);
for (i = a.length - 1; i >= 0; --i) {
for (j = 0; j < a.dg[i]-'0'; ++j) {
//printf("add %s to %s\n", b.dg, c->dg);
add(*c, b, c);
}
lshift(&b, 1);
//printf("%s %s\n", c->dg, b.dg);
}
c->sign = a.sign*b.sign;
}
void div(BN a, BN b, BN *c) {
int i;
BN tmp;
init(c);
init(&tmp);
for (i = 0; i < a.length; ++i) {
//printf("bf: %s\n", tmp.dg);
if (i > 0) {
lshift(&tmp, 1);
} else {
tmp.length = 1;
}
//printf("af: %s\n", tmp.dg);
tmp.dg[tmp.length-1] = a.dg[i];
c->dg[i] = '0';
//printf("%d b: %d:%d:%s %d:%d:%s\n", i, tmp.sign, tmp.length, tmp.dg, b.sign, b.length, b.dg);
rm_lzeros(&tmp);
while (cmp(tmp, b) >= 0) {
//printf("sub: %s %s\n", tmp.dg, b.dg);
sub(tmp, b, &tmp);
c->dg[i]++;
}
}
c->dg[i] = '\0';
c->length = a.length;
//printf("%d %s\n", i, c->dg);
rm_lzeros(c);
//printf("%s\n", c->dg);
}
void init(BN *a) {
a->sign = PLUS;
a->length = 1;
a->dg[0] = '0';
a->dg[1] = '\0';
}
int main(int argc, char** argv) {
BN m, n, rv;
if (argv[1][0] == '-') {
m.sign = MINUS;
sprintf(m.dg, "%s", &(argv[1][1]));
} else {
m.sign = PLUS;
sprintf(m.dg, "%s", argv[1]);
}
m.length = strlen(m.dg);
if (argv[2][0] == '-') {
n.sign = MINUS;
sprintf(n.dg, "%s", &(argv[2][1]));
} else {
n.sign = PLUS;
sprintf(n.dg, "%s", argv[2]);
}
n.length = strlen(n.dg);
printf("m:%c%s\n", m.sign>0?' ':'-', m.dg);
printf("n:%c%s\n", n.sign>0?' ':'-', n.dg);
add(m, n, &rv);
printf("m + n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
sub(m, n, &rv);
printf("m - n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
lshift(&rv, 1);
printf("m << 1 = %c%s\n", rv.sign>0?' ':'-', rv.dg);
mul(m, n, &rv);
printf("m * n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
div(m, n, &rv);
printf("m / n = %c%s\n", rv.sign>0?' ':'-', rv.dg);
}
Save the code to bignumarray.c, and compile the code using the command below,
gcc -o bignumarray bignumarray.c
And here are some simple tests,
./bignumarray 34123432143214321 1342
m: 34123432143214321
n: 1342
m + n = 34123432143215663
m - n = 34123432143212979
m << 1 = 341234321432129790
m * n = 45793645936193618782
m / n = 25427296678997
./bignumarray 332 13424312432
m: 332
n: 13424312432
m + n = 13424312764
m - n = -13424312100
m << 1 = -134243121000
m * n = 4456871727424
m / n = 0
Digit Array Representation
The big integer is still defined as below,
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
But now the dg holds the values for each digit. For example, given two inputs 12345 and 678, then the two big integers are a, b, with a.length = 5, a.sign = 1, and a.dg[0]=5, a.dg[1]=4, a.dg[2]=3, a.dg[3]=2, a.dg[4]=1; b.length=3, b.sign = 1, and b.dg[0]=8, b.dg[1]=7, b.dg[2]=6. Note that the digits are stored in a reverse manner because it is more convenient to manipulate the end of an array than the start of an array (e.g. truncate the array by 1).
The arithmetic operations are similar to the previous implementation. Below is the sample C code.
#include <stdio.h>
#include <string.h>
#define MAXDG 500
#define PLUS 1
#define MINUS -1
typedef struct {
int sign;
int length;
char dg[MAXDG];
} BN;
void add(BN a, BN b, BN *c);
void mul(BN a, BN b, BN *c);
void div(BN a, BN b, BN *c);
void sub(BN a, BN b, BN *c);
void digit_shift(BN *a, int d);
int cmp(BN a, BN b);
void zero_justify(BN* a);
void init(BN *a);
void printbn(BN a);
void printbn(BN a) {
int i;
//printf("length: %d ", a.length);
if (a.sign == MINUS) {
printf("-");
}
for (i = a.length-1; i >= 0; --i) {
printf("%c", '0' + a.dg[i]);
}
printf("\n");
}
void init(BN *a) {
a->length = 1;
memset(a->dg, 0x00, sizeof(char)*MAXDG);
a->sign = PLUS;
}
void zero_justify(BN* a) {
while ((a->length > 1) && (a->dg[a->length-1] == 0)) {
(a->length)--;
}
//if -0, make it +0
if ((a->length == 1) && (a->dg[0] == 0)) {
a->sign = PLUS;
}
}
int cmp(BN a, BN b) {
if (a.sign == PLUS && b.sign == MINUS) {
return 1;
} else if (a.sign == MINUS && b.sign == PLUS) {
return -1;
} else {
//a and b have same sign
if (a.length > b.length) {
return 1*a.sign;
} else if (a.length < b.length) {
return -1*a.sign;
} else {
//a and b have same sign, same length
int i;
for (i = a.length-1; i >= 0; --i) {
if (a.dg[i] > b.dg[i]) {
return 1*a.sign;
} else if (a.dg[i] < b.dg[i]) {
return -1*a.sign;
}
}
return 0; //equal
}
}
}
void digit_shift(BN *a, int d) {
int i;
if ((a->length == 1) && (a->dg[0] == 0)) {
return;
}
for (i = a->length - 1; i >= 0; --i) {
a->dg[i+d] = a->dg[i];
}
for (i = 0; i < d; ++i) {
a->dg[i] = 0;
}
a->length = a->length + d;
}
void add(BN a, BN b, BN *c) {
int i;
int carry = 0;
init(c);
if (a.sign == b.sign) {
c->sign = a.sign;
} else {
if (a.sign == MINUS) {
a.sign = PLUS;
sub(b, a, c);
} else {
b.sign = PLUS;
sub(a, b, c);
}
return;
}
c->length = (a.length > b.length)?a.length+1:b.length+1;
for (i = 0; i < c->length; ++i) {
c->dg[i] = (char)(a.dg[i] + b.dg[i] + carry)%10;
carry = (a.dg[i] + b.dg[i] + carry)/10;
}
zero_justify(c);
}
void sub(BN a, BN b, BN *c) {
int borrow;
int v;
int i;
init(c);
if ((a.sign == MINUS) || (b.sign == MINUS)) {
b.sign = -1*b.sign;
add(a, b, c);
return;
}
if (cmp(a, b) < 0) {
sub(b, a, c);
c->sign = MINUS;
return;
}
//a > b, and both a and b are +
c->length = a.length > b.length?a.length:b.length;
borrow = 0;
for (i = 0; i < c->length; ++i) {
c->dg[i] = (a.dg[i] - borrow - b.dg[i]);
if (c->dg[i] < 0) {
c->dg[i] += 10;
borrow = 1;
} else {
borrow = 0;
}
}
zero_justify(c);
}
void mul(BN a, BN b, BN *c) {
BN row, tmp;
int i,j;
init(c);
for (i = 0; i < b.length; ++i) {
for (j = 0; j < b.dg[i]; ++j) {
add(*c, a, c);
}
digit_shift(&a, 1);
}
zero_justify(c);
}
void div(BN a, BN b, BN *c) {
int i;
BN tmp;
init(c);
init(&tmp);
c->sign = a.sign * b.sign;
a.sign = PLUS;
b.sign = PLUS;
c->length = a.length;
for (i = a.length - 1; i >= 0; --i) {
digit_shift(&tmp, 1);
//printf("%d:%d\n", tmp.length, b.length);
//printbn(tmp);
tmp.dg[0] = a.dg[i];
c->dg[i] = 0;
while (cmp(tmp, b) >= 0) {
(c->dg[i])++;
//printbn(tmp);
sub(tmp, b, &tmp);
//printbn(tmp);
}
//printf("div: %d\n", c->dg[i]);
}
zero_justify(c);
}
void readbn(char *s, BN *a) {
int i, j;
init(a);
// printf("readbn: %d\n", strlen(s));
for (i = strlen(s)-1, j = 0; i >= 0; --i) {
a->dg[j++] = s[i] - '0';
}
a->length = j;
}
int main(int argc, char** argv) {
BN m, n, rv;
if (argv[1][0] == '-') {
m.sign = MINUS;
readbn(&argv[1][1], &m);
} else {
m.sign = PLUS;
readbn(argv[1], &m);
}
if (argv[2][0] == '-') {
n.sign = MINUS;
readbn(&argv[2][1], &n);
} else {
n.sign = PLUS;
readbn(argv[2], &n);
}
printf("m:");
printbn(m);
printf("n:");
printbn(n);
add(m, n, &rv);
printf("m + n = ");
printbn(rv);
sub(m, n, &rv);
printf("m - n = ");
printbn(rv);
digit_shift(&rv, 1);
printf("m << 1 = ");
printbn(rv);
mul(m, n, &rv);
printf("m * n = ");
printbn(rv);
div(m, n, &rv);
printf("m / n = ");
printbn(rv);
}
Save the code to bignumarray2.c, and compile the code using the command below,
gcc -o bignumarray2 bignumarray2.c
And here are some simple tests,
./bignumarray2 34123432143214321 1342
m:34123432143214321
n:1342
m + n = 34123432143215663
m - n = 34123432143212979
m << 1 = 341234321432129790
m * n = 45793645936193618782
m / n = 25427296678997
./bignumarray2 332 13424312432
m:332
n:13424312432
m + n = 13424312764
m - n = -13424312100
m << 1 = -134243121000
m * n = 4456871727424
m / n = 0
Additional Notes
1. Dynamic array is necessary in order to support arbitrary length of big integers.
2. Using a single byte to represent a single digit is a huge waste. There’re better representations.
References:
1. Programming Challenges
Leave a Reply Cancel reply
40% Discount on My Book — Android NDK Cookbook
Android NDK Cookbook ebook 40% discount with promotion code MREANC40 at Packt Publishing The promotion code is valid until 15th June.Categories
- Android Apps (18)
- Android Audio Editor (1)
- TS 2 (3)
- Video Converter Android (8)
- Video2Gif (1)
- Android Tutorial (27)
- Android Dev Tools (1)
- API illustrated (8)
- Multimedia API (3)
- ffmpeg on Android (4)
- NDK (6)
- UI (6)
- Animation (2)
- Code Snippet (2)
- Coding Beyond Technique (18)
- a word, a world (4)
- Bug Rectified (4)
- Programming Habit (1)
- Software as a Career (1)
- Software as User Experience (1)
- Compilers and Related (2)
- ELF (2)
- Computer Languages (31)
- C/C++ (13)
- Java (9)
- JavaScript (2)
- PHP (1)
- Python (8)
- Data Structure & Algorithms (29)
- Bits (1)
- Data Structure (5)
- Integers (10)
- BigInteger (1)
- Prime (4)
- Search (3)
- Sorting (5)
- Strings (5)
- Database (1)
- SQLite (1)
- Digital Signal Processing (33)
- Distributed Systems (17)
- Apache Cassandra (6)
- Apache Hadoop (8)
- Apache Avro (3)
- Apache Nutch (3)
- Apache Solr (1)
- Linux Study Notes (40)
- crontab (1)
- Linux Kernel Programming (8)
- Linux Programming (12)
- IPC (2)
- Linux Network Programming (5)
- Linux Signals (2)
- Linux Shell Scripting (1)
- ssh (3)
- Machinery (30)
- misc (1)
- My Ideas (1)
- My Project (3)
- Mobile Caching (1)
- Selective Decoding (2)
- My Publication (1)
- My Readings (1)
- Networking (15)
- Program for Performance (8)
- Uncategorized (1)
- Virtual Machine (2)
- Web Dev (8)
- web components (3)
- Android Apps (18)
Recent Comments
Archives
- May 2013 (2)
- April 2013 (1)
- March 2013 (4)
- December 2012 (2)
- November 2012 (6)
- October 2012 (6)
- September 2012 (3)
- August 2012 (13)
- July 2012 (15)
- June 2012 (3)
- May 2012 (8)
- April 2012 (4)
- March 2012 (13)
- February 2012 (19)
- January 2012 (9)
- December 2011 (11)
- November 2011 (12)
- October 2011 (4)
- September 2011 (12)
- August 2011 (16)
- July 2011 (15)
- June 2011 (6)
- May 2011 (10)
- April 2011 (13)
- March 2011 (20)
- February 2011 (4)
- November 2010 (2)
- May 2010 (1)
- April 2010 (1)
- February 2010 (1)




