During school I made a Huffman encoder and decoder that has come back and haunted me on several notable occasions – namely, those interviews. Microsoft was especially excited to test me knowledge on how well I could improve this program. My interviewer was able to improve the running time so that the decoding was done in O(1) time using a pre-built array – that is something I have yet to implement in this version.
Hope you like it: hack it, modify it, find bugs, make bugs, do whatever with this. Just make sure you send me the upgraded versions!
Here is the full version here packaged with all the mentioned files and tests: Huffman.zip
Makefile
# Last Modified: Sat Jan 10 14:01:43 PST 2009 # @author sholsapp CC=gcc SUS3=-D_POSIX_SOURCE -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=600 HARDEN=-D_FORTIFY_SOURCE CFLAGS=-Wall -g -std=c99 -pedantic $(SUS3) $(HARDEN) LDFLAGS= ALL=huff-e huff-d all: $(ALL) huff-e: huff-e.o $(CC) $(LDFLAGS) -o $@ $^ huff-e.o: huff-e.c huff.h $(CC) $(CFLAGS) -c $< huff-d: huff-d.o $(CC) $(LDFLAGS) -o $@ $^ huff-d.o: huff-d.c huff.h $(CC) $(CFLAGS) -c $< test: MALLOC_CHECK=1 valgrind --leak-check=full --show-reachable=yes ./huff-e tests/test.in clean: rm -rf core* *.o *.gch $(ALL)
huff.h
/** * @file huff.h * @author sholsapp */ #include stdio.h #include stdlib.h #include unistd.h #include sys/types.h #include sys/stat.h #include fcntl.h #include ctype.h #include string.h #define TRUE 1 #define FALSE 0 #define CHARSET 256 #define EXTLEN 6 #define BYTE 8 /** * Defines thislnode to be also called lnode */ typedef struct thislnode lnode; /** * Defines the thislnode structure. * * Used for managing linked lists and trees. */ struct thislnode { char item; int count; /*list properties*/ lnode* next; /*tree properties*/ lnode* left; lnode* right; char isLeaf; /*encoded properties*/ char* pattern; }thislnode; /** * @param the lnode* pointing to the root of this tree * @param the char* containing the current string map * @param the int representing the depth of the tree * @param the char** to the lookup table we're creating */ void generate(lnode* root, char* pattern, int depth, char** lookup) { char temp[CHARSET]; strcpy(temp, pattern); if (root->left->left != NULL) { strcat(temp, "0"); generate(root->left, temp, depth+1, lookup); } if (root->left->isLeaf) { char* newpattern = (char*)malloc(sizeof(char)*depth+1); strcpy(newpattern, temp); strcat(newpattern, "0"); root->left->pattern = newpattern; lookup[(int)root->left->item] = newpattern; } if (root->right->left != NULL) { temp[depth-1] = '\0'; strcat(temp, "1"); generate(root->right, temp, depth + 1, lookup); } if (root->right->isLeaf) { temp[depth-1] = '\0'; char* newpattern = (char*)malloc(sizeof(char)*depth+1); strcpy(newpattern, temp); strcat(newpattern, "1"); root->right->pattern = newpattern; lookup[(int)root->right->item] = newpattern; } } /** * @param the char* to a pattern of BYTE many characters. */ int packbits(char* pattern) { int result = 0; int i = 0; for (i = 0; i < BYTE; i++) { if (pattern[i] == '1') { result = result | 1; result = result<<(i != (BYTE-1) ? 1 : 0); } else if (pattern[i] == '0') { result = result<<(i != (BYTE-1) ? 1 : 0); } } return result; } /** * @param the int to be converted to ascii 0 and 1. */ char* unpackbits(int bits) { char* result = (char*)malloc(sizeof(char)*BYTE); unsigned char mask = 0x80; int i = 0; for(i = 0; i < BYTE; i++) { result[i] = (bits&mask) == 0 ? '0' : '1'; bits = bits<<1; } return result; }
huff-e.c
/** * @file huff-e.c * * This program generates a binary file from an ASCII text file. * * @author sholsapp */ #include "huff.h" /** * The main thing. * @param the int argc the number of command line arguments. * @param the char* array holding command line tokens. */ int main (int argc, char* argv[]) { FILE* input = stdin; char** lookup = (char**)malloc(sizeof(char*)*CHARSET); int in; const char* temppath = "huff-e.orig.temp"; FILE* tempfile = fopen(temppath, "w+"); unlink(temppath); if (tempfile == NULL) { fprintf(stderr, "Error creating %s.\n", temppath); exit(EXIT_FAILURE); } lnode* staticlist = (lnode*)malloc(sizeof(lnode)); lnode* head = NULL; lnode* current = NULL; lnode* temp = NULL; lnode* working = NULL; lnode* prev = NULL; int done = 1; for (done = 1; done < argc; done++) { input = fopen(argv[done], "r"); if (input == NULL) { fprintf(stderr, "Error opening file %d\n", done); exit(EXIT_FAILURE); } int arr[CHARSET] = {0}; int count = 0; while (((in = fgetc(input))) != EOF) { if (fputc(in, tempfile) == EOF) { fprintf(stderr, "Error writing temp file.\n"); exit(EXIT_FAILURE); } arr[in] = arr[in] + 1; count++; } int itemcount = count-1; if(fseek(tempfile, 0, SEEK_SET)) { fprintf(stderr, "Error seeking on %s.\n", temppath); } int i = 0; int maxi = 0; int max = 0; while (count > 0) { for (i = 0; i < CHARSET; i++) { if (arr[i] > max && arr[i] != 0) { maxi = i; max = arr[i]; } } count = count - max; if (head == NULL) { head = (lnode*)malloc(sizeof(lnode)); head->item = maxi; head->count = max; head->isLeaf = TRUE; head->next = NULL; } else { current = (lnode*)malloc(sizeof(lnode)); current->item = maxi; current->count = max; current->isLeaf = TRUE; temp = head; staticlist = head = current; current->next = temp; } arr[maxi] = 0; max = 0; } //build a binary tree while (head->next != NULL) { temp = head; head = head->next; working = (lnode*)malloc(sizeof(lnode)); working->count = temp->count + head->count; working->left = temp; working->right = head; working->isLeaf = FALSE; current = head->next; prev = NULL; while (current != NULL && current->count <= working->count) { prev = current; current = current->next; } if (prev == NULL) { working->next = current; head = working; } else { prev->next = working; working->next = current; head = head->next; } } generate(head, "", 1, lookup); int pathlen = strlen(argv[done]); char* outname = (char*)malloc(sizeof(char)*pathlen + EXTLEN); strcpy(outname, argv[done]); strcat(outname, ".huff"); FILE* output = fopen(outname, "w"); fprintf(output, "%d ", itemcount); current = staticlist; while (current != NULL) { if (current->isLeaf) { fprintf(output, "%d %d ", current->item, current->count); } current = current->next; } fprintf(output, "\n"); char* asciioutpath = "huff-e.ascii.temp"; FILE* asciiout = fopen(asciioutpath, "w+"); unlink(asciioutpath); if (asciiout == NULL) { fprintf(stderr, "Error creating %s.\n", asciioutpath); exit(EXIT_FAILURE); } while (((in = fgetc(tempfile))) != EOF) { fputs(lookup[in], asciiout); } if(fseek(asciiout, 0, SEEK_SET)) { fprintf(stderr, "Error seeking on %s.\n", asciioutpath); } char* pattern = (char*)malloc(sizeof(char)*BYTE); int result = 0; count = 0; while ((in = fgetc(asciiout)) != EOF || count != 0) { if (feof(asciiout)) { while (count != BYTE) { pattern[count] = '0'; count++; } result = packbits(pattern); fprintf(output, "%c", result); break; } if (in == '1' || in == '0') { pattern[count] = in; count++; } if (count == BYTE) { result = packbits(pattern); fprintf(output, "%c", result); count = 0; } } fclose(output); fclose(input); } } </code>
huff-d.c
/** * @file huff-d.c * * This program generates an ASCII text file from a binary file. * * @author sholsapp */ #include "huff.h" /** * The main thing. * @param the int argc the number of command line arguments. * @param the char* array holding command line tokens. */ int main(int argc, char* argv[]) { FILE* input; int in; char* bits; lnode* staticlist = (lnode*)malloc(sizeof(lnode)); lnode* head = NULL; lnode* current = NULL; lnode* temp = NULL; lnode* working = NULL; lnode* prev = NULL; int done = 1; for (done = 1; done < argc; done++) { input = fopen(argv[done], "r"); if (input == NULL) { fprintf(stderr, "Error opening file %d\n", done); exit(EXIT_FAILURE); } int ascii = 0; int freq = 0; int itemcount = 0; fscanf(input, "%d ", &itemcount); while (fscanf(input, "%d %d", &ascii, &freq) != 0) { if (head == NULL) { head = (lnode*)malloc(sizeof(lnode)); head->item = ascii; head->count = freq; head->isLeaf = TRUE; current = staticlist = head; } else { current->next = (lnode*)malloc(sizeof(lnode)); current->next->item = ascii; current->next->count = freq; current->next->isLeaf = TRUE; current = current->next; } } while (head->next != NULL) { temp = head; head = head->next; working = (lnode*)malloc(sizeof(lnode)); working->count = temp->count + head->count; working->left = temp; working->right = head; working->isLeaf = FALSE; current = head->next; prev = NULL; while (current != NULL && current->count <= working->count) { prev = current; current = current->next; } if (prev == NULL) { working->next = current; head = working; } else { prev->next = working; working->next = current; head = head->next; } } int pathlen = strlen(argv[done]); char* outname = (char*)malloc(sizeof(char)*pathlen+1); strncpy(outname, argv[done], pathlen-5); //strcat(outname, ".deco"); FILE* output = fopen(outname, "w"); char* asciioutpath = "huff-d.ascii.temp"; FILE* asciiout = fopen(asciioutpath, "w+"); unlink(asciioutpath); if (asciiout == NULL) { fprintf(stderr, "Error creating %s.\n", asciioutpath); exit(EXIT_FAILURE); } int count = 0; while ((in = fgetc(input)) != EOF) { bits = unpackbits(in); fprintf(asciiout, "%s", bits); count++; } if(fseek(asciiout, 0, SEEK_SET)) { fprintf(stderr, "Error seeking on %s.\n", asciioutpath); exit(EXIT_FAILURE); } current = head; while ((in = fgetc(asciiout)) != EOF && itemcount > 0) { current = in == '1' ? current->right : current->left; if (current->isLeaf) { fputc(current->item, output); current = head; itemcount--; } } fputc('\n', output); fclose(input); fclose(output); } }
