Huffman encoder in C

During school I made a Huffman encoder and decoder that has come back and haunted me on several notable occasions – namely, those interviews. Microsoft was especially excited to test me knowledge on how well I could improve this program. My interviewer was able to improve the running time so that the decoding was done in O(1) time using a pre-built array – that is something I have yet to implement in this version.

Hope you like it: hack it, modify it, find bugs, make bugs, do whatever with this. Just make sure you send me the upgraded versions!

Here is the full version here packaged with all the mentioned files and tests: Huffman.zip

Makefile

#  Last Modified: Sat Jan 10 14:01:43 PST 2009
#  @author sholsapp
CC=gcc
SUS3=-D_POSIX_SOURCE -D_POSIX_C_SOURCE=200112L -D_XOPEN_SOURCE=600
HARDEN=-D_FORTIFY_SOURCE
CFLAGS=-Wall -g -std=c99 -pedantic $(SUS3) $(HARDEN)
LDFLAGS=
 
ALL=huff-e huff-d
 
all:	$(ALL)
 
huff-e: huff-e.o
	$(CC) $(LDFLAGS) -o $@ $^
 
huff-e.o: huff-e.c huff.h
	$(CC) $(CFLAGS) -c $<
 
huff-d: huff-d.o
	$(CC) $(LDFLAGS) -o $@ $^
 
huff-d.o: huff-d.c huff.h
	$(CC) $(CFLAGS) -c $<
 
test:
	MALLOC_CHECK=1 valgrind --leak-check=full --show-reachable=yes ./huff-e tests/test.in
 
clean:
	rm -rf core* *.o *.gch $(ALL)

huff.h

/**
 * @file huff.h
 * @author sholsapp
 */
 
#include stdio.h
#include stdlib.h
#include unistd.h
#include sys/types.h
#include sys/stat.h
#include fcntl.h
#include ctype.h
#include string.h
 
#define TRUE 1
#define FALSE 0
 
#define CHARSET 256
#define EXTLEN 6
#define BYTE 8
 
/**
 * Defines thislnode to be also called lnode
 */
typedef struct thislnode lnode;
 
/**
 * Defines the thislnode structure.
 *
 * Used for managing linked lists and trees.
 */
struct thislnode {
  char item;
  int count;
 
  /*list properties*/
  lnode* next;
 
  /*tree properties*/
  lnode* left;
  lnode* right;
  char isLeaf;
 
  /*encoded properties*/
  char* pattern;
 
}thislnode;
 
/**
 * @param the lnode* pointing to the root of this tree
 * @param the char* containing the current string map
 * @param the int representing the depth of the tree
 * @param the char** to the lookup table we're creating
 */
void generate(lnode* root, char* pattern, int depth, char** lookup) {
  char temp[CHARSET];
  strcpy(temp, pattern);
 
  if (root->left->left != NULL) {
    strcat(temp, "0");
    generate(root->left, temp, depth+1, lookup);
  }
 
  if (root->left->isLeaf) {
    char* newpattern = (char*)malloc(sizeof(char)*depth+1);
    strcpy(newpattern, temp);
    strcat(newpattern, "0");
 
    root->left->pattern = newpattern;
    lookup[(int)root->left->item] = newpattern;
  }
 
  if (root->right->left != NULL) {
    temp[depth-1] = '\0';
    strcat(temp, "1");
    generate(root->right, temp, depth + 1, lookup);
  }
 
  if (root->right->isLeaf) {
    temp[depth-1] = '\0';
    char* newpattern = (char*)malloc(sizeof(char)*depth+1);
    strcpy(newpattern, temp);
    strcat(newpattern, "1");
 
    root->right->pattern = newpattern;
    lookup[(int)root->right->item] = newpattern;
  }
}
 
/**
 * @param the char* to a pattern of BYTE many characters.
 */
int packbits(char* pattern) {
  int result = 0;
  int i = 0;
  for (i = 0; i < BYTE; i++) {
    if (pattern[i] == '1') {
      result = result | 1;
      result = result<<(i != (BYTE-1) ? 1 : 0);
    }
    else if (pattern[i] == '0') {
      result = result<<(i != (BYTE-1) ? 1 : 0);
    }
  }
  return result;
}
 
/**
 * @param the int to be converted to ascii 0 and 1.
 */
char* unpackbits(int bits) {
  char* result = (char*)malloc(sizeof(char)*BYTE);
  unsigned char mask = 0x80;
  int i = 0;
  for(i = 0; i < BYTE; i++) {
    result[i] = (bits&mask) == 0 ? '0' : '1';
    bits = bits<<1;
  }
  return result;
}

huff-e.c

/**
 * @file huff-e.c
 *
 * This program generates a binary file from an ASCII text file.
 *
 * @author sholsapp
 */
 
#include "huff.h"
 
/**
 * The main thing.
 * @param the int argc the number of command line arguments.
 * @param the char* array holding command line tokens.
 */
int
main (int argc, char* argv[]) {
  FILE* input = stdin;
  char** lookup = (char**)malloc(sizeof(char*)*CHARSET);
  int in;
 
  const char* temppath = "huff-e.orig.temp";
  FILE* tempfile = fopen(temppath, "w+");
  unlink(temppath);
  if (tempfile == NULL) {
    fprintf(stderr, "Error creating %s.\n", temppath);
    exit(EXIT_FAILURE);
  }
 
  lnode* staticlist = (lnode*)malloc(sizeof(lnode));
  lnode* head = NULL;
  lnode* current = NULL;
  lnode* temp = NULL;
  lnode* working = NULL;
  lnode* prev = NULL;
 
  int done = 1;
  for (done = 1; done < argc; done++) {
    input = fopen(argv[done], "r");
    if (input == NULL) {
      fprintf(stderr, "Error opening file %d\n", done);
      exit(EXIT_FAILURE);
    }
 
    int arr[CHARSET] = {0};
    int count = 0;
 
    while (((in = fgetc(input))) != EOF) {
      if (fputc(in, tempfile) == EOF) {
        fprintf(stderr, "Error writing temp file.\n");
        exit(EXIT_FAILURE);
      }
      arr[in] = arr[in] + 1;
      count++;
    }
 
    int itemcount = count-1;
 
    if(fseek(tempfile, 0, SEEK_SET)) {
      fprintf(stderr, "Error seeking on %s.\n", temppath);
    }
 
    int i = 0;
    int maxi = 0;
    int max = 0;
    while (count > 0) {
      for (i = 0; i < CHARSET; i++) {
        if (arr[i] > max && arr[i] != 0) {
          maxi = i;
          max = arr[i];
        }
      }
 
      count = count - max;
      if (head == NULL) {
        head = (lnode*)malloc(sizeof(lnode));
        head->item = maxi;
        head->count = max;
        head->isLeaf = TRUE;
        head->next = NULL;
      }
      else {
        current = (lnode*)malloc(sizeof(lnode));
        current->item = maxi;
        current->count = max;
        current->isLeaf = TRUE;
        temp = head;
        staticlist = head = current;
        current->next = temp;
      }
      arr[maxi] = 0;
      max = 0;
    }
 
    //build a binary tree
    while (head->next != NULL) {
      temp = head;
      head = head->next;
      working = (lnode*)malloc(sizeof(lnode));
      working->count = temp->count + head->count;
      working->left = temp;
      working->right = head;
      working->isLeaf = FALSE;
      current = head->next;
      prev = NULL;
      while (current != NULL && current->count <= working->count) {
        prev = current;
        current = current->next;
      } 
      if (prev == NULL) {
        working->next = current;
        head = working;
      }
      else {
        prev->next = working;
        working->next = current;
        head = head->next;
      }
    }
 
    generate(head, "", 1, lookup);
 
    int pathlen = strlen(argv[done]);
    char* outname = (char*)malloc(sizeof(char)*pathlen + EXTLEN);
    strcpy(outname, argv[done]);
    strcat(outname, ".huff");
    FILE* output = fopen(outname, "w");
 
    fprintf(output, "%d ", itemcount);
    current = staticlist;
    while (current != NULL) {
      if (current->isLeaf) {
        fprintf(output, "%d %d ", current->item, current->count);
      }
      current = current->next;
    }
    fprintf(output, "\n");
 
    char* asciioutpath = "huff-e.ascii.temp";
    FILE* asciiout = fopen(asciioutpath, "w+");
    unlink(asciioutpath);
    if (asciiout == NULL) {
      fprintf(stderr, "Error creating %s.\n", asciioutpath);
      exit(EXIT_FAILURE);
    }
 
    while (((in = fgetc(tempfile))) != EOF) {
      fputs(lookup[in], asciiout);
    }
 
    if(fseek(asciiout, 0, SEEK_SET)) {
      fprintf(stderr, "Error seeking on %s.\n", asciioutpath);
    }
 
    char* pattern = (char*)malloc(sizeof(char)*BYTE);
    int result = 0;
    count = 0;
 
    while ((in = fgetc(asciiout)) != EOF || count != 0) {
      if (feof(asciiout)) {
        while (count != BYTE) {
          pattern[count] = '0';
          count++;
        }
        result = packbits(pattern);
        fprintf(output, "%c", result);
        break;
      }
      if (in == '1' || in == '0') {
        pattern[count] = in;
        count++;
      }
      if (count == BYTE) {
        result = packbits(pattern);
        fprintf(output, "%c", result);
        count = 0;
      }
    }
    fclose(output);
    fclose(input);
  }
}
</code>

huff-d.c

/**
 * @file huff-d.c
 *
 * This program generates an ASCII text file from a binary file.
 *
 * @author sholsapp
 */
 
#include "huff.h"
 
/**
 * The main thing.
 * @param the int argc the number of command line arguments.
 * @param the char* array holding command line tokens.
 */
int
main(int argc, char* argv[]) {
  FILE* input;
  int in;
  char* bits;
 
  lnode* staticlist = (lnode*)malloc(sizeof(lnode));
  lnode* head = NULL;
  lnode* current = NULL;
  lnode* temp = NULL;
  lnode* working = NULL;
  lnode* prev = NULL;
 
  int done = 1;
  for (done = 1; done < argc; done++) {
    input = fopen(argv[done], "r");
    if (input == NULL) {
      fprintf(stderr, "Error opening file %d\n", done);
      exit(EXIT_FAILURE);
    }
 
    int ascii = 0;
    int freq = 0;
    int itemcount = 0;
    fscanf(input, "%d ", &itemcount);
    while (fscanf(input, "%d %d", &ascii, &freq) != 0) {
      if (head == NULL) {
        head = (lnode*)malloc(sizeof(lnode));
        head->item = ascii;
        head->count = freq;
        head->isLeaf = TRUE;
        current = staticlist = head;
      }
      else {
        current->next = (lnode*)malloc(sizeof(lnode));
        current->next->item = ascii;
        current->next->count = freq;
        current->next->isLeaf = TRUE;
        current = current->next;
      }
 
    }
 
    while (head->next != NULL) {
      temp = head;
      head = head->next;
      working = (lnode*)malloc(sizeof(lnode));
      working->count = temp->count + head->count;
      working->left = temp;
      working->right = head;
      working->isLeaf = FALSE;
      current = head->next;
      prev = NULL;
      while (current != NULL && current->count <= working->count) {
        prev = current;
        current = current->next;
      } 
      if (prev == NULL) {
        working->next = current;
        head = working;
      }
      else {
        prev->next = working;
        working->next = current;
        head = head->next;
      }
    }
 
    int pathlen = strlen(argv[done]);
    char* outname = (char*)malloc(sizeof(char)*pathlen+1);
    strncpy(outname, argv[done], pathlen-5);
    //strcat(outname, ".deco");
    FILE* output = fopen(outname, "w");
 
 
    char* asciioutpath = "huff-d.ascii.temp";
    FILE* asciiout = fopen(asciioutpath, "w+");
    unlink(asciioutpath);
    if (asciiout == NULL) {
      fprintf(stderr, "Error creating %s.\n", asciioutpath);
      exit(EXIT_FAILURE);
    }
 
    int count = 0;
    while ((in = fgetc(input)) != EOF) {
      bits = unpackbits(in);
      fprintf(asciiout, "%s", bits);
      count++;
    }
 
    if(fseek(asciiout, 0, SEEK_SET)) {
      fprintf(stderr, "Error seeking on %s.\n", asciioutpath);
      exit(EXIT_FAILURE);
    }
 
    current = head;
    while ((in = fgetc(asciiout)) != EOF && itemcount > 0) {
      current = in == '1' ? current->right : current->left;
      if (current->isLeaf) {
        fputc(current->item, output);
        current = head;
        itemcount--;
      }
    }
    fputc('\n', output);
    fclose(input);
    fclose(output);
  }
}
This entry was posted in Programming and tagged . Bookmark the permalink. Post a comment or leave a trackback: Trackback URL.