Return to Main Page
GeSHi © 2004, Nigel McNie.
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <memory.h>
  4. #include <math.h>
  5. #include <time.h>
  6. #include <string.h>
  7. #include "bitstream.h"
  8. #include "huffman.h"
  9.  
  10. //getBinString - This function converts an array into a corresponding
  11. // string of '1' and '0' for printing to the screen.
  12. char* getBinString(char data[], int length)
  13. {
  14. char* toRet = (char*)malloc(sizeof(char) * (length + 1));
  15. int i;
  16. for (i = 0; i < length; i++)
  17. {
  18. if (((data[i / 8] >> (i % 8)) & 0x01) == 0x01)
  19. {
  20. toRet[i] = '1';
  21. }
  22. else
  23. {
  24. toRet[i] = '0';
  25. }
  26. }
  27. toRet[length] = 0x00;
  28. return toRet;
  29. }
  30.  
  31. //getHexString - This function converts an array into a corresponding
  32. // string of hex values for showing on the screen.
  33. char* getHexString(char data[], int length)
  34. {
  35. int byteLength = (length / 8) + (length % 8 != 0 ? 1 : 0);
  36. char* toRet = (char*)malloc(sizeof(char) * (byteLength * 5) + 1);
  37. int i, j;
  38. char curByte = 0;
  39. int bitPos = 7;
  40. for (i = 0; i < (byteLength * 5) + 1; i++)
  41. {
  42. toRet[i] = 0;
  43. }
  44. if (length % 8 != 0)
  45. {
  46. bitPos = (length % 8) - 1;
  47. }
  48. j = 0;
  49. for (i = 0; i < length; i++)
  50. {
  51. if (((data[i / 8] >> (i % 8)) & 0x01) == 0x01)
  52. {
  53. curByte = curByte | (1 << bitPos--);
  54. }
  55. else
  56. {
  57. curByte = curByte & ~(1 << bitPos--);
  58. }
  59. if (bitPos == -1)
  60. {
  61. sprintf(&toRet[j++ * 5], "0x%02X ", (unsigned char)curByte);
  62. curByte = 0;
  63. bitPos = 7;
  64. }
  65. }
  66. toRet[byteLength * 5] = 0x00;
  67. return toRet;
  68. }
  69.  
  70. //print_usage - This function outputs the proper usage of the program to the screen.
  71. void print_usage()
  72. {
  73. printf("Program Usage:\n");
  74. printf("\tHuffman.exe -l (length) -c (size) -b -h p0 p1 p2...\n\n");
  75. printf("\t-l - Specifies that the next argument is the length\n");
  76. printf("\tlength - The number of symbols to generate for compression comparison\n");
  77. printf("\t-c - Input the vector of probabilities from the console instead of as arguments\n");
  78. printf("\tsize - The size of the vector that will input from the console\n");
  79. printf("\t-b - Prints the data in binary format\n");
  80. printf("\t-h - Prints the data in hexadecimal format\n");
  81. printf("\t-n - Does NOT print out any data, only the summaries\n");
  82. printf("\tpX - The vector of probabilities for the generated symbols\n");
  83. printf("\tnote: You may specify both -b and -h to see the data in both formats\n");
  84. printf("\t If the format is omitted, no data output is used by default\n");
  85. }
  86.  
  87. //checkArgs - This function processes the command line argument list to see what
  88. // options were specified by the user
  89. void checkArgs(int argc, char* argv[], bool* showHex, bool* showBin, bool* console, int* otherArgs, int* genLength, int* conLen)
  90. {
  91. for (int i = 1; i < argc; i++)
  92. {
  93. if (strcmp(argv[i], "-l") == 0)
  94. {
  95. *genLength = atoi(argv[i + 1]);
  96. i++;
  97. *otherArgs = i;
  98. }
  99. else if (strcmp(argv[i], "-b") == 0)
  100. {
  101. *showBin = true;
  102. *otherArgs = i;
  103. }
  104. else if (strcmp(argv[i], "-h") == 0)
  105. {
  106. *showHex = true;
  107. *otherArgs = i;
  108. }
  109. else if (strcmp(argv[i], "-n") == 0)
  110. {
  111. *showHex = false;
  112. *showBin = false;
  113. *otherArgs = i;
  114. }
  115. else if (strcmp(argv[i], "-c") == 0)
  116. {
  117. *conLen = atoi(argv[i + 1]);
  118. *console = true;
  119. i++;
  120. *otherArgs = i;
  121. }
  122. }
  123. }
  124.  
  125. //main - Main program
  126. void main(int argc, char* argv[])
  127. {
  128. int i, j;
  129. double* probs;
  130. char** codes;
  131. int* lengths;
  132. bool showHex = false;
  133. bool showBin = false;
  134. bool console = false;
  135. int conLen = 0;
  136. BitStream* rawData;
  137. BitStream* compData;
  138.  
  139. if (argc < 4)
  140. {
  141. print_usage();
  142. return;
  143. }
  144.  
  145. int otherArgs = 0;
  146.  
  147. int genLength = -1;
  148.  
  149. //Process command line arguments
  150. checkArgs(argc, argv, &showHex, &showBin, &console, &otherArgs, &genLength, &conLen);
  151.  
  152. if (genLength == -1)
  153. {
  154. print_usage();
  155. return;
  156. }
  157.  
  158. //Determine the length of the probability vector (in command line mode)
  159. int numSymbols = argc - (otherArgs + 1);
  160.  
  161. if (!console)
  162. {
  163. //Read in the probability vector from the command line
  164. probs = (double*)malloc(sizeof(double) * (numSymbols));
  165. for (i = otherArgs + 1; i < argc; i++)
  166. {
  167. probs[i - (otherArgs + 1)] = atof(argv[i]);
  168. }
  169. }
  170. else
  171. {
  172. //Read in the probability vector from the console
  173. numSymbols = conLen;
  174. probs = (double*)malloc(sizeof(double) * (numSymbols));
  175. for (i = 0; i < numSymbols; i++)
  176. {
  177. float tempProb;
  178. scanf("%f", &tempProb);
  179. probs[i] = (double)tempProb;
  180. }
  181. }
  182.  
  183. //Compute the codewords based on the probabilities
  184. generateCodes(&probs, &codes, &lengths, numSymbols);
  185.  
  186. //If any output is being shown, also show the codebook
  187. if (showBin || showHex)
  188. {
  189. printf("Code Book:\n");
  190. for (i = 0; i < numSymbols; i++)
  191. {
  192. printf("Prob %4.3f -> %s\n", probs[i], getBinString(codes[i], lengths[i]));
  193. }
  194. printf("\n");
  195. }
  196.  
  197. //Calculate the bits per symbol of the uncompressed data
  198. int bitsPerSymbol = (int)ceil(log((double)numSymbols) / log(2.0));
  199.  
  200. //Calculate the entropy and the expected code length
  201. double entropy = 0.0;
  202. double expectedLen = 0.0;
  203. for (i = 0; i < numSymbols; i++)
  204. {
  205. if (probs[i] > 0)
  206. entropy += -1 * probs[i] * (log(probs[i]) / log(2.0));
  207. expectedLen += probs[i] * lengths[i];
  208. }
  209.  
  210. //Generate the random data according the the given probabilities
  211. srand((unsigned)time(NULL));
  212. rawData = new BitStream();
  213. for (i = 0; i < genLength; i++)
  214. {
  215. double curProb = 0.0;
  216. double genProb = (double)rand() / (double)RAND_MAX;
  217. int index;
  218. for (j = 0; j < numSymbols; j++)
  219. {
  220. curProb += probs[j];
  221. if (genProb <= curProb)
  222. {
  223. index = j;
  224. break;
  225. }
  226. }
  227. rawData->write((char*)(&index), bitsPerSymbol);
  228. }
  229.  
  230. //Output the data that was generated
  231. char* rawBytes;
  232. int rawLength;
  233. rawData->read(&rawBytes, &rawLength);
  234. if (showHex)
  235. printf("Raw Data:\n%s\n", getHexString(rawBytes, rawLength));
  236. if (showBin)
  237. printf("Binary Form:\n%s\n", getBinString(rawBytes, rawLength));
  238. printf("Raw Length: %d bits\n", rawLength);
  239.  
  240. //Compress the generated data
  241. //The code words have to be reversed back and forth so that they
  242. //print correctly on the screen, but also are placed into the BitStream
  243. //in the correct order.
  244. for (i = 0; i < numSymbols; i++)
  245. {
  246. reverseBits(&(codes[i]), lengths[i]);
  247. }
  248. compData = new BitStream();
  249. for (i = 0; i < genLength; i++)
  250. {
  251. int index = 0;
  252. char* data;
  253. rawData->read(&data, i * bitsPerSymbol, bitsPerSymbol);
  254. reverseBits(&data, bitsPerSymbol);
  255. for (j = 0; j < (bitsPerSymbol / 8) + (((bitsPerSymbol % 8) != 0) ? 1 : 0); j++)
  256. {
  257. index = index | ((unsigned char)data[j] << (j * 8));
  258. }
  259. compData->write(codes[index], lengths[index]);
  260. }
  261. for (i = 0; i < numSymbols; i++)
  262. {
  263. reverseBits(&(codes[i]), lengths[i]);
  264. }
  265.  
  266. //Print the compressed data to the screen
  267. char* compBytes;
  268. int compLength;
  269. compData->read(&compBytes, &compLength);
  270. if (showHex)
  271. printf("Compressed Data:\n%s\n", getHexString(compBytes, compLength));
  272. if (showBin)
  273. printf("Binary Form:\n%s\n", getBinString(compBytes, compLength));
  274. printf("Compressed Length: %d bits\n", compLength);
  275.  
  276. //Show the results
  277. printf("Entropy: %6.3f\n", entropy);
  278. printf("Expected Length: %6.3f\n", expectedLen);
  279. printf("Actual Average: %6.3f\n", (double)compLength / (double)genLength);
  280. }
  281.  
Parsed in 0.039 seconds, using GeSHi 1.0.7.20