/* compile with: gcc -g -Wall char-hist-pairs.c -o char-hist-pairs */

/* here's one way to run and visualize the results:
$ ./char-hist-pairs  < antony-cleopatra.txt | grep FOR_PLOT | sed 's/FOR_PLOT://' > char-hist-pairs.dat
$ gnuplot
gnuplot> set pm3d
gnuplot> set contour base
gnuplot> set style data lines
gnuplot> splot [33:] [33:] 'char-hist-pairs.dat' using 1:2:3
*/

#include <stdio.h>
#include <ctype.h>

/*
 * Read all of standard input and count how often each ordered pair of
 * adjacent bytes (previous, current) occurs.  The counts for the
 * printable range ' '..'z' are then written to standard output in
 * three forms:
 *   1. one line per pair:  "<prev><cur>    <count>"
 *   2. a matrix with one row per previous character and one column
 *      per current character
 *   3. "FOR_PLOT:"-prefixed numeric triples (prev code, cur code,
 *      count), with a blank "FOR_PLOT:" line before each row block,
 *      which is the layout gnuplot's splot expects
 *
 * Returns 0.
 */
int main(void)
{
  /* getchar() returns either EOF or the byte converted from unsigned
     char, so valid indices are 0..255 (assuming 8-bit bytes); the
     table therefore needs 256 entries per dimension, not 255 */
  enum { NUM_BYTE_VALUES = 256 };

  /* static storage: the table is guaranteed to start out all zero,
     and half a megabyte of counters stays off the stack */
  static long int counts[NUM_BYTE_VALUES][NUM_BYTE_VALUES];

  /* we only report relevant character pairs -- we just use the
     *letters* (and numbers and punctuation) from the ascii table.
     we simplify things here by using one contiguous range of the
     ASCII table */
  const int letter_min = ' ';
  const int letter_max = 'z';

  int c, c_prev, i, j;

  /* EOF can never be returned as a data byte, so it doubles as the
     "no previous character yet" marker */
  c_prev = EOF;

  while ((c = getchar()) != EOF) {
    /* for each character after the first we add a count to the
       histogram cell for (previous character, this character) */
    if (c_prev != EOF) {
      ++counts[c_prev][c];
    }
    c_prev = c;
  }

  /* output a table */
  for (i = letter_min; i <= letter_max; ++i) {
    for (j = letter_min; j <= letter_max; ++j) {
      printf("%c%c    %ld\n", i, j, counts[i][j]);
    }
  }

  /* now output the counts in a different way: a matrix whose header
     row labels the columns.  header cells use the same width as the
     "%4ld " data cells so the labels line up with their columns */
  printf("   ");
  for (i = letter_min; i <= letter_max; ++i) {
    printf("%4c ", i);
  }
  printf("\n");   /* finish the header before the first data row */
  for (i = letter_min; i <= letter_max; ++i) {
    printf("%c: ", i);
    for (j = letter_min; j <= letter_max; ++j) {
      printf("%4ld ", counts[i][j]);
    }
    printf("\n");
  }

  /* output a table for plotting */
  for (i = letter_min; i <= letter_max; ++i) {
    /* after the FOR_PLOT: prefix is stripped this becomes a blank
       line separating one scan line of the surface from the next */
    printf("FOR_PLOT:\n");
    for (j = letter_min; j <= letter_max; ++j) {
      printf("FOR_PLOT: %3d  %3d    %ld\n", i, j, counts[i][j]);
    }
  }

  return 0;
}

