/* compile with: gcc -g -Wall char-hist-pairs.c -o char-hist-pairs */

/* here's one way to run and visualize the results:

   $ ./char-hist-pairs < antony-cleopatra.txt | grep FOR_PLOT | sed 's/FOR_PLOT://' > char-hist-pairs.dat
   $ gnuplot
   gnuplot> set pm3d
   gnuplot> set contour base
   gnuplot> set style data lines
   gnuplot> splot [33:] [33:] 'char-hist-pairs.dat' using 1:2:3
*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* getchar() returns each byte as an unsigned char converted to int, so
   valid indices run from 0 to UCHAR_MAX inclusive. */
enum { BYTE_VALUES = UCHAR_MAX + 1 };

/*
 * Read all of stdin and count how often each ordered pair of adjacent
 * bytes (previous, current) occurs, then print the counts for the range
 * ' ' .. 'z' in three layouts:
 *
 *   1. one "<prev><cur> <count>" line per pair,
 *   2. a matrix with one row per previous character,
 *   3. "FOR_PLOT:"-prefixed "<prev> <cur> <count>" lines using numeric
 *      character codes, with an empty "FOR_PLOT:" line before each row
 *      (a blank line once the prefix is stripped).
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if reading stdin failed.
 */
int main(void)
{
    /* static storage: zero-initialized by the language, and keeps the
       table (BYTE_VALUES * BYTE_VALUES longs) off the stack */
    static long int counts[BYTE_VALUES][BYTE_VALUES];

    /* we only look at relevant character pairs -- we just use the
       *letters* (and numbers and punctuation) from the ascii table.
       we simplify things here by using one contiguous range of the
       ASCII table */
    const int letter_min = ' ';
    const int letter_max = 'z';

    int c;
    int c_prev = EOF;   /* EOF marks "no previous character yet" */
    int i, j;

    while ((c = getchar()) != EOF) {
        /* for each character after the first we add a count to the
           histogram; the first character has no predecessor, so it
           does not form a pair */
        if (c_prev != EOF) {
            ++counts[c_prev][c];
        }
        c_prev = c;
    }
    if (ferror(stdin)) {
        perror("char-hist-pairs: error reading stdin");
        return EXIT_FAILURE;
    }

    /* output a table */
    for (i = letter_min; i <= letter_max; ++i) {
        for (j = letter_min; j <= letter_max; ++j) {
            printf("%c%c %ld\n", i, j, counts[i][j]);
        }
    }

    /* now output the counts in a different way: a matrix.  The header
       is padded to the width of the "%c: " row label, and each column
       heading is as wide as a "%4ld " cell so the columns line up. */
    printf("   ");
    for (i = letter_min; i <= letter_max; ++i) {
        printf("%4c ", i);
    }
    printf("\n");
    for (i = letter_min; i <= letter_max; ++i) {
        printf("%c: ", i);
        for (j = letter_min; j <= letter_max; ++j) {
            printf("%4ld ", counts[i][j]);
        }
        printf("\n");
    }

    /* output a table for plotting */
    for (i = letter_min; i <= letter_max; ++i) {
        printf("FOR_PLOT:\n");
        for (j = letter_min; j <= letter_max; ++j) {
            printf("FOR_PLOT: %3d %3d %ld\n", i, j, counts[i][j]);
        }
    }

    return EXIT_SUCCESS;
}