/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 4; tab-width: 4 -*- */
/*
 * readability.c
 * Copyright (C) 2012 DMGualtieri <gualtieri@ieee.org>
 * 
 * readability is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * readability is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

 //This program calculates the Flesch Reading Ease and the
 //Flesch Kincaid Grade Level of a text file

#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Set to nonzero to echo each token, the running syllable total and a
 * '*' word marker to stdout and to the output file while parsing. */
#define debug 0

/* Characters that separate tokens when parse() hands a line to strtok().
 * Tab, CR and LF are included so that the last word of a line is not
 * returned with its line ending attached, and '?' is included because
 * sentences() treats it as a sentence terminator just like '.' and '!'. */
const char delimiters[] = " \t\r\n.,;:!?-";
char *token;			/* not referenced by the functions visible in this file */
int vowel_flag, old_vowel_flag;	/* vowel state of the current / previous character */
int i;				/* general-purpose loop index kept at file scope */
int sentence_count, word_count, syllable_count;	/* running totals for the whole input */
char str[2048];			/* buffer for one line of input */
char fn1[64];			/* input file name */
char fn2[64];			/* output file name */
FILE *indata;			/* input stream */
FILE *outdata;			/* output (report) stream */

/* Prototypes */
/* Only this program's own functions are declared here; the standard
 * library functions it calls (strtok, strcpy, strlen, fgets, exit, ...)
 * are declared by <string.h>, <stdio.h> and <stdlib.h>. */
void parse(char *string);
void sentences(char *string);
void syllables(char *word);
float Flesch_Kincaid_Ease(void);
float Flesch_Kincaid_Grade(void);
/* end of prototypes */

/*
 * Returns nonzero when tok contains at least one character accepted by
 * isalnum(), i.e. the token is more than stray punctuation or
 * line-ending characters.
 */
static int token_has_alnum(const char *tok)
{
    for (; *tok != '\0'; tok++) {
	// cast: passing a negative char to <ctype.h> functions is undefined
	if (isalnum((unsigned char) *tok))
	    return 1;
    }
    return 0;
}

/*
 * Splits one line of text into tokens and updates the running totals.
 *
 * string - NUL-terminated line of text.  It is modified in place,
 *          because strtok() overwrites each delimiter it finds with
 *          '\0'.
 *
 * Every token is passed to syllables().  word_count is incremented for
 * each token that contains at least one letter or digit, so a word that
 * still carries a line ending or a non-delimiter punctuation mark (for
 * example a closing parenthesis or quote) is counted, while a token made
 * only of such characters is not.
 */
void parse(char *string)
{
    char *token1;

//First call to strtok
    token1 = strtok(string, delimiters);

    while (token1 != NULL) {

	if (debug) {
	    const char *p;

	    //echo the token, filtering LF, CR, etc.
	    for (p = token1; *p != '\0'; p++) {
		if (*p > 47) {
		    printf("%c", *p);
		    fprintf(outdata, "%c", *p);
		}
	    }
	}

	syllables(token1);

//increment word count, ignoring tokens with no letters or digits
	if (token_has_alnum(token1)) {
	    word_count++;
	    if (debug) {
		printf("*");
		fprintf(outdata, "*");
	    }			//word separator
	}

	token1 = strtok(NULL, delimiters);
    }

}

/*
 * Adds the number of sentence endings found in string to sentence_count.
 *
 * string - NUL-terminated text; it is only read.
 *
 * A sentence ending is a '.', '!' or '?'.  A run of adjacent terminators
 * such as "..." or "?!" counts as a single ending.  No attempt is made
 * to recognise abbreviations or decimal points, so "e.g." and "3.14"
 * contribute endings as well.
 */
void sentences(char *string)
{
    const char *p;
    int in_terminator_run = 0;

    // walk to the NUL directly instead of calling strlen() every iteration
    for (p = string; *p != '\0'; p++) {
	int is_terminator = (*p == '.') || (*p == '!') || (*p == '?');

	if (is_terminator && !in_terminator_run)
	    sentence_count++;
	in_terminator_run = is_terminator;
    }
}

/* Returns nonzero when c is one of a, e, i, o, u or y in either case. */
static int is_vowel_char(char c)
{
    switch (c) {
    case 'a': case 'A':
    case 'e': case 'E':
    case 'i': case 'I':
    case 'o': case 'O':
    case 'u': case 'U':
    case 'y': case 'Y':
	return 1;
    default:
	return 0;
    }
}

/*
 * Estimates the number of syllables in word and adds the estimate to
 * syllable_count.  Nothing is returned.
 *
 * word - NUL-terminated token; it is only read.
 *
 * The estimate is the number of vowel groups (runs of adjacent vowels
 * count once), minus one for a word longer than two characters that
 * ends in "e", "es" or "ed".  A word that contains a vowel is never
 * estimated below one syllable, so "the", "see" and "red" count as one
 * rather than zero.  Tokens without any vowel contribute nothing.
 */
void syllables(char *word)
{
    size_t len = strlen(word);
    size_t k;
    int vowel_groups = 0;
    int previous_was_vowel = 0;
    int estimate;

    for (k = 0; k < len; k++) {
	int current_is_vowel = is_vowel_char(word[k]);

	//a new group starts at a vowel that follows a non-vowel
	if (current_is_vowel && !previous_was_vowel)
	    vowel_groups++;
	previous_was_vowel = current_is_vowel;
    }

    estimate = vowel_groups;

//taking care of e, es and ed at word end (case-insensitive, to match
//the vowel test above)
    if (len > 2) {
	int last = tolower((unsigned char) word[len - 1]);
	int before_last = tolower((unsigned char) word[len - 2]);

	if (last == 'e')
	    estimate--;
	else if ((before_last == 'e') && ((last == 's') || (last == 'd')))
	    estimate--;
    }

    //the ending rule must not remove a word's only syllable
    if ((vowel_groups > 0) && (estimate < 1))
	estimate = 1;

    syllable_count += estimate;

    if (debug) {
	printf("[%d]", syllable_count);
	fprintf(outdata, "[%d]", syllable_count);
    }
}

/*
 * Computes the Flesch Reading Ease score from the running totals:
 *
 *   206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
 *
 * Reads word_count, sentence_count and syllable_count.  The function
 * divides by sentence_count and by word_count, so both must be nonzero
 * for the result to be meaningful.
 */
float Flesch_Kincaid_Ease(void)
{
    //206.835 is the constant of the published formula
    return (206.835 -
	    (1.015 * ((float) word_count / (float) sentence_count))) -
	(84.6 * ((float) syllable_count / (float) word_count));
}

/*
 * Computes the Flesch-Kincaid Grade Level from the running totals:
 *
 *   0.39 * (words / sentences) + 11.8 * (syllables / words) - 15.59
 *
 * Reads word_count, sentence_count and syllable_count.  The function
 * divides by sentence_count and by word_count.
 */
float Flesch_Kincaid_Grade(void)
{
    //the ratios are computed in single precision and widened for the
    //weighted sum
    const double words_per_sentence =
	(float) word_count / (float) sentence_count;
    const double syllables_per_word =
	(float) syllable_count / (float) word_count;
    const double length_term = 0.39 * words_per_sentence;
    const double syllable_term = 11.8 * syllables_per_word;

    return (float) (length_term + syllable_term - 15.59);
}

/*
 * Program entry point.
 *
 * argv[1] - input text file (required).
 * argv[2] - report file (optional; "output.txt" when omitted).
 *
 * Counts sentences, words and syllables in the input and prints the
 * counts, the Flesch Reading Ease and the Flesch-Kincaid Grade Level to
 * stdout and to the report file.
 *
 * Returns 0 on success and 1 when the arguments are missing, a file
 * cannot be opened, or the report file cannot be written out.
 */
int main(int argc, char *argv[])
{
    const char *in_name;
    const char *out_name;

    if (argc < 2) {
	printf("Usage: readability input.txt output.txt\n");
	return 1;
    }

    in_name = argv[1];
    out_name = (argc < 3) ? "output.txt" : argv[2];

    //fn1 and fn2 are fixed-size buffers: copy with a bound (long paths
    //are truncated there) and use the argv strings themselves for
    //opening and reporting, so a long path can neither overflow the
    //buffers nor be opened under a truncated name.
    snprintf(fn1, sizeof fn1, "%s", in_name);
    snprintf(fn2, sizeof fn2, "%s", out_name);

    printf("\nInput file selected = %s\n", in_name);

    if ((indata = fopen(in_name, "rb")) == NULL) {
	printf("\nInput file cannot be opened.\n");
	return 1;
    }

    printf("Output file selected = %s\n", out_name);

    if ((outdata = fopen(out_name, "w")) == NULL) {
	printf("\nOutput file cannot be opened.\n");
	fclose(indata);
	return 1;
    }

    fprintf(outdata, "\nInput file selected = %s\n", in_name);

    sentence_count = 0;
    word_count = 0;
    syllable_count = 0;

    //One pass over the input, so no seek back to the start is needed.
    //sentences() must see each line before parse(), because parse()
    //tokenises with strtok(), which overwrites delimiters with '\0'.
    while (fgets(str, sizeof str, indata) != NULL) {
	sentences(str);
	parse(str);
    }

    printf("Word count = %d\n", word_count);
    fprintf(outdata, "Word count = %d\n", word_count);
    printf("Sentence count = %d\n", sentence_count);
    fprintf(outdata, "Sentence count = %d\n", sentence_count);
    printf("Syllable count = %d\n", syllable_count);
    fprintf(outdata, "Syllable count = %d\n", syllable_count);

    if ((word_count == 0) || (sentence_count == 0)) {
	//both scores divide by these counts
	printf
	    ("Readability scores not computed: no words or no sentences found.\n");
	fprintf(outdata,
		"Readability scores not computed: no words or no sentences found.\n");
    } else {
	float ease = Flesch_Kincaid_Ease();
	float grade = Flesch_Kincaid_Grade();

	printf("Flesch Reading Ease = %f\n", ease);
	printf
	    ("(90->100 = 11 year olds, 60->70 = 13-15 year olds, and 0->30 = college graduate.)\n");
	fprintf(outdata, "Flesch Reading Ease = %f\n", ease);
	fprintf(outdata,
		"(90->100 = 11 year olds, 60->70 = 13-15 year olds, and 0->30 = college graduate.)\n");
	printf("Flesch-Kincaid Grade Level = %f\n", grade);
	fprintf(outdata, "Flesch-Kincaid Grade Level = %f\n", grade);
    }

    fclose(indata);

    //fclose() flushes buffered output, so a write error can surface here
    if (fclose(outdata) != 0) {
	printf("\nOutput file could not be written completely.\n");
	return 1;
    }

    return (0);
}
