From 61b4313d03a93f50c5cb9d7320f3db7c21caa9f3 Mon Sep 17 00:00:00 2001 From: Joseph Hunkeler Date: Sun, 5 Dec 2021 18:16:30 -0500 Subject: Improvements, and heavy optimization to string search * Add error checking to all allocs * Add general format checking * Add acronym format checking * Consolidate dictionary creation into different helper functions * Add ability to get a random word of a type without scanning the entire dictionary * Added function doc strings (incomplete) --- dictionary.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- jdtalk.h | 12 +++-- main.c | 48 ++++++++++++++--- strings.c | 41 ++++++++++++-- talk.c | 161 ++++++++++++++++++++++++++++++++++++++++--------------- 5 files changed, 373 insertions(+), 60 deletions(-) diff --git a/dictionary.c b/dictionary.c index 0d953d9..a0c8f72 100644 --- a/dictionary.c +++ b/dictionary.c @@ -1,15 +1,28 @@ #include "jdtalk.h" +/** + * Initializes a dictionary + * @return Empty initialized Dictionary structure + */ struct Dictionary *dictionary_new() { struct Dictionary *dict; + dict = malloc(1 * sizeof(*dict)); + if (!dict) { + perror("Unable to initialize new dictionary"); + exit(1); + } dict->words = malloc(DICT_INITIAL_SIZE * sizeof(**dict->words)); + if (!dict->words) { + perror("Unable to initialize array of dictionary words"); + exit(1); + } dict->nelem_alloc = DICT_INITIAL_SIZE; dict->nelem_inuse = 0; return dict; } -void dictionary_append(struct Dictionary **dict, char *s, unsigned type) { +void dictionary_grow_as_needed(struct Dictionary **dict) { if ((*dict)->nelem_inuse + 1 > (*dict)->nelem_alloc) { struct Word **tmp; (*dict)->nelem_alloc += DICT_INITIAL_SIZE; @@ -20,18 +33,74 @@ void dictionary_append(struct Dictionary **dict, char *s, unsigned type) { } (*dict)->words = tmp; } +} + +void dictionary_alloc_word_record(struct Dictionary **dict) { (*dict)->words[(*dict)->nelem_inuse] = malloc(1 * sizeof(**(*dict)->words)); + if (!(*dict)->words[(*dict)->nelem_inuse]) { + perror("Unable to allocate dictionary word list record"); + exit(1); + } +} + +void dictionary_new_word(struct Dictionary **dict, char *s, unsigned type) { (*dict)->words[(*dict)->nelem_inuse]->word = strdup(s); + if (!(*dict)->words[(*dict)->nelem_inuse]->word) { + perror("Unable to allocate dictionary word in list"); + exit(1); + } (*dict)->words[(*dict)->nelem_inuse]->nchar = strlen(s) - 1; *((*dict)->words[(*dict)->nelem_inuse]->word + ((*dict)->words[(*dict)->nelem_inuse]->nchar)) = '\0'; (*dict)->words[(*dict)->nelem_inuse]->type = type; +} + +struct Dictionary *dictionary_of(struct Dictionary **src, unsigned type) { + struct Dictionary *dest; + dest = dictionary_new(); + dictionary_grow_as_needed(&dest); + for (size_t i = 0, x = 0; i < (*src)->nelem_inuse; i++) { + if ((*src)->words[i]->type != type) + continue; + dictionary_alloc_word_record(&dest); + dest->words[x] = malloc(sizeof(**(*src)->words)); + dest->words[x] = (*src)->words[i]; + dest->nelem_inuse++; + x++; + } + return dest; +} + +/** + * Add a word to the dictionary + * + * @param dict pointer to dictionary + * @param s string to append to word list + * @param type type of word (WT_NOUN, WT_VERB, WT_ADVERB, WT_ADJECTIVE) + */ +void dictionary_append(struct Dictionary **dict, char *s, unsigned type) { + dictionary_grow_as_needed(dict); + dictionary_alloc_word_record(dict); + dictionary_new_word(dict, s, type); (*dict)->nelem_inuse++; } +/** + * Extract words from a file and append them to a Dictionary + * structure with a specific type + * + * @param fp raw dictionary file handle + * @param dict pointer to Dictionary structure + * @param type type of words in raw dictionary file + * @return 0=success or value of errno on failure + */ int dictionary_read(FILE *fp, struct Dictionary **dict, unsigned type) { char *buf; buf = malloc(DICT_WORD_SIZE_MAX * sizeof(char)); + if (!buf) { + perror("Unable to allocate buffer for dictionary_read"); + exit(1); + } while ((fgets(buf, DICT_WORD_SIZE_MAX - 1, fp) != NULL)) { if (errno) { return errno; @@ -44,6 +113,11 @@ int dictionary_read(FILE *fp, struct Dictionary **dict, unsigned type) { return 0; } +/** + * Consume all dictionary files + * + * @return fully populated dictionary of words + */ struct Dictionary *dictionary_populate() { FILE *fp; struct Dictionary *dict; @@ -55,6 +129,7 @@ struct Dictionary *dictionary_populate() { NULL, }; + // Types of words expected by files[] array const unsigned files_type[] = { WT_NOUN, WT_ADJECTIVE, @@ -62,7 +137,10 @@ struct Dictionary *dictionary_populate() { WT_VERB, }; + // Initialize the dictionary dict = dictionary_new(); + + // Consume each dictionary in files[] for (size_t i = 0; files[i] != NULL; i++) { char *datadir; char filename[PATH_MAX]; @@ -80,18 +158,92 @@ struct Dictionary *dictionary_populate() { fprintf(stderr, "Unable to open dictionary: %s", filename); exit(1); } + + // Append the contents of file[i] to dictionary dictionary_read(fp, &dict, files_type[i]); fclose(fp); } return dict; } +/** + * Get the types of a word + * @param dict pointer to dictionary + * @param s dictionary word to search for + * @return a string containing the word types (i.e. n,a,d,v) + */ +char *dictionary_word_formats(struct Dictionary *dict, char *s) { + static char buf[OUTPUT_SIZE_MAX]; + buf[0] = '\0'; + + for (size_t i = 0; i < dict->nelem_inuse; i++) { + if (strcmp(dict->words[i]->word, s) != 0) { + continue; + } + switch (dict->words[i]->type) { + case WT_NOUN: + strcat(buf, "n"); + break; + case WT_ADJECTIVE: + strcat(buf, "a"); + break; + case WT_ADVERB: + strcat(buf, "d"); + break; + case WT_VERB: + strcat(buf, "v"); + break; + default: + strcat(buf, "x"); + break; + } + } + if (!strlen(buf)) { + return NULL; + } + return buf; +} + +/** + * Search dictionary for a word + * + * int result; + * + * // "beef" exists and is a verb + * result = dictionary_contains(dict, "beef", WT_VERB); + * // 1 + * + * // "beef" exists and is a noun + * result = dictionary_contains(dict, "beef", WT_NOUN); + * // 1 + * + * // "beef" is not an adjective + * result = dictionary_contains(dict, "beef", WT_ADJECTIVE); + * // 0 + * + * // "Beef" does not exist as written (case sensitive search) + * result = dictionary_contains(dict, "Beef", WT_NOUN); + * // 0 + * + * // "Beef" exists (case insensitive search) and is a noun + * result = dictionary_contains(dict, "Beef", WT_NOUN | WT_ICASE); + * // 1 + * + * // "Beef" exists (case insensitive search), matching any type of word + * result = dictionary_contains(dict, "Beef", WT_ANY | WT_ICASE); + * // 1 + * + * @param dict pointer to populated dictionary + * @param s pointer to pattern string + * @param type type of word (WT_NOUN, WT_VERB, WT_ADVERB, WT_ADJECTIVE) || (WT_ANY, WT_ICASE) + * @return 0=not found, !0=found + */ int dictionary_contains(struct Dictionary *dict, const char *s, unsigned type) { int result; unsigned icase; - icase = type & WT_ICASE; - type &= 0x7f; + icase = type & WT_ICASE; // Determine case-sensitivity of the search function + type &= 0x7f; // Strip case-insensitive flag from type result = 0; for (size_t i = 0; i < dict->nelem_inuse; i++) { @@ -118,6 +270,15 @@ int dictionary_contains(struct Dictionary *dict, const char *s, unsigned type) { return result; } +/** + * Produce a random word from the dictionary of type + * + * When type is WT_ANY, a random word irrespective of type will be produced + * + * @param dict pointer to dictionary + * @param type type of word to produce + * @return pointer to dictionary word + */ char *dictionary_word(struct Dictionary *dict, unsigned type) { struct Word *word; while (1) { @@ -129,6 +290,10 @@ char *dictionary_word(struct Dictionary *dict, unsigned type) { } } +/** + * Free a dictionary + * @param dict pointer to dictionary + */ void dictionary_free(struct Dictionary *dict) { for (size_t i = 0; i < dict->nelem_inuse; i++) { free(dict->words[i]->word); diff --git a/jdtalk.h b/jdtalk.h index f28f3c8..1e7a1ad 100644 --- a/jdtalk.h +++ b/jdtalk.h @@ -40,6 +40,8 @@ int dictionary_read(FILE *fp, struct Dictionary **dict, unsigned type); struct Dictionary *dictionary_populate(); int dictionary_contains(struct Dictionary *dict, const char *s, unsigned type); char *dictionary_word(struct Dictionary *dict, unsigned type); +char *dictionary_word_formats(struct Dictionary *dict, char *s); +struct Dictionary *dictionary_of(struct Dictionary **src, unsigned type); void dictionary_free(struct Dictionary *dict); char *str_random_case(char *s); @@ -48,10 +50,12 @@ char *str_leet(char *s); char *str_title_case(char *s); char *str_randomize_words(char *s); char *str_reverse(char *s); +char *str_album(char *s, char **parts); -char *talkf(struct Dictionary *dict, const char *fmt, char **parts); -char *talk_salad(struct Dictionary *dict, size_t limit, char **parts); -char *talk_acronym(struct Dictionary *dict, const char *fmt, char *s, char **parts); -int acronym_safe(const char *acronym, const char *pattern); +char *talkf(struct Dictionary *dict[], char *fmt, char **parts, size_t parts_max); +char *talk_salad(struct Dictionary *dict[], size_t limit, char **parts, size_t parts_max); +char *talk_acronym(struct Dictionary *dict[], __attribute__((unused)) char *fmt, char *s, char **parts, size_t parts_max); +int acronym_safe(struct Dictionary *dict, const char *acronym, const char *pattern, const char *fmt); +int format_safe(char *s); #endif //JDTALKC_JDTALK_H diff --git a/main.c b/main.c index 07cdafd..81ce730 100644 --- a/main.c +++ b/main.c @@ -19,11 +19,16 @@ static const char *usage_text = \ " -t Produce title-case strings (Title Case)\n" "\n"; +/** + * Print usage statement + * @param name program name + */ static void usage(char *name) { char *begin; char buf[PATH_MAX]; buf[0] = '\0'; + // Get the basename of name strcpy(buf, name); begin = strrchr(buf, '/'); if (begin && strlen(begin)) { @@ -31,21 +36,31 @@ static void usage(char *name) { } else { begin = buf; } + printf(usage_text, begin); } +/** + * Validate s against possible arguments + * @param possible short options + * @param s input string to validate + * @return 0=invalid, 1=valid + */ static int argv_validate(const char *possible, char *s) { if (strlen(s) > 1) { + // s is a short option (i.e. -c) for (size_t i = 0; i < strlen(possible); i++) { if (possible[i] == *(s + 1)) + // s is a valid short option return 1; } } + // s is an invalid short option return 0; } #define ARG(X) strcmp(option, X) == 0 -static const char *args_valid = "abcefhHlprRsSt"; +static const char *args_valid = "AabcefhHlprRsSt"; int main(int argc, char *argv[]) { struct Dictionary *dict; @@ -67,6 +82,7 @@ int main(int argc, char *argv[]) { int salad_limit; int do_shuffle; int do_reverse; + int do_format; size_t limit; float start_time; float end_time; @@ -83,6 +99,7 @@ int main(int argc, char *argv[]) { do_title_case = 0; do_shuffle = 0; do_reverse = 0; + do_format = 0; limit = 0; salad_limit = 10; @@ -162,6 +179,7 @@ int main(int argc, char *argv[]) { continue; } if (ARG( "-f")) { + do_format = 1; strcpy(format, option_value); i++; continue; @@ -189,13 +207,27 @@ int main(int argc, char *argv[]) { } dict = dictionary_populate(); + struct Dictionary *dicts[6] = { + dict, // ALL + dictionary_of(&dict, WT_NOUN), + dictionary_of(&dict, WT_ADJECTIVE), + dictionary_of(&dict, WT_ADVERB), + dictionary_of(&dict, WT_VERB), + NULL, + }; if (do_pattern && !dictionary_contains(dict, pattern, WT_ANY)) { fprintf(stderr, "Word not found in dictionary: %s\n", pattern); exit(1); } - if ((do_pattern && do_acronym) && !acronym_safe(acronym, pattern)) { + if (!format_safe(format)) { + fprintf(stderr, "Invalid format: %s\n", format); + exit(1); + } + + if ((do_pattern && do_acronym) && !acronym_safe(dict, acronym, pattern, do_format ? NULL: format)) { + fprintf(stderr, "Using format: %s\n", format); fprintf(stderr, "Word will never appear in acronym, '%s': %s\n", acronym, pattern); exit(1); } @@ -207,23 +239,23 @@ int main(int argc, char *argv[]) { memset(part, 0, sizeof(part) / sizeof(*part) * sizeof(char *)); if (do_salad) { - strcpy(buf, talk_salad(dict, salad_limit, part)); + strcpy(buf, talk_salad(dicts, salad_limit, part, OUTPUT_PART_MAX)); } else if (do_acronym) { - strcpy(buf, talk_acronym(dict, format, acronym, part)); + strcpy(buf, talk_acronym(dicts, format, acronym, part, OUTPUT_PART_MAX)); } else { - strcpy(buf, talkf(dict, format, part)); + strcpy(buf, talkf(dicts, format, part, OUTPUT_PART_MAX)); } if (do_pattern) { found = 0; for (size_t z = 0; part[z] != NULL; z++) { - if (!do_exact) { - if (strstr(buf, pattern)) { + if (do_exact) { + if (strcmp(part[z], pattern) == 0) { found = 1; break; } } else { - if (strcmp(part[z], pattern) == 0) { + if (strstr(buf, pattern)) { found = 1; break; } diff --git a/strings.c b/strings.c index f3bcb86..5701b13 100644 --- a/strings.c +++ b/strings.c @@ -1,16 +1,26 @@ #include "jdtalk.h" +/** + * Change case of a character... sometimes + * @param s input string (modified) + * @return pointer to s + */ char *str_random_case(char *s) { size_t len; len = strlen(s); for (size_t i = 0; i < len; i++) { - if ((rand() % 100) >= 50) { + if ((random() % 100) >= 50) { s[i] = (char)toupper(s[i]); } } return s; } +/** + * Capitalize every other character + * @param s input string (modified) + * @return pointer to s + */ char *str_hill_case(char *s) { size_t len; len = strlen(s); @@ -22,6 +32,11 @@ char *str_hill_case(char *s) { return s; } +/** + * Translate characters to 1337 + * @param s input string + * @return pointer to local storage (don't free it) + */ char *str_leet(char *s) { size_t len; static char buf[OUTPUT_SIZE_MAX]; @@ -141,6 +156,11 @@ char *str_leet(char *s) { return buf; } +/** + * Capitalize first character in each word + * @param s input string modified + * @return pointer to s + */ char *str_title_case(char *s) { size_t len; size_t i; @@ -156,6 +176,11 @@ char *str_title_case(char *s) { return s; } +/** + * Randomize characters in a string + * @param s input string (modified) + * @return pointer to s + */ char *str_randomize(char *s) { size_t len; char tmp = 0; @@ -169,6 +194,11 @@ char *str_randomize(char *s) { return s; } +/** + * Randomize words in a string + * @param s input string (modified) + * @return pointer to s + */ char *str_randomize_words(char *s) { char old[OUTPUT_SIZE_MAX]; char buf[OUTPUT_SIZE_MAX]; @@ -189,9 +219,14 @@ char *str_randomize_words(char *s) { return s; } +/** + * Reverse all characters in a string + * @param s input string (modified) + * @return pointer to s + */ char *str_reverse(char *s) { size_t dest; - char buf[1024]; + char buf[OUTPUT_SIZE_MAX]; buf[0] = '\0'; dest = 0; @@ -201,4 +236,4 @@ char *str_reverse(char *s) { buf[dest] = '\0'; strcpy(s, buf); return s; -} +} \ No newline at end of file diff --git a/talk.c b/talk.c index c4dfff8..456939f 100644 --- a/talk.c +++ b/talk.c @@ -1,6 +1,23 @@ #include "jdtalk.h" -char *talkf(struct Dictionary *dict, const char *fmt, char **parts) { +/** + * Produce an output string containing various user-defined types of words + * + * a = adjective + * d = adverb + * n = noun + * v = verb + * x = any + * + * char *parts[1024]; // probably more than enough, right? + * talkf(dict, "adnvx", &parts); + * + * @param dict pointer to dictionary array + * @param fmt + * @param parts + * @return + */ +char *talkf(struct Dictionary *dict[], char *fmt, char **parts, size_t parts_max) { static char buf[OUTPUT_SIZE_MAX]; buf[0] = '\0'; @@ -13,41 +30,48 @@ char *talkf(struct Dictionary *dict, const char *fmt, char **parts) { for (size_t i = 0; i < len; i++) { char *word = NULL; switch (fmt[i]) { + case 'x': + word = dictionary_word(dict[WT_ANY], WT_ANY); + break; case 'a': - word = dictionary_word(dict, WT_ADJECTIVE); + word = dictionary_word(dict[WT_ADJECTIVE], WT_ADJECTIVE); break; case 'd': - word = dictionary_word(dict, WT_ADVERB); + word = dictionary_word(dict[WT_ADVERB], WT_ADVERB); break; case 'n': - word = dictionary_word(dict, WT_NOUN); + word = dictionary_word(dict[WT_NOUN], WT_NOUN); break; case 'v': - word = dictionary_word(dict, WT_VERB); + word = dictionary_word(dict[WT_VERB], WT_VERB); break; default: break; } - if (parts) - parts[i] = word; - - strncat(buf, word, OUTPUT_SIZE_MAX); + if (parts) { + if (i < parts_max) { + parts[i] = word; + } else { + // We reached the maximum number of parts. Stop processing. + break; + } + } - if (i < len - 1) - strcat(buf, " "); + if (word) { + strncat(buf, word, OUTPUT_SIZE_MAX); + if (i < len - 1) + strcat(buf, " "); + } } return buf; } -char *talk_salad(struct Dictionary *dict, size_t limit, char **parts) { +char *talk_salad(struct Dictionary *dict[], size_t limit, char **parts, size_t parts_max) { static char buf[OUTPUT_SIZE_MAX]; buf[0] = '\0'; for (size_t i = 0; i < limit; i++) { - char *word = NULL; - word = dictionary_word(dict, WT_ANY); - parts[i] = word; - strncat(buf, word, OUTPUT_SIZE_MAX); + strncat(buf, talkf(dict, "x", parts, parts_max), OUTPUT_SIZE_MAX); if (i < limit - 1) { strcat(buf, " "); } @@ -55,47 +79,66 @@ char *talk_salad(struct Dictionary *dict, size_t limit, char **parts) { return buf; } -char *talk_acronym(struct Dictionary *dict, const char *fmt, char *s, char **parts) { - size_t len; +char *talk_acronym(struct Dictionary *dict[], char *fmt, char *s, char **parts, size_t parts_max) { + size_t s_len; + size_t format_len; + char format[INPUT_SIZE_MAX]; static char buf[OUTPUT_SIZE_MAX]; + static char *local_parts[OUTPUT_PART_MAX]; buf[0] = '\0'; + format[0] = '\0'; - len = strlen(s); - for (size_t i = 0; i < strlen(s); i++) { - char *word = NULL; + if (fmt) { + strcpy(format, fmt); + } else { + strcpy(format, "x"); + } + + s_len = strlen(s); + format_len = strlen(fmt); + if (format_len > s_len) { + *(format + s_len) = '\0'; + } + + size_t x; + x = 0; + for (size_t i = 0; i < s_len; i++) { + char word[OUTPUT_SIZE_MAX]; + word[0] = '\0'; while(1) { - word = dictionary_word(dict, WT_ANY); + char elem[2] = {0, 0}; + elem[0] = format[x]; + strcpy(word, talkf(dict, elem, &local_parts[i], parts_max)); if (*word == s[i]) { + strncat(buf, word, OUTPUT_SIZE_MAX); + if (i < s_len - 1) { + strcat(buf, " "); + } break; } - /* TODO: Formatted acronyms are too slow. Need a better way. - if (strlen(fmt) < strlen(s)) { - return NULL; - } - - char letter[2] = {'\0', '\0'}; - for (size_t f = 0; f < strlen(fmt); f++) { - *letter = fmt[f]; - word = talkf(dict, letter, NULL); - if (*word == s[i]) { - done = 1; - break; - } + } + if (parts) { + if (i < parts_max) { + //printf("parts[%zu]=%s\n", i, word); + parts[i] = local_parts[i]; + } else { + // We reached the maximum number of parts. Stop processing. + break; } - */ } - parts[i] = word; - strncat(buf, word, OUTPUT_SIZE_MAX); - if (i < len - 1) { - strcat(buf, " "); + if (x < format_len - 1) { + x++; } } return buf; } -int acronym_safe(const char *acronym, const char *pattern) { +int acronym_safe(struct Dictionary *dict, const char *acronym, const char *pattern, const char *fmt) { size_t acronym_len; + size_t fmt_len; + size_t types_len; int pattern_valid; + int format_valid; pattern_valid = 0; acronym_len = strlen(acronym); for (size_t i = 0; i < acronym_len; i++) { @@ -105,6 +148,40 @@ int acronym_safe(const char *acronym, const char *pattern) { } } - return pattern_valid; + format_valid = 1; + if (fmt) { + format_valid = 0; + char *types; + fmt_len = strlen(fmt); + types = dictionary_word_formats(dict, pattern); + types_len = strlen(types); + + for (size_t x = 0; x < types_len; x++) { + if (format_valid) break; + for (size_t i = 0; i < fmt_len; i++) { + if (types[x] == fmt[i]) { + format_valid = 1; + break; + } + } + + } + } + + return pattern_valid - format_valid == 0; } +int format_safe(char *s) { + size_t valid; + const char *formatter = "nadvx"; + + valid = 0; + for (size_t i = 0; i < strlen(formatter); i++) { + for (size_t x = 0; x < strlen(s); x++) { + if (s[x] == formatter[i]) { + valid++; + } + } + } + return valid == strlen(s); +} \ No newline at end of file -- cgit