#include "dictionary.h"
#include "edit_distance.h"
#include "word.h"
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

using std::string;
using std::vector;

Dictionary::Dictionary() {}

/// @brief Tests whether `word` is stored in the dictionary.
///
/// Words are bucketed by length; anything of MAXLEN characters or more lives
/// in the last bucket (see slurp()), so the lookup index is clamped the same
/// way to stay inside `words` and to look in the bucket slurp() actually used.
///
/// @param word The word to look up.
/// @return true if an equal Word is found in the matching bucket.
bool Dictionary::contains(const string &word) const {
  const std::size_t bucket =
      std::min(word.length(), static_cast<std::size_t>(MAXLEN) - 1);
  const Word needle(word);
  const auto &candidates = this->words[bucket];
  return std::find(candidates.begin(), candidates.end(), needle) !=
         candidates.end();
}

/// @brief Builds the list of spelling suggestions for `word`.
///
/// Candidates are gathered by trigram overlap, then de-duplicated and
/// filtered by length, and finally ordered by edit distance to `word`.
///
/// @param word The (possibly misspelled) input word.
/// @return The suggestions, closest edit distance first.
std::vector<std::string> Dictionary::get_suggestions(const string &word) const {
  vector<string> suggestions;
  add_trigram_suggestions(suggestions, word);
  trim_suggestions(suggestions, word);
  rank_suggestions(suggestions, word);
  return suggestions;
}

/// @brief Appends every dictionary word that shares at least one trigram
///        with `word` to `suggestions`.
///
/// @param suggestions Output list; matching words are appended to it.
/// @param word        The input word whose trigrams are compared.
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
                                         const std::string &word) const {
  // Get trigrams of the input word
  const Word input_word(word);
  const auto &input_trigrams = input_word.get_triagrams();

  // Iterate through all words in the dictionary
  for (int i = 0; i < MAXLEN; ++i) {
    for (const Word &dict_word : words[i]) {
      // Count how many trigrams match
      const unsigned int match_count = dict_word.get_matches(input_trigrams);

      // If there are any matches, add the word to suggestions
      if (match_count > 0) {
        suggestions.push_back(dict_word.get_word());
      }
    }
  }
}

/// @brief Orders `suggestions` by ascending edit distance to `word`.
///
/// Each distance is computed once up front instead of inside the sort
/// comparator, and the sort is stable so suggestions with equal distance
/// keep their incoming relative order.
///
/// @param suggestions The list to reorder in place.
/// @param word        The word the distances are measured against.
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
                                  const std::string &word) const {
  using Distance = decltype(edit_distance(word, word));
  using Scored = std::pair<Distance, std::string>;

  std::vector<Scored> scored;
  scored.reserve(suggestions.size());
  for (std::string &candidate : suggestions) {
    const Distance distance = edit_distance(candidate, word);
    scored.emplace_back(distance, std::move(candidate));
  }

  std::stable_sort(
      scored.begin(), scored.end(),
      [](const Scored &a, const Scored &b) { return a.first < b.first; });

  for (std::size_t i = 0; i < scored.size(); ++i) {
    suggestions[i] = std::move(scored[i].second);
  }
}

/// @brief Removes duplicates, the input word itself, and any suggestion whose
///        length differs from the input's length by more than one.
///
/// As a side effect of de-duplicating through a std::set, the surviving
/// suggestions come out in ascending lexicographic order.
///
/// @param suggestions The list to filter in place.
/// @param word        The input word.
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
                                  const std::string &word) const {
  // Remove duplicates using a set
  std::set<std::string> unique_suggestions(suggestions.begin(),
                                           suggestions.end());

  // Remove the input word from the suggestions list (if present)
  unique_suggestions.erase(word);
  suggestions.assign(unique_suggestions.begin(), unique_suggestions.end());

  // Remove any suggestions that are not within 1 string length.
  // The lower bound is written as `len + 1 < l` rather than `len < l - 1`
  // because the lengths are unsigned and `l - 1` would wrap around for an
  // empty input word.
  const auto l = word.length();
  suggestions.erase(std::remove_if(suggestions.begin(), suggestions.end(),
                                   [l](const std::string &s) {
                                     return s.length() > l + 1 ||
                                            s.length() + 1 < l;
                                   }),
                    suggestions.end());
}

/// @brief Writes every stored word to the file at `p`, one word per line,
///        bucket by bucket.
///
/// @param p Path of the file to write.
/// @return 0 on success, 1 if the file could not be opened or written.
int Dictionary::spit(path p) {
  std::ofstream file(p);
  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  for (int a = 0; a < MAXLEN; a++) {
    for (auto &word : words[a]) {
      // '\n' instead of std::endl: no need to flush after every word.
      file << word << '\n';
    }
  }

  file.flush();
  if (!file) {
    std::cerr << "Error writing file! " << std::endl;
    return 1;
  }
  file.close();
  return 0;
}

/// @brief Loads words from the file at `p`, one word per line, and adds them
///        to the dictionary. Empty lines are skipped.
///
/// @param p Path of the file to read.
/// @return 0 on success, 1 if the file could not be opened.
int Dictionary::slurp(path p) {
  std::ifstream file(p.string());
  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  std::string line;
  while (std::getline(file, line)) {
    if (line.empty())
      continue;
    // Words larger than max gets placed in the topmost bucket
    const std::size_t bucket =
        std::min(line.size(), static_cast<std::size_t>(MAXLEN) - 1);
    words[bucket].push_back(Word(line));
  }

  file.close();
  return 0;
}