From 7a62bebf76759c3677564368484ce593132a4162 Mon Sep 17 00:00:00 2001
From: Imbus <>
Date: Thu, 21 Nov 2024 08:50:09 +0100
Subject: [PATCH] Initial implementation of matcher

---
Review notes (this section is commentary and is not part of the diff):

* trim_suggestions() rebuilt the list from a std::set, which handed the
  words back alphabetically and threw away the order from
  rank_suggestions(). It now filters without reordering.
* rank_suggestions() built two Word objects per comparison. Scores are now
  computed once per candidate, and equal scores keep their input order.
* add_trigram_suggestions() no longer fetches the dictionary word's
  trigrams into a local that was never read.
* get_suggestions() spells its local as std::vector<std::string>, so it no
  longer depends on a using-declaration for vector.
* dictionary.h deletes `using std::vector;` instead of commenting it out.
* The copy this patch was recovered from had lost its line breaks, its
  indentation and all text inside angle brackets. Template arguments are
  inferred from how the values are used; <set> is inferred from the new use
  of std::set; four-space indentation is an assumption. Context lines whose
  text could not be recovered are left out, so apply with
  `git apply --unidiff-zero --ignore-whitespace` and check the result
  against the tree. The blob ids on the index lines are those of the
  original commit.

 lab2/dictionary.cc | 83 ++++++++++++++++++++++++++++++++++++++++++---
 lab2/dictionary.h  | 16 +++++----
 2 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/lab2/dictionary.cc b/lab2/dictionary.cc
index 51d5ba7..dc97b04 100644
--- a/lab2/dictionary.cc
+++ b/lab2/dictionary.cc
@@ -5,0 +6 @@
+#include <set>
@@ -21,14 +22,86 @@ bool Dictionary::contains(const string &word) const {
     return false;
 }
 
-vector<string> Dictionary::get_suggestions(const string &word) const {
-    vector<string> suggestions;
-    // add_trigram_suggestions(suggestions, word);
-    // rank_suggestions(suggestions, word);
-    // trim_suggestions(suggestions);
+// Returns dictionary words that share trigrams with `word`, best match first.
+std::vector<std::string> Dictionary::get_suggestions(const string &word) const {
+    std::vector<std::string> suggestions;
+    add_trigram_suggestions(suggestions, word);
+    rank_suggestions(suggestions, word);
+    trim_suggestions(suggestions, word);
     return suggestions;
 }
 
+// Appends every dictionary word that shares at least one trigram with `word`.
+void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
+                                         const std::string &word) const {
+    const Word input_word(word);
+    const auto &input_trigrams = input_word.get_triagrams();
+
+    for (const auto &bucket : words) {
+        for (const Word &dict_word : bucket) {
+            if (dict_word.get_matches(input_trigrams) > 0) {
+                suggestions.push_back(dict_word.get_word());
+            }
+        }
+    }
+}
+
+// Reorders `suggestions` so that words sharing the most trigrams with `word`
+// come first. Words with equal scores keep their relative order.
+void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
+                                  const std::string &word) const {
+    const Word input_word(word);
+    const auto &input_trigrams = input_word.get_triagrams();
+
+    // Score each candidate once up front; constructing a Word inside the sort
+    // comparator would repeat that work on every comparison.
+    std::vector<unsigned int> scores;
+    scores.reserve(suggestions.size());
+    for (const std::string &candidate : suggestions) {
+        scores.push_back(Word(candidate).get_matches(input_trigrams));
+    }
+
+    // Sort positions, not strings, so each position still finds its score.
+    std::vector<std::size_t> order(suggestions.size());
+    for (std::size_t i = 0; i < order.size(); ++i) {
+        order[i] = i;
+    }
+    std::stable_sort(order.begin(), order.end(),
+                     [&scores](std::size_t a, std::size_t b) {
+                         return scores[a] > scores[b];
+                     });
+
+    std::vector<std::string> ranked;
+    ranked.reserve(suggestions.size());
+    for (const std::size_t position : order) {
+        ranked.push_back(suggestions[position]);
+    }
+    suggestions.swap(ranked);
+}
+
+// Drops `word` itself, words shorter than three characters and repeated
+// entries. The first occurrence of each word stays where it is, so the order
+// produced by rank_suggestions() survives.
+void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
+                                  const std::string &word) const {
+    const std::string::size_type min_length = 3;
+
+    // `seen` is only a membership filter: copying the vector through a
+    // std::set and back would return the words in alphabetical order.
+    std::set<std::string> seen;
+    std::vector<std::string> kept;
+    kept.reserve(suggestions.size());
+    for (const std::string &candidate : suggestions) {
+        if (candidate == word || candidate.length() < min_length) {
+            continue;
+        }
+        if (seen.insert(candidate).second) {
+            kept.push_back(candidate);
+        }
+    }
+    suggestions.swap(kept);
+}
+
 int Dictionary::spit(path p) {
     std::ofstream file(p);
 
diff --git a/lab2/dictionary.h b/lab2/dictionary.h
index a9c5519..31eada4 100644
--- a/lab2/dictionary.h
+++ b/lab2/dictionary.h
@@ -1,2 +1 @@
-#ifndef DICTIONARY_H
-#define DICTIONARY_H
+#pragma once
@@ -8,19 +7,24 @@
 
 #define MAXLEN 30
 
-using std::vector;
 using std::filesystem::path;
 
 class Dictionary {
 public:
     Dictionary();
+    // Suggestion pipeline, run in this order by get_suggestions().
+    // Each step edits `suggestions` in place.
+    void add_trigram_suggestions(std::vector<std::string> &suggestions,
+                                 const std::string &word) const;
+    void rank_suggestions(std::vector<std::string> &suggestions,
+                          const std::string &word) const;
+    void trim_suggestions(std::vector<std::string> &suggestions,
+                          const std::string &word) const;
     bool contains(const std::string &word) const;
     std::vector<std::string> get_suggestions(const std::string &word) const;
     int slurp(path p);
     int spit(path p);
 
 private:
-    vector<Word> words[MAXLEN];
+    std::vector<Word> words[MAXLEN];
 };
-
-#endif