From 8c8930f5c57859abdee6c154efe206a0c02a2500 Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:45:34 +0100 Subject: [PATCH 1/6] Makefile targets for linting --- lab2/Makefile | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lab2/Makefile b/lab2/Makefile index 48c8fe4..0b87009 100644 --- a/lab2/Makefile +++ b/lab2/Makefile @@ -3,6 +3,7 @@ CXXFLAGS = -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Wold-style-cast #CXXFLAGS += -Werror SRC = $(wildcard *.cc) +HDR = $(wildcard *.h) OBJ = $(SRC:.cc=.o) all: spell edit $(OBJ) @@ -19,7 +20,18 @@ spell: spell.o word.o dictionary.o @echo "Building $@" @$(CXX) -c $(CXXFLAGS) $< -o $@ +lint: clang-tidy cppcheck clang-format + +clang-tidy: + clang-tidy $(SRC) -- $(CXXFLAGS) + +cppcheck: + cppcheck --enable=all --language=c++ --std=c++17 --suppress=missingIncludeSystem -I/usr/include $(SRC) $(HDR) + +clang-format: + clang-format -i $(SRC) $(HDR) + clean: rm -f *.o spell edit -.PHONY: clean +.PHONY: clean all lint clang-tidy cppcheck clang-format From 94d807fc677986add815d6c149189bea7c320e9b Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:46:55 +0100 Subject: [PATCH 2/6] Type casting fixes and bounding array access --- lab2/dictionary.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lab2/dictionary.cc b/lab2/dictionary.cc index 6747a69..51d5ba7 100644 --- a/lab2/dictionary.cc +++ b/lab2/dictionary.cc @@ -1,7 +1,6 @@ #include "dictionary.h" #include "word.h" #include -#include #include #include #include @@ -13,7 +12,7 @@ using std::vector; Dictionary::Dictionary() {} bool Dictionary::contains(const string &word) const { - int l = word.length(); + auto l = word.length(); Word w = Word(word); if (std::find(this->words[l].begin(), this->words[l].end(), w) != std::end(this->words[l])) { @@ -60,9 +59,9 @@ int Dictionary::slurp(path p) { std::string line; while (std::getline(file, line)) { - if (line.size() > MAXLEN) - continue; - words[line.size()].push_back(Word(line)); + // Words larger than max gets placed in the topmost bucket + words[std::min(line.size(), static_cast(MAXLEN) - 1)].push_back( + Word(line)); } file.close(); From 7dd7f5610b1ecbbe5aef36e06beb3ed2d3d048b8 Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:47:45 +0100 Subject: [PATCH 3/6] Expose triagrams from word --- lab2/word.cc | 4 ++++ lab2/word.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/lab2/word.cc b/lab2/word.cc index 4e89ae4..c1f73f6 100644 --- a/lab2/word.cc +++ b/lab2/word.cc @@ -32,6 +32,10 @@ Word::Word(const std::string &w) : word(w) { string Word::get_word() const { return string(); } +vector Word::get_triagrams() const { + return triagrams; +} + unsigned int Word::get_matches(const vector &t) const { unsigned int matches = 0; diff --git a/lab2/word.h b/lab2/word.h index babe27e..eb927a2 100644 --- a/lab2/word.h +++ b/lab2/word.h @@ -17,6 +17,9 @@ class Word { /** Returns the word */ std::string get_word() const; + /** Returns triagrams */ + std::vector get_triagrams() const; + /** Returns how many of the trigrams in t that are present in this word's trigram vector */ unsigned int get_matches(const std::vector &t) const; From d10300509e75b5e73ee5e2df1dbc340fb031899c Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:47:51 +0100 Subject: [PATCH 4/6] Formatting --- lab2/test_edit_distance.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lab2/test_edit_distance.cc b/lab2/test_edit_distance.cc index 1f44719..982badf 100644 --- a/lab2/test_edit_distance.cc +++ b/lab2/test_edit_distance.cc @@ -8,8 +8,7 @@ #include -bool do_test(const std::string& x, const std::string& y, int expected) -{ +bool do_test(const std::string &x, const std::string &y, int expected) { auto actual = edit_distance(x, y); if (actual != expected) { std::cout << "*** WRONG: distance(" << x << ", " << y << ") was " @@ -19,8 +18,7 @@ bool do_test(const std::string& x, const std::string& y, int expected) return false; } -int main() -{ +int main() { int res = do_test("foobar", "foobar", 0); res += do_test("x", "x", 0); res += do_test("baz", "bar", 1); From 70170ea9954e7f698520f67abde224b817c9169d Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:49:45 +0100 Subject: [PATCH 5/6] Formatting --- lab2/edit_distance.cc | 20 ++++++++++---------- lab2/edit_distance.h | 11 +++++------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/lab2/edit_distance.cc b/lab2/edit_distance.cc index 905aeb4..7f635c9 100644 --- a/lab2/edit_distance.cc +++ b/lab2/edit_distance.cc @@ -1,14 +1,13 @@ -#include -#include -#include #include +#include +#include -int edit_distance(const std::string& s1, const std::string& s2) { +int edit_distance(const std::string &s1, const std::string &s2) { size_t m = s1.size(); size_t n = s2.size(); // Create a 2D DP table - std::vector> dp(m + 1, std::vector(n + 1)); + std::vector> dp(m + 1, std::vector(n + 1)); // Fill the base cases for (size_t i = 0; i <= m; ++i) @@ -23,13 +22,14 @@ int edit_distance(const std::string& s1, const std::string& s2) { if (s1[i - 1] == s2[j - 1]) { dp[i][j] = dp[i - 1][j - 1]; // No operation needed } else { - dp[i][j] = 1 + std::min({dp[i - 1][j], // Deletion - dp[i][j - 1], // Insertion - dp[i - 1][j - 1] // Substitution - }); + dp[i][j] = 1 + std::min({ + dp[i - 1][j], // Deletion + dp[i][j - 1], // Insertion + dp[i - 1][j - 1] // Substitution + }); } } } - return dp[m][n]; + return static_cast(dp[m][n]); } diff --git a/lab2/edit_distance.h b/lab2/edit_distance.h index 71765ad..6ddc48a 100644 --- a/lab2/edit_distance.h +++ b/lab2/edit_distance.h @@ -1,18 +1,17 @@ -#include -#include #include -#include /** * @brief Computes the edit distance (Levenshtein distance) between two strings. * * The edit distance is defined as the minimum number of single-character edits - * (insertions, deletions, or substitutions) required to transform one string into the other. + * (insertions, deletions, or substitutions) required to transform one string + * into the other. * - * This implementation uses dynamic programming to compute the distance efficiently. + * This implementation uses dynamic programming to compute the distance + * efficiently. * * @param s1 The first string. * @param s2 The second string. * @return The edit distance between the two strings. */ -int edit_distance(const std::string& s1, const std::string& s2); +int edit_distance(const std::string &s1, const std::string &s2); From 7a62bebf76759c3677564368484ce593132a4162 Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Thu, 21 Nov 2024 08:50:09 +0100 Subject: [PATCH 6/6] Initial implementation of matcher --- lab2/dictionary.cc | 72 +++++++++++++++++++++++++++++++++++++++++++--- lab2/dictionary.h | 15 ++++++---- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/lab2/dictionary.cc b/lab2/dictionary.cc index 51d5ba7..dc97b04 100644 --- a/lab2/dictionary.cc +++ b/lab2/dictionary.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -21,14 +22,77 @@ bool Dictionary::contains(const string &word) const { return false; } -vector Dictionary::get_suggestions(const string &word) const { +std::vector Dictionary::get_suggestions(const string &word) const { vector suggestions; - // add_trigram_suggestions(suggestions, word); - // rank_suggestions(suggestions, word); - // trim_suggestions(suggestions); + add_trigram_suggestions(suggestions, word); + rank_suggestions(suggestions, word); + trim_suggestions(suggestions, word); return suggestions; } +void Dictionary::add_trigram_suggestions(std::vector &suggestions, + const std::string &word) const { + // Get trigrams of the input word + Word input_word(word); + const std::vector &input_trigrams = input_word.get_triagrams(); + + // Iterate through all words in the dictionary + for (int i = 0; i < MAXLEN; ++i) { + for (const Word &dict_word : words[i]) { + // Get the trigrams of the dictionary word + const std::vector &dict_word_trigrams = + dict_word.get_triagrams(); + + // Count how many trigrams match + unsigned int match_count = dict_word.get_matches(input_trigrams); + + // If there are any matches, add the word to suggestions + if (match_count > 0) { + suggestions.push_back(dict_word.get_word()); + } + } + } +} + +void Dictionary::rank_suggestions(std::vector &suggestions, + const std::string &word) const { + // Get trigrams of the input word + Word input_word(word); + const std::vector &input_trigrams = input_word.get_triagrams(); + + // Sort suggestions based on the number of matching trigrams + std::sort(suggestions.begin(), suggestions.end(), + [&](const std::string &a, const std::string &b) { + Word word_a(a); + Word word_b(b); + unsigned int match_a = word_a.get_matches(input_trigrams); + unsigned int match_b = word_b.get_matches(input_trigrams); + return match_a > + match_b; // Sort in descending order of match count + }); +} + +void Dictionary::trim_suggestions(std::vector &suggestions, + const std::string &word) const { + // Remove duplicates using a set + std::set unique_suggestions(suggestions.begin(), + suggestions.end()); + suggestions.assign(unique_suggestions.begin(), unique_suggestions.end()); + + // Remove the input word from the suggestions list (if present) + suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word), + suggestions.end()); + + // Example: Remove any suggestions that are too short + suggestions.erase( + std::remove_if(suggestions.begin(), suggestions.end(), + [](const std::string &s) { + return s.length() < + 3; // Remove words shorter than 3 characters + }), + suggestions.end()); +} + int Dictionary::spit(path p) { std::ofstream file(p); diff --git a/lab2/dictionary.h b/lab2/dictionary.h index a9c5519..31eada4 100644 --- a/lab2/dictionary.h +++ b/lab2/dictionary.h @@ -1,5 +1,4 @@ -#ifndef DICTIONARY_H -#define DICTIONARY_H +#pragma once #include "word.h" #include @@ -8,19 +7,23 @@ #define MAXLEN 30 -using std::vector; +// using std::vector; using std::filesystem::path; class Dictionary { public: Dictionary(); + void add_trigram_suggestions(std::vector &suggestions, + const std::string &word) const; + void rank_suggestions(std::vector &suggestions, + const std::string &word) const; + void trim_suggestions(std::vector &suggestions, + const std::string &word) const; bool contains(const std::string &word) const; std::vector get_suggestions(const std::string &word) const; int slurp(path p); int spit(path p); private: - vector words[MAXLEN]; + std::vector words[MAXLEN]; }; - -#endif