Initial implementation of matcher

Formatting
2024-11-21 08:50:09 +01:00 · 2024-11-21 08:49:45 +01:00 · 2024-11-21 08:47:51 +01:00 · 2024-11-21 08:47:45 +01:00 · 2024-11-21 08:46:55 +01:00 · 2024-11-21 08:45:34 +01:00
8 changed files with 118 additions and 36 deletions
--- a/lab2/Makefile
+++ b/lab2/Makefile
@ -3,6 +3,7 @@ CXXFLAGS = -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Wold-style-cast
 #CXXFLAGS += -Werror

 SRC = $(wildcard *.cc)
+HDR = $(wildcard *.h)
 OBJ = $(SRC:.cc=.o)

 all: spell edit $(OBJ)
@ -19,7 +20,18 @@ spell: spell.o word.o dictionary.o
 	@echo "Building $@"
 	@$(CXX) -c $(CXXFLAGS) $< -o $@

+lint: clang-tidy cppcheck clang-format
+
+clang-tidy:
+	clang-tidy $(SRC) -- $(CXXFLAGS)
+
+cppcheck:
+	cppcheck --enable=all --language=c++ --std=c++17 --suppress=missingIncludeSystem -I/usr/include $(SRC) $(HDR)
+
+clang-format:
+	clang-format -i $(SRC) $(HDR)
+
 clean:
 	rm -f *.o spell edit

-.PHONY: clean
+.PHONY: clean all lint clang-tidy cppcheck clang-format
--- a/lab2/dictionary.cc
+++ b/lab2/dictionary.cc
@ -1,9 +1,9 @@
 #include "dictionary.h"
 #include "word.h"
 #include <algorithm>
-#include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <set>
 #include <string>
 #include <vector>

@ -13,7 +13,7 @@ using std::vector;
 Dictionary::Dictionary() {}

 bool Dictionary::contains(const string &word) const {
-    int l = word.length();
+    auto l = word.length();
    Word w = Word(word);
    if (std::find(this->words[l].begin(), this->words[l].end(), w) !=
        std::end(this->words[l])) {
@ -22,14 +22,77 @@ bool Dictionary::contains(const string &word) const {
    return false;
 }

-vector<string> Dictionary::get_suggestions(const string &word) const {
+std::vector<string> Dictionary::get_suggestions(const string &word) const {
    vector<string> suggestions;
-    // add_trigram_suggestions(suggestions, word);
-    // rank_suggestions(suggestions, word);
-    // trim_suggestions(suggestions);
+    add_trigram_suggestions(suggestions, word);
+    rank_suggestions(suggestions, word);
+    trim_suggestions(suggestions, word);
    return suggestions;
 }

+/**
+ * Appends to `suggestions` every stored dictionary word that shares at least
+ * one trigram with `word`.
+ *
+ * @param suggestions Output list; matching words are appended to it.
+ * @param word        The input word whose trigrams are compared.
+ */
+void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
+                                         const std::string &word) const {
+    // Trigrams of the input word, fetched once for the whole scan
+    const Word input_word(word);
+    const std::vector<std::string> input_trigrams = input_word.get_triagrams();
+
+    // Visit every bucket of the words array and every word stored in it
+    for (const std::vector<Word> &bucket : words) {
+        for (const Word &dict_word : bucket) {
+            // A single shared trigram is enough to make the word a candidate
+            if (dict_word.get_matches(input_trigrams) > 0) {
+                suggestions.push_back(dict_word.get_word());
+            }
+        }
+    }
+}
+
+/**
+ * Reorders `suggestions` so that the entries sharing the most trigrams with
+ * `word` come first (descending match count).
+ *
+ * Each suggestion is scored exactly once before sorting, rather than
+ * constructing two Word objects inside the comparator on every comparison.
+ *
+ * @param suggestions List to reorder in place.
+ * @param word        The input word the suggestions are compared against.
+ */
+void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
+                                  const std::string &word) const {
+    // A suggestion paired with its trigram match count against the input
+    struct Scored {
+        unsigned int matches;
+        std::string text;
+    };
+
+    const Word input_word(word);
+    const std::vector<std::string> input_trigrams = input_word.get_triagrams();
+
+    // Score every candidate once up front
+    std::vector<Scored> scored;
+    scored.reserve(suggestions.size());
+    for (const std::string &candidate : suggestions) {
+        scored.push_back(
+            Scored{Word(candidate).get_matches(input_trigrams), candidate});
+    }
+
+    // Sort in descending order of match count
+    std::sort(scored.begin(), scored.end(),
+              [](const Scored &lhs, const Scored &rhs) {
+                  return lhs.matches > rhs.matches;
+              });
+
+    // Write the ranked strings back; swap avoids copying each string again
+    for (size_t i = 0; i < scored.size(); ++i) {
+        suggestions[i].swap(scored[i].text);
+    }
+}
+
+/**
+ * Filters `suggestions` in place while keeping the incoming order, so a
+ * ranking applied beforehand (see rank_suggestions) survives the trim.
+ *
+ * Removed entries:
+ *  - the input word itself,
+ *  - words shorter than 3 characters,
+ *  - repeated occurrences of a word (the first occurrence is kept).
+ *
+ * @param suggestions List to filter in place.
+ * @param word        The input word; never offered as its own suggestion.
+ */
+void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
+                                  const std::string &word) const {
+    std::vector<std::string> kept;
+    kept.reserve(suggestions.size());
+
+    // The set only tracks what has already been emitted; copying the set
+    // back into the vector would re-sort the list alphabetically and lose
+    // the ranking.
+    std::set<std::string> seen;
+
+    for (const std::string &candidate : suggestions) {
+        if (candidate == word || candidate.length() < 3) {
+            continue;
+        }
+        // insert().second is true only the first time a value is added
+        if (seen.insert(candidate).second) {
+            kept.push_back(candidate);
+        }
+    }
+
+    suggestions.swap(kept);
+}
+
 int Dictionary::spit(path p) {
    std::ofstream file(p);

@ -60,9 +123,9 @@ int Dictionary::slurp(path p) {

    std::string line;
    while (std::getline(file, line)) {
-        if (line.size() > MAXLEN)
-            continue;
-        words[line.size()].push_back(Word(line));
+        // Words of MAXLEN - 1 or more characters share the topmost bucket
+        words[std::min(line.size(), static_cast<size_t>(MAXLEN) - 1)].push_back(
+            Word(line));
    }

    file.close();
--- a/lab2/dictionary.h
+++ b/lab2/dictionary.h
@ -1,5 +1,4 @@
-#ifndef DICTIONARY_H
-#define DICTIONARY_H
+#pragma once

 #include "word.h"
 #include <filesystem>
@ -8,19 +7,23 @@

 #define MAXLEN 30

-using std::vector;
+// using std::vector;
 using std::filesystem::path;

 class Dictionary {
  public:
    Dictionary();
+    void add_trigram_suggestions(std::vector<std::string> &suggestions,
+                                 const std::string &word) const;
+    void rank_suggestions(std::vector<std::string> &suggestions,
+                          const std::string &word) const;
+    void trim_suggestions(std::vector<std::string> &suggestions,
+                          const std::string &word) const;
    bool contains(const std::string &word) const;
    std::vector<std::string> get_suggestions(const std::string &word) const;
    int slurp(path p);
    int spit(path p);

  private:
-    vector<Word> words[MAXLEN];
+    std::vector<Word> words[MAXLEN];
 };
-
-#endif
--- a/lab2/edit_distance.cc
+++ b/lab2/edit_distance.cc
@ -1,14 +1,13 @@
-#include <iostream>
-#include <vector>
-#include <string>
 #include <algorithm>
+#include <string>
+#include <vector>

-int edit_distance(const std::string& s1, const std::string& s2) {
+int edit_distance(const std::string &s1, const std::string &s2) {
    size_t m = s1.size();
    size_t n = s2.size();

    // Create a 2D DP table
-    std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1));
+    std::vector<std::vector<size_t>> dp(m + 1, std::vector<size_t>(n + 1));

    // Fill the base cases
    for (size_t i = 0; i <= m; ++i)
@ -23,13 +22,14 @@ int edit_distance(const std::string& s1, const std::string& s2) {
            if (s1[i - 1] == s2[j - 1]) {
                dp[i][j] = dp[i - 1][j - 1]; // No operation needed
            } else {
-                dp[i][j] = 1 + std::min({dp[i - 1][j],     // Deletion
-                                         dp[i][j - 1],     // Insertion
-                                         dp[i - 1][j - 1]  // Substitution
-                                        });
+                dp[i][j] = 1 + std::min({
+                                   dp[i - 1][j],    // Deletion
+                                   dp[i][j - 1],    // Insertion
+                                   dp[i - 1][j - 1] // Substitution
+                               });
            }
        }
    }

-    return dp[m][n];
+    return static_cast<int>(dp[m][n]);
 }
--- a/lab2/edit_distance.h
+++ b/lab2/edit_distance.h
@ -1,18 +1,17 @@
-#include <algorithm>
-#include <iostream>
 #include <string>
-#include <vector>

 /**
 * @brief Computes the edit distance (Levenshtein distance) between two strings.
 *
 * The edit distance is defined as the minimum number of single-character edits
- * (insertions, deletions, or substitutions) required to transform one string into the other.
+ * (insertions, deletions, or substitutions) required to transform one string
+ * into the other.
 *
- * This implementation uses dynamic programming to compute the distance efficiently.
+ * This implementation uses dynamic programming to compute the distance
+ * efficiently.
 *
 * @param s1 The first string.
 * @param s2 The second string.
 * @return The edit distance between the two strings.
 */
-int edit_distance(const std::string& s1, const std::string& s2);
+int edit_distance(const std::string &s1, const std::string &s2);
--- a/lab2/test_edit_distance.cc
+++ b/lab2/test_edit_distance.cc
@ -8,8 +8,7 @@

 #include <iostream>

-bool do_test(const std::string& x, const std::string& y, int expected)
-{
+bool do_test(const std::string &x, const std::string &y, int expected) {
    auto actual = edit_distance(x, y);
    if (actual != expected) {
        std::cout << "*** WRONG: distance(" << x << ", " << y << ") was "
@ -19,8 +18,7 @@ bool do_test(const std::string& x, const std::string& y, int expected)
    return false;
 }

-int main()
-{
+int main() {
    int res = do_test("foobar", "foobar", 0);
    res += do_test("x", "x", 0);
    res += do_test("baz", "bar", 1);
--- a/lab2/word.cc
+++ b/lab2/word.cc
@ -32,6 +32,10 @@ Word::Word(const std::string &w) : word(w) {

 /** Returns the string this Word was constructed from. */
 string Word::get_word() const { return word; }

+/** Returns a copy of this word's trigram vector. */
+vector<std::string> Word::get_triagrams() const { return this->triagrams; }
+
 unsigned int Word::get_matches(const vector<string> &t) const {
    unsigned int matches = 0;

--- a/lab2/word.h
+++ b/lab2/word.h
@ -17,6 +17,9 @@ class Word {
    /** Returns the word */
    std::string get_word() const;

+    /** Returns a copy of this word's trigrams */
+  std::vector<std::string> get_triagrams() const;
+
    /** Returns how many of the trigrams in t that are present
     in this word's trigram vector */
    unsigned int get_matches(const std::vector<std::string> &t) const;
Author	SHA1	Message	Date
Imbus	7a62bebf76	Initial implementation of matcher	2024-11-21 08:50:09 +01:00
Imbus	70170ea995	Formatting	2024-11-21 08:49:45 +01:00
Imbus	d10300509e	Formatting	2024-11-21 08:47:51 +01:00
Imbus	7dd7f5610b	Expose triagrams from word	2024-11-21 08:47:45 +01:00
Imbus	94d807fc67	Type casting fixes and bounding array access	2024-11-21 08:46:55 +01:00
Imbus	8c8930f5c5	Makefile targets for linting	2024-11-21 08:45:34 +01:00