Compare commits
	
		
			No commits in common. "7a62bebf76759c3677564368484ce593132a4162" and "fe00d47e0236108359a2c034fe9a26bdee088397" have entirely different histories.
		
	
	
		
			7a62bebf76
			...
			fe00d47e02
		
	
		
					 8 changed files with 36 additions and 118 deletions
				
			
		|  | @ -3,7 +3,6 @@ CXXFLAGS = -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Wold-style-cast | ||||||
| #CXXFLAGS += -Werror
 | #CXXFLAGS += -Werror
 | ||||||
| 
 | 
 | ||||||
| SRC = $(wildcard *.cc) | SRC = $(wildcard *.cc) | ||||||
| HDR = $(wildcard *.h) |  | ||||||
| OBJ = $(SRC:.cc=.o) | OBJ = $(SRC:.cc=.o) | ||||||
| 
 | 
 | ||||||
| all: spell edit $(OBJ) | all: spell edit $(OBJ) | ||||||
|  | @ -20,18 +19,7 @@ spell: spell.o word.o dictionary.o | ||||||
| 	@echo "Building $@" | 	@echo "Building $@" | ||||||
| 	@$(CXX) -c $(CXXFLAGS) $< -o $@ | 	@$(CXX) -c $(CXXFLAGS) $< -o $@ | ||||||
| 
 | 
 | ||||||
| lint: clang-tidy cppcheck clang-format |  | ||||||
| 
 |  | ||||||
| clang-tidy: |  | ||||||
| 	clang-tidy $(SRC) -- $(CXXFLAGS) |  | ||||||
| 
 |  | ||||||
| cppcheck: |  | ||||||
| 	cppcheck --enable=all --language=c++ --std=c++17 --suppress=missingIncludeSystem -I/usr/include $(SRC) $(HDR) |  | ||||||
| 
 |  | ||||||
| clang-format: |  | ||||||
| 	clang-format -i $(SRC) $(HDR) |  | ||||||
| 
 |  | ||||||
| clean: | clean: | ||||||
| 	rm -f *.o spell edit | 	rm -f *.o spell edit | ||||||
| 
 | 
 | ||||||
| .PHONY: clean all lint clang-tidy cppcheck clang-format | .PHONY: clean | ||||||
|  |  | ||||||
|  | @ -1,9 +1,9 @@ | ||||||
| #include "dictionary.h" | #include "dictionary.h" | ||||||
| #include "word.h" | #include "word.h" | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
|  | #include <filesystem> | ||||||
| #include <fstream> | #include <fstream> | ||||||
| #include <iostream> | #include <iostream> | ||||||
| #include <set> |  | ||||||
| #include <string> | #include <string> | ||||||
| #include <vector> | #include <vector> | ||||||
| 
 | 
 | ||||||
|  | @ -13,7 +13,7 @@ using std::vector; | ||||||
| Dictionary::Dictionary() {} | Dictionary::Dictionary() {} | ||||||
| 
 | 
 | ||||||
| bool Dictionary::contains(const string &word) const { | bool Dictionary::contains(const string &word) const { | ||||||
|     auto l = word.length(); |     int l = word.length(); | ||||||
|     Word w = Word(word); |     Word w = Word(word); | ||||||
|     if (std::find(this->words[l].begin(), this->words[l].end(), w) != |     if (std::find(this->words[l].begin(), this->words[l].end(), w) != | ||||||
|         std::end(this->words[l])) { |         std::end(this->words[l])) { | ||||||
|  | @ -22,77 +22,14 @@ bool Dictionary::contains(const string &word) const { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::vector<string> Dictionary::get_suggestions(const string &word) const { | vector<string> Dictionary::get_suggestions(const string &word) const { | ||||||
|     vector<string> suggestions; |     vector<string> suggestions; | ||||||
|     add_trigram_suggestions(suggestions, word); |     // add_trigram_suggestions(suggestions, word);
 | ||||||
|     rank_suggestions(suggestions, word); |     // rank_suggestions(suggestions, word);
 | ||||||
|     trim_suggestions(suggestions, word); |     // trim_suggestions(suggestions);
 | ||||||
|     return suggestions; |     return suggestions; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                                          const std::string &word) const { |  | ||||||
|     // Get trigrams of the input word
 |  | ||||||
|     Word input_word(word); |  | ||||||
|     const std::vector<std::string> &input_trigrams = input_word.get_triagrams(); |  | ||||||
| 
 |  | ||||||
|     // Iterate through all words in the dictionary
 |  | ||||||
|     for (int i = 0; i < MAXLEN; ++i) { |  | ||||||
|         for (const Word &dict_word : words[i]) { |  | ||||||
|             // Get the trigrams of the dictionary word
 |  | ||||||
|             const std::vector<std::string> &dict_word_trigrams = |  | ||||||
|                 dict_word.get_triagrams(); |  | ||||||
| 
 |  | ||||||
|             // Count how many trigrams match
 |  | ||||||
|             unsigned int match_count = dict_word.get_matches(input_trigrams); |  | ||||||
| 
 |  | ||||||
|             // If there are any matches, add the word to suggestions
 |  | ||||||
|             if (match_count > 0) { |  | ||||||
|                 suggestions.push_back(dict_word.get_word()); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Dictionary::rank_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                                   const std::string &word) const { |  | ||||||
|     // Get trigrams of the input word
 |  | ||||||
|     Word input_word(word); |  | ||||||
|     const std::vector<std::string> &input_trigrams = input_word.get_triagrams(); |  | ||||||
| 
 |  | ||||||
|     // Sort suggestions based on the number of matching trigrams
 |  | ||||||
|     std::sort(suggestions.begin(), suggestions.end(), |  | ||||||
|               [&](const std::string &a, const std::string &b) { |  | ||||||
|                   Word word_a(a); |  | ||||||
|                   Word word_b(b); |  | ||||||
|                   unsigned int match_a = word_a.get_matches(input_trigrams); |  | ||||||
|                   unsigned int match_b = word_b.get_matches(input_trigrams); |  | ||||||
|                   return match_a > |  | ||||||
|                          match_b; // Sort in descending order of match count
 |  | ||||||
|               }); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Dictionary::trim_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                                   const std::string &word) const { |  | ||||||
|     // Remove duplicates using a set
 |  | ||||||
|     std::set<std::string> unique_suggestions(suggestions.begin(), |  | ||||||
|                                              suggestions.end()); |  | ||||||
|     suggestions.assign(unique_suggestions.begin(), unique_suggestions.end()); |  | ||||||
| 
 |  | ||||||
|     // Remove the input word from the suggestions list (if present)
 |  | ||||||
|     suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word), |  | ||||||
|                       suggestions.end()); |  | ||||||
| 
 |  | ||||||
|     // Example: Remove any suggestions that are too short
 |  | ||||||
|     suggestions.erase( |  | ||||||
|         std::remove_if(suggestions.begin(), suggestions.end(), |  | ||||||
|                        [](const std::string &s) { |  | ||||||
|                            return s.length() < |  | ||||||
|                                   3; // Remove words shorter than 3 characters
 |  | ||||||
|                        }), |  | ||||||
|         suggestions.end()); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| int Dictionary::spit(path p) { | int Dictionary::spit(path p) { | ||||||
|     std::ofstream file(p); |     std::ofstream file(p); | ||||||
| 
 | 
 | ||||||
|  | @ -123,9 +60,9 @@ int Dictionary::slurp(path p) { | ||||||
| 
 | 
 | ||||||
|     std::string line; |     std::string line; | ||||||
|     while (std::getline(file, line)) { |     while (std::getline(file, line)) { | ||||||
|         // Words larger  than max gets placed in the topmost bucket
 |         if (line.size() > MAXLEN) | ||||||
|         words[std::min(line.size(), static_cast<size_t>(MAXLEN) - 1)].push_back( |             continue; | ||||||
|             Word(line)); |         words[line.size()].push_back(Word(line)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     file.close(); |     file.close(); | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| #pragma once | #ifndef DICTIONARY_H | ||||||
|  | #define DICTIONARY_H | ||||||
| 
 | 
 | ||||||
| #include "word.h" | #include "word.h" | ||||||
| #include <filesystem> | #include <filesystem> | ||||||
|  | @ -7,23 +8,19 @@ | ||||||
| 
 | 
 | ||||||
| #define MAXLEN 30 | #define MAXLEN 30 | ||||||
| 
 | 
 | ||||||
| // using std::vector;
 | using std::vector; | ||||||
| using std::filesystem::path; | using std::filesystem::path; | ||||||
| 
 | 
 | ||||||
| class Dictionary { | class Dictionary { | ||||||
|   public: |   public: | ||||||
|     Dictionary(); |     Dictionary(); | ||||||
|     void add_trigram_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                                  const std::string &word) const; |  | ||||||
|     void rank_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                           const std::string &word) const; |  | ||||||
|     void trim_suggestions(std::vector<std::string> &suggestions, |  | ||||||
|                           const std::string &word) const; |  | ||||||
|     bool contains(const std::string &word) const; |     bool contains(const std::string &word) const; | ||||||
|     std::vector<std::string> get_suggestions(const std::string &word) const; |     std::vector<std::string> get_suggestions(const std::string &word) const; | ||||||
|     int slurp(path p); |     int slurp(path p); | ||||||
|     int spit(path p); |     int spit(path p); | ||||||
| 
 | 
 | ||||||
|   private: |   private: | ||||||
|     std::vector<Word> words[MAXLEN]; |     vector<Word> words[MAXLEN]; | ||||||
| }; | }; | ||||||
|  | 
 | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | @ -1,13 +1,14 @@ | ||||||
| #include <algorithm> | #include <iostream> | ||||||
| #include <string> |  | ||||||
| #include <vector> | #include <vector> | ||||||
|  | #include <string> | ||||||
|  | #include <algorithm> | ||||||
| 
 | 
 | ||||||
| int edit_distance(const std::string &s1, const std::string &s2) { | int edit_distance(const std::string& s1, const std::string& s2) { | ||||||
|     size_t m = s1.size(); |     size_t m = s1.size(); | ||||||
|     size_t n = s2.size(); |     size_t n = s2.size(); | ||||||
| 
 | 
 | ||||||
|     // Create a 2D DP table
 |     // Create a 2D DP table
 | ||||||
|     std::vector<std::vector<size_t>> dp(m + 1, std::vector<size_t>(n + 1)); |     std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1)); | ||||||
| 
 | 
 | ||||||
|     // Fill the base cases
 |     // Fill the base cases
 | ||||||
|     for (size_t i = 0; i <= m; ++i) |     for (size_t i = 0; i <= m; ++i) | ||||||
|  | @ -22,14 +23,13 @@ int edit_distance(const std::string &s1, const std::string &s2) { | ||||||
|             if (s1[i - 1] == s2[j - 1]) { |             if (s1[i - 1] == s2[j - 1]) { | ||||||
|                 dp[i][j] = dp[i - 1][j - 1]; // No operation needed
 |                 dp[i][j] = dp[i - 1][j - 1]; // No operation needed
 | ||||||
|             } else { |             } else { | ||||||
|                 dp[i][j] = 1 + std::min({ |                 dp[i][j] = 1 + std::min({dp[i - 1][j],     // Deletion
 | ||||||
|                                    dp[i - 1][j],    // Deletion
 |                                          dp[i][j - 1],     // Insertion
 | ||||||
|                                    dp[i][j - 1],    // Insertion
 |                                          dp[i - 1][j - 1]  // Substitution
 | ||||||
|                                    dp[i - 1][j - 1] // Substitution
 |                                         }); | ||||||
|                                }); |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return static_cast<int>(dp[m][n]); |     return dp[m][n]; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,17 +1,18 @@ | ||||||
|  | #include <algorithm> | ||||||
|  | #include <iostream> | ||||||
| #include <string> | #include <string> | ||||||
|  | #include <vector> | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * @brief Computes the edit distance (Levenshtein distance) between two strings. |  * @brief Computes the edit distance (Levenshtein distance) between two strings. | ||||||
|  * |  * | ||||||
|  * The edit distance is defined as the minimum number of single-character edits |  * The edit distance is defined as the minimum number of single-character edits | ||||||
|  * (insertions, deletions, or substitutions) required to transform one string |  * (insertions, deletions, or substitutions) required to transform one string into the other. | ||||||
|  * into the other. |  | ||||||
|  * |  * | ||||||
|  * This implementation uses dynamic programming to compute the distance |  * This implementation uses dynamic programming to compute the distance efficiently. | ||||||
|  * efficiently. |  | ||||||
|  * |  * | ||||||
|  * @param s1 The first string. |  * @param s1 The first string. | ||||||
|  * @param s2 The second string. |  * @param s2 The second string. | ||||||
|  * @return The edit distance between the two strings. |  * @return The edit distance between the two strings. | ||||||
|  */ |  */ | ||||||
| int edit_distance(const std::string &s1, const std::string &s2); | int edit_distance(const std::string& s1, const std::string& s2); | ||||||
|  |  | ||||||
|  | @ -8,7 +8,8 @@ | ||||||
| 
 | 
 | ||||||
| #include <iostream> | #include <iostream> | ||||||
| 
 | 
 | ||||||
| bool do_test(const std::string &x, const std::string &y, int expected) { | bool do_test(const std::string& x, const std::string& y, int expected) | ||||||
|  | { | ||||||
|     auto actual = edit_distance(x, y); |     auto actual = edit_distance(x, y); | ||||||
|     if (actual != expected) { |     if (actual != expected) { | ||||||
|         std::cout << "*** WRONG: distance(" << x << ", " << y << ") was " |         std::cout << "*** WRONG: distance(" << x << ", " << y << ") was " | ||||||
|  | @ -18,7 +19,8 @@ bool do_test(const std::string &x, const std::string &y, int expected) { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int main() { | int main() | ||||||
|  | { | ||||||
|     int res = do_test("foobar", "foobar", 0); |     int res = do_test("foobar", "foobar", 0); | ||||||
|     res += do_test("x", "x", 0); |     res += do_test("x", "x", 0); | ||||||
|     res += do_test("baz", "bar", 1); |     res += do_test("baz", "bar", 1); | ||||||
|  |  | ||||||
|  | @ -32,10 +32,6 @@ Word::Word(const std::string &w) : word(w) { | ||||||
| 
 | 
 | ||||||
| string Word::get_word() const { return string(); } | string Word::get_word() const { return string(); } | ||||||
| 
 | 
 | ||||||
| vector<std::string> Word::get_triagrams() const { |  | ||||||
|     return triagrams; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| unsigned int Word::get_matches(const vector<string> &t) const { | unsigned int Word::get_matches(const vector<string> &t) const { | ||||||
|     unsigned int matches = 0; |     unsigned int matches = 0; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -17,9 +17,6 @@ class Word { | ||||||
|     /** Returns the word */ |     /** Returns the word */ | ||||||
|     std::string get_word() const; |     std::string get_word() const; | ||||||
| 
 | 
 | ||||||
|     /** Returns triagrams */ |  | ||||||
|   std::vector<std::string> get_triagrams() const; |  | ||||||
| 
 |  | ||||||
|     /** Returns how many of the trigrams in t that are present
 |     /** Returns how many of the trigrams in t that are present
 | ||||||
|      in this word's trigram vector */ |      in this word's trigram vector */ | ||||||
|     unsigned int get_matches(const std::vector<std::string> &t) const; |     unsigned int get_matches(const std::vector<std::string> &t) const; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue