Compare commits
	
		
			6 commits
		
	
	
		
			fe00d47e02
			...
			7a62bebf76
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 7a62bebf76 | ||
|   | 70170ea995 | ||
|   | d10300509e | ||
|   | 7dd7f5610b | ||
|   | 94d807fc67 | ||
|   | 8c8930f5c5 | 
					 8 changed files with 118 additions and 36 deletions
				
			
		|  | @ -3,6 +3,7 @@ CXXFLAGS = -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Wold-style-cast | |||
| #CXXFLAGS += -Werror
 | ||||
| 
 | ||||
| SRC = $(wildcard *.cc) | ||||
| HDR = $(wildcard *.h) | ||||
| OBJ = $(SRC:.cc=.o) | ||||
| 
 | ||||
| all: spell edit $(OBJ) | ||||
|  | @ -19,7 +20,18 @@ spell: spell.o word.o dictionary.o | |||
| 	@echo "Building $@" | ||||
| 	@$(CXX) -c $(CXXFLAGS) $< -o $@ | ||||
| 
 | ||||
| lint: clang-tidy cppcheck clang-format | ||||
| 
 | ||||
| clang-tidy: | ||||
| 	clang-tidy $(SRC) -- $(CXXFLAGS) | ||||
| 
 | ||||
| cppcheck: | ||||
| 	cppcheck --enable=all --language=c++ --std=c++17 --suppress=missingIncludeSystem -I/usr/include $(SRC) $(HDR) | ||||
| 
 | ||||
| clang-format: | ||||
| 	clang-format -i $(SRC) $(HDR) | ||||
| 
 | ||||
| clean: | ||||
| 	rm -f *.o spell edit | ||||
| 
 | ||||
| .PHONY: clean | ||||
| .PHONY: clean all lint clang-tidy cppcheck clang-format | ||||
|  |  | |||
|  | @ -1,9 +1,9 @@ | |||
| #include "dictionary.h" | ||||
| #include "word.h" | ||||
| #include <algorithm> | ||||
| #include <filesystem> | ||||
| #include <fstream> | ||||
| #include <iostream> | ||||
| #include <set> | ||||
| #include <string> | ||||
| #include <vector> | ||||
| 
 | ||||
|  | @ -13,7 +13,7 @@ using std::vector; | |||
| Dictionary::Dictionary() {} | ||||
| 
 | ||||
| bool Dictionary::contains(const string &word) const { | ||||
|     int l = word.length(); | ||||
|     auto l = word.length(); | ||||
|     Word w = Word(word); | ||||
|     if (std::find(this->words[l].begin(), this->words[l].end(), w) != | ||||
|         std::end(this->words[l])) { | ||||
|  | @ -22,14 +22,77 @@ bool Dictionary::contains(const string &word) const { | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| vector<string> Dictionary::get_suggestions(const string &word) const { | ||||
| std::vector<string> Dictionary::get_suggestions(const string &word) const { | ||||
|     vector<string> suggestions; | ||||
|     // add_trigram_suggestions(suggestions, word);
 | ||||
|     // rank_suggestions(suggestions, word);
 | ||||
|     // trim_suggestions(suggestions);
 | ||||
|     add_trigram_suggestions(suggestions, word); | ||||
|     rank_suggestions(suggestions, word); | ||||
|     trim_suggestions(suggestions, word); | ||||
|     return suggestions; | ||||
| } | ||||
| 
 | ||||
| void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions, | ||||
|                                          const std::string &word) const { | ||||
|     // Get trigrams of the input word
 | ||||
|     Word input_word(word); | ||||
|     const std::vector<std::string> &input_trigrams = input_word.get_triagrams(); | ||||
| 
 | ||||
|     // Iterate through all words in the dictionary
 | ||||
|     for (int i = 0; i < MAXLEN; ++i) { | ||||
|         for (const Word &dict_word : words[i]) { | ||||
|             // Get the trigrams of the dictionary word
 | ||||
|             const std::vector<std::string> &dict_word_trigrams = | ||||
|                 dict_word.get_triagrams(); | ||||
| 
 | ||||
|             // Count how many trigrams match
 | ||||
|             unsigned int match_count = dict_word.get_matches(input_trigrams); | ||||
| 
 | ||||
|             // If there are any matches, add the word to suggestions
 | ||||
|             if (match_count > 0) { | ||||
|                 suggestions.push_back(dict_word.get_word()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Dictionary::rank_suggestions(std::vector<std::string> &suggestions, | ||||
|                                   const std::string &word) const { | ||||
|     // Get trigrams of the input word
 | ||||
|     Word input_word(word); | ||||
|     const std::vector<std::string> &input_trigrams = input_word.get_triagrams(); | ||||
| 
 | ||||
|     // Sort suggestions based on the number of matching trigrams
 | ||||
|     std::sort(suggestions.begin(), suggestions.end(), | ||||
|               [&](const std::string &a, const std::string &b) { | ||||
|                   Word word_a(a); | ||||
|                   Word word_b(b); | ||||
|                   unsigned int match_a = word_a.get_matches(input_trigrams); | ||||
|                   unsigned int match_b = word_b.get_matches(input_trigrams); | ||||
|                   return match_a > | ||||
|                          match_b; // Sort in descending order of match count
 | ||||
|               }); | ||||
| } | ||||
| 
 | ||||
| void Dictionary::trim_suggestions(std::vector<std::string> &suggestions, | ||||
|                                   const std::string &word) const { | ||||
|     // Remove duplicates using a set (NOTE: a std::set iterates in sorted order, so this discards the ranking order)
 | ||||
|     std::set<std::string> unique_suggestions(suggestions.begin(), | ||||
|                                              suggestions.end()); | ||||
|     suggestions.assign(unique_suggestions.begin(), unique_suggestions.end()); | ||||
| 
 | ||||
|     // Remove the input word from the suggestions list (if present)
 | ||||
|     suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word), | ||||
|                       suggestions.end()); | ||||
| 
 | ||||
|     // Remove any suggestions that are shorter than 3 characters
 | ||||
|     suggestions.erase( | ||||
|         std::remove_if(suggestions.begin(), suggestions.end(), | ||||
|                        [](const std::string &s) { | ||||
|                            return s.length() < | ||||
|                                   3; // Remove words shorter than 3 characters
 | ||||
|                        }), | ||||
|         suggestions.end()); | ||||
| } | ||||
| 
 | ||||
| int Dictionary::spit(path p) { | ||||
|     std::ofstream file(p); | ||||
| 
 | ||||
|  | @ -60,9 +123,9 @@ int Dictionary::slurp(path p) { | |||
| 
 | ||||
|     std::string line; | ||||
|     while (std::getline(file, line)) { | ||||
|         if (line.size() > MAXLEN) | ||||
|             continue; | ||||
|         words[line.size()].push_back(Word(line)); | ||||
|         // Words of MAXLEN - 1 or more characters all share the topmost bucket
 | ||||
|         words[std::min(line.size(), static_cast<size_t>(MAXLEN) - 1)].push_back( | ||||
|             Word(line)); | ||||
|     } | ||||
| 
 | ||||
|     file.close(); | ||||
|  |  | |||
|  | @ -1,5 +1,4 @@ | |||
| #ifndef DICTIONARY_H | ||||
| #define DICTIONARY_H | ||||
| #pragma once | ||||
| 
 | ||||
| #include "word.h" | ||||
| #include <filesystem> | ||||
|  | @ -8,19 +7,23 @@ | |||
| 
 | ||||
| #define MAXLEN 30 | ||||
| 
 | ||||
| using std::vector; | ||||
| // using std::vector;
 | ||||
| using std::filesystem::path; | ||||
| 
 | ||||
| class Dictionary { | ||||
|   public: | ||||
|     Dictionary(); | ||||
|     void add_trigram_suggestions(std::vector<std::string> &suggestions, | ||||
|                                  const std::string &word) const; | ||||
|     void rank_suggestions(std::vector<std::string> &suggestions, | ||||
|                           const std::string &word) const; | ||||
|     void trim_suggestions(std::vector<std::string> &suggestions, | ||||
|                           const std::string &word) const; | ||||
|     bool contains(const std::string &word) const; | ||||
|     std::vector<std::string> get_suggestions(const std::string &word) const; | ||||
|     int slurp(path p); | ||||
|     int spit(path p); | ||||
| 
 | ||||
|   private: | ||||
|     vector<Word> words[MAXLEN]; | ||||
|     std::vector<Word> words[MAXLEN]; | ||||
| }; | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -1,14 +1,13 @@ | |||
| #include <iostream> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <algorithm> | ||||
| #include <string> | ||||
| #include <vector> | ||||
| 
 | ||||
| int edit_distance(const std::string& s1, const std::string& s2) { | ||||
| int edit_distance(const std::string &s1, const std::string &s2) { | ||||
|     size_t m = s1.size(); | ||||
|     size_t n = s2.size(); | ||||
| 
 | ||||
|     // Create a 2D DP table
 | ||||
|     std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1)); | ||||
|     std::vector<std::vector<size_t>> dp(m + 1, std::vector<size_t>(n + 1)); | ||||
| 
 | ||||
|     // Fill the base cases
 | ||||
|     for (size_t i = 0; i <= m; ++i) | ||||
|  | @ -23,13 +22,14 @@ int edit_distance(const std::string& s1, const std::string& s2) { | |||
|             if (s1[i - 1] == s2[j - 1]) { | ||||
|                 dp[i][j] = dp[i - 1][j - 1]; // No operation needed
 | ||||
|             } else { | ||||
|                 dp[i][j] = 1 + std::min({dp[i - 1][j],     // Deletion
 | ||||
|                                          dp[i][j - 1],     // Insertion
 | ||||
|                                          dp[i - 1][j - 1]  // Substitution
 | ||||
|                                         }); | ||||
|                 dp[i][j] = 1 + std::min({ | ||||
|                                    dp[i - 1][j],    // Deletion
 | ||||
|                                    dp[i][j - 1],    // Insertion
 | ||||
|                                    dp[i - 1][j - 1] // Substitution
 | ||||
|                                }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return dp[m][n]; | ||||
|     return static_cast<int>(dp[m][n]); | ||||
| } | ||||
|  |  | |||
|  | @ -1,18 +1,17 @@ | |||
| #include <algorithm> | ||||
| #include <iostream> | ||||
| #include <string> | ||||
| #include <vector> | ||||
| 
 | ||||
| /**
 | ||||
|  * @brief Computes the edit distance (Levenshtein distance) between two strings. | ||||
|  * | ||||
|  * The edit distance is defined as the minimum number of single-character edits | ||||
|  * (insertions, deletions, or substitutions) required to transform one string into the other. | ||||
|  * (insertions, deletions, or substitutions) required to transform one string | ||||
|  * into the other. | ||||
|  * | ||||
|  * This implementation uses dynamic programming to compute the distance efficiently. | ||||
|  * This implementation uses dynamic programming to compute the distance | ||||
|  * efficiently. | ||||
|  * | ||||
|  * @param s1 The first string. | ||||
|  * @param s2 The second string. | ||||
|  * @return The edit distance between the two strings. | ||||
|  */ | ||||
| int edit_distance(const std::string& s1, const std::string& s2); | ||||
| int edit_distance(const std::string &s1, const std::string &s2); | ||||
|  |  | |||
|  | @ -8,8 +8,7 @@ | |||
| 
 | ||||
| #include <iostream> | ||||
| 
 | ||||
| bool do_test(const std::string& x, const std::string& y, int expected) | ||||
| { | ||||
| bool do_test(const std::string &x, const std::string &y, int expected) { | ||||
|     auto actual = edit_distance(x, y); | ||||
|     if (actual != expected) { | ||||
|         std::cout << "*** WRONG: distance(" << x << ", " << y << ") was " | ||||
|  | @ -19,8 +18,7 @@ bool do_test(const std::string& x, const std::string& y, int expected) | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| int main() | ||||
| { | ||||
| int main() { | ||||
|     int res = do_test("foobar", "foobar", 0); | ||||
|     res += do_test("x", "x", 0); | ||||
|     res += do_test("baz", "bar", 1); | ||||
|  |  | |||
|  | @ -32,6 +32,10 @@ Word::Word(const std::string &w) : word(w) { | |||
| 
 | ||||
| string Word::get_word() const { return string(); } | ||||
| 
 | ||||
| vector<std::string> Word::get_triagrams() const { | ||||
|     return triagrams; | ||||
| } | ||||
| 
 | ||||
| unsigned int Word::get_matches(const vector<string> &t) const { | ||||
|     unsigned int matches = 0; | ||||
| 
 | ||||
|  |  | |||
|  | @ -17,6 +17,9 @@ class Word { | |||
|     /** Returns the word */ | ||||
|     std::string get_word() const; | ||||
| 
 | ||||
|     /** Returns this word's trigrams */ | ||||
|   std::vector<std::string> get_triagrams() const; | ||||
| 
 | ||||
|     /** Returns how many of the trigrams in t are present
 | ||||
|      in this word's trigram vector */ | ||||
|     unsigned int get_matches(const std::vector<std::string> &t) const; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue