Compare commits
6 commits
fe00d47e02
...
7a62bebf76
Author | SHA1 | Date | |
---|---|---|---|
|
7a62bebf76 | ||
|
70170ea995 | ||
|
d10300509e | ||
|
7dd7f5610b | ||
|
94d807fc67 | ||
|
8c8930f5c5 |
8 changed files with 118 additions and 36 deletions
|
@ -3,6 +3,7 @@ CXXFLAGS = -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Wold-style-cast
|
||||||
#CXXFLAGS += -Werror
|
#CXXFLAGS += -Werror
|
||||||
|
|
||||||
SRC = $(wildcard *.cc)
|
SRC = $(wildcard *.cc)
|
||||||
|
HDR = $(wildcard *.h)
|
||||||
OBJ = $(SRC:.cc=.o)
|
OBJ = $(SRC:.cc=.o)
|
||||||
|
|
||||||
all: spell edit $(OBJ)
|
all: spell edit $(OBJ)
|
||||||
|
@ -19,7 +20,18 @@ spell: spell.o word.o dictionary.o
|
||||||
@echo "Building $@"
|
@echo "Building $@"
|
||||||
@$(CXX) -c $(CXXFLAGS) $< -o $@
|
@$(CXX) -c $(CXXFLAGS) $< -o $@
|
||||||
|
|
||||||
|
lint: clang-tidy cppcheck clang-format
|
||||||
|
|
||||||
|
clang-tidy:
|
||||||
|
clang-tidy $(SRC) -- $(CXXFLAGS)
|
||||||
|
|
||||||
|
cppcheck:
|
||||||
|
cppcheck --enable=all --language=c++ --std=c++17 --suppress=missingIncludeSystem -I/usr/include $(SRC) $(HDR)
|
||||||
|
|
||||||
|
clang-format:
|
||||||
|
clang-format -i $(SRC) $(HDR)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o spell edit
|
rm -f *.o spell edit
|
||||||
|
|
||||||
.PHONY: clean
|
.PHONY: clean all lint clang-tidy cppcheck clang-format
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
#include "dictionary.h"
|
#include "dictionary.h"
|
||||||
#include "word.h"
|
#include "word.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <filesystem>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ using std::vector;
|
||||||
Dictionary::Dictionary() {}
|
Dictionary::Dictionary() {}
|
||||||
|
|
||||||
bool Dictionary::contains(const string &word) const {
|
bool Dictionary::contains(const string &word) const {
|
||||||
int l = word.length();
|
auto l = word.length();
|
||||||
Word w = Word(word);
|
Word w = Word(word);
|
||||||
if (std::find(this->words[l].begin(), this->words[l].end(), w) !=
|
if (std::find(this->words[l].begin(), this->words[l].end(), w) !=
|
||||||
std::end(this->words[l])) {
|
std::end(this->words[l])) {
|
||||||
|
@ -22,14 +22,77 @@ bool Dictionary::contains(const string &word) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> Dictionary::get_suggestions(const string &word) const {
|
std::vector<string> Dictionary::get_suggestions(const string &word) const {
|
||||||
vector<string> suggestions;
|
vector<string> suggestions;
|
||||||
// add_trigram_suggestions(suggestions, word);
|
add_trigram_suggestions(suggestions, word);
|
||||||
// rank_suggestions(suggestions, word);
|
rank_suggestions(suggestions, word);
|
||||||
// trim_suggestions(suggestions);
|
trim_suggestions(suggestions, word);
|
||||||
return suggestions;
|
return suggestions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const {
|
||||||
|
// Get trigrams of the input word
|
||||||
|
Word input_word(word);
|
||||||
|
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
|
||||||
|
|
||||||
|
// Iterate through all words in the dictionary
|
||||||
|
for (int i = 0; i < MAXLEN; ++i) {
|
||||||
|
for (const Word &dict_word : words[i]) {
|
||||||
|
// Get the trigrams of the dictionary word
|
||||||
|
const std::vector<std::string> &dict_word_trigrams =
|
||||||
|
dict_word.get_triagrams();
|
||||||
|
|
||||||
|
// Count how many trigrams match
|
||||||
|
unsigned int match_count = dict_word.get_matches(input_trigrams);
|
||||||
|
|
||||||
|
// If there are any matches, add the word to suggestions
|
||||||
|
if (match_count > 0) {
|
||||||
|
suggestions.push_back(dict_word.get_word());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const {
|
||||||
|
// Get trigrams of the input word
|
||||||
|
Word input_word(word);
|
||||||
|
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
|
||||||
|
|
||||||
|
// Sort suggestions based on the number of matching trigrams
|
||||||
|
std::sort(suggestions.begin(), suggestions.end(),
|
||||||
|
[&](const std::string &a, const std::string &b) {
|
||||||
|
Word word_a(a);
|
||||||
|
Word word_b(b);
|
||||||
|
unsigned int match_a = word_a.get_matches(input_trigrams);
|
||||||
|
unsigned int match_b = word_b.get_matches(input_trigrams);
|
||||||
|
return match_a >
|
||||||
|
match_b; // Sort in descending order of match count
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const {
|
||||||
|
// Remove duplicates using a set
|
||||||
|
std::set<std::string> unique_suggestions(suggestions.begin(),
|
||||||
|
suggestions.end());
|
||||||
|
suggestions.assign(unique_suggestions.begin(), unique_suggestions.end());
|
||||||
|
|
||||||
|
// Remove the input word from the suggestions list (if present)
|
||||||
|
suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word),
|
||||||
|
suggestions.end());
|
||||||
|
|
||||||
|
// Example: Remove any suggestions that are too short
|
||||||
|
suggestions.erase(
|
||||||
|
std::remove_if(suggestions.begin(), suggestions.end(),
|
||||||
|
[](const std::string &s) {
|
||||||
|
return s.length() <
|
||||||
|
3; // Remove words shorter than 3 characters
|
||||||
|
}),
|
||||||
|
suggestions.end());
|
||||||
|
}
|
||||||
|
|
||||||
int Dictionary::spit(path p) {
|
int Dictionary::spit(path p) {
|
||||||
std::ofstream file(p);
|
std::ofstream file(p);
|
||||||
|
|
||||||
|
@ -60,9 +123,9 @@ int Dictionary::slurp(path p) {
|
||||||
|
|
||||||
std::string line;
|
std::string line;
|
||||||
while (std::getline(file, line)) {
|
while (std::getline(file, line)) {
|
||||||
if (line.size() > MAXLEN)
|
// Words longer than the maximum length are placed in the topmost bucket
|
||||||
continue;
|
words[std::min(line.size(), static_cast<size_t>(MAXLEN) - 1)].push_back(
|
||||||
words[line.size()].push_back(Word(line));
|
Word(line));
|
||||||
}
|
}
|
||||||
|
|
||||||
file.close();
|
file.close();
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
#ifndef DICTIONARY_H
|
#pragma once
|
||||||
#define DICTIONARY_H
|
|
||||||
|
|
||||||
#include "word.h"
|
#include "word.h"
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
@ -8,19 +7,23 @@
|
||||||
|
|
||||||
#define MAXLEN 30
|
#define MAXLEN 30
|
||||||
|
|
||||||
using std::vector;
|
// using std::vector;
|
||||||
using std::filesystem::path;
|
using std::filesystem::path;
|
||||||
|
|
||||||
class Dictionary {
|
class Dictionary {
|
||||||
public:
|
public:
|
||||||
Dictionary();
|
Dictionary();
|
||||||
|
void add_trigram_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const;
|
||||||
|
void rank_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const;
|
||||||
|
void trim_suggestions(std::vector<std::string> &suggestions,
|
||||||
|
const std::string &word) const;
|
||||||
bool contains(const std::string &word) const;
|
bool contains(const std::string &word) const;
|
||||||
std::vector<std::string> get_suggestions(const std::string &word) const;
|
std::vector<std::string> get_suggestions(const std::string &word) const;
|
||||||
int slurp(path p);
|
int slurp(path p);
|
||||||
int spit(path p);
|
int spit(path p);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
vector<Word> words[MAXLEN];
|
std::vector<Word> words[MAXLEN];
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,14 +1,13 @@
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
int edit_distance(const std::string& s1, const std::string& s2) {
|
int edit_distance(const std::string &s1, const std::string &s2) {
|
||||||
size_t m = s1.size();
|
size_t m = s1.size();
|
||||||
size_t n = s2.size();
|
size_t n = s2.size();
|
||||||
|
|
||||||
// Create a 2D DP table
|
// Create a 2D DP table
|
||||||
std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1));
|
std::vector<std::vector<size_t>> dp(m + 1, std::vector<size_t>(n + 1));
|
||||||
|
|
||||||
// Fill the base cases
|
// Fill the base cases
|
||||||
for (size_t i = 0; i <= m; ++i)
|
for (size_t i = 0; i <= m; ++i)
|
||||||
|
@ -23,13 +22,14 @@ int edit_distance(const std::string& s1, const std::string& s2) {
|
||||||
if (s1[i - 1] == s2[j - 1]) {
|
if (s1[i - 1] == s2[j - 1]) {
|
||||||
dp[i][j] = dp[i - 1][j - 1]; // No operation needed
|
dp[i][j] = dp[i - 1][j - 1]; // No operation needed
|
||||||
} else {
|
} else {
|
||||||
dp[i][j] = 1 + std::min({dp[i - 1][j], // Deletion
|
dp[i][j] = 1 + std::min({
|
||||||
dp[i][j - 1], // Insertion
|
dp[i - 1][j], // Deletion
|
||||||
dp[i - 1][j - 1] // Substitution
|
dp[i][j - 1], // Insertion
|
||||||
});
|
dp[i - 1][j - 1] // Substitution
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return dp[m][n];
|
return static_cast<int>(dp[m][n]);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,18 +1,17 @@
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Computes the edit distance (Levenshtein distance) between two strings.
|
* @brief Computes the edit distance (Levenshtein distance) between two strings.
|
||||||
*
|
*
|
||||||
* The edit distance is defined as the minimum number of single-character edits
|
* The edit distance is defined as the minimum number of single-character edits
|
||||||
* (insertions, deletions, or substitutions) required to transform one string into the other.
|
* (insertions, deletions, or substitutions) required to transform one string
|
||||||
|
* into the other.
|
||||||
*
|
*
|
||||||
* This implementation uses dynamic programming to compute the distance efficiently.
|
* This implementation uses dynamic programming to compute the distance
|
||||||
|
* efficiently.
|
||||||
*
|
*
|
||||||
* @param s1 The first string.
|
* @param s1 The first string.
|
||||||
* @param s2 The second string.
|
* @param s2 The second string.
|
||||||
* @return The edit distance between the two strings.
|
* @return The edit distance between the two strings.
|
||||||
*/
|
*/
|
||||||
int edit_distance(const std::string& s1, const std::string& s2);
|
int edit_distance(const std::string &s1, const std::string &s2);
|
||||||
|
|
|
@ -8,8 +8,7 @@
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
bool do_test(const std::string& x, const std::string& y, int expected)
|
bool do_test(const std::string &x, const std::string &y, int expected) {
|
||||||
{
|
|
||||||
auto actual = edit_distance(x, y);
|
auto actual = edit_distance(x, y);
|
||||||
if (actual != expected) {
|
if (actual != expected) {
|
||||||
std::cout << "*** WRONG: distance(" << x << ", " << y << ") was "
|
std::cout << "*** WRONG: distance(" << x << ", " << y << ") was "
|
||||||
|
@ -19,8 +18,7 @@ bool do_test(const std::string& x, const std::string& y, int expected)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main() {
|
||||||
{
|
|
||||||
int res = do_test("foobar", "foobar", 0);
|
int res = do_test("foobar", "foobar", 0);
|
||||||
res += do_test("x", "x", 0);
|
res += do_test("x", "x", 0);
|
||||||
res += do_test("baz", "bar", 1);
|
res += do_test("baz", "bar", 1);
|
||||||
|
|
|
@ -32,6 +32,10 @@ Word::Word(const std::string &w) : word(w) {
|
||||||
|
|
||||||
string Word::get_word() const { return string(); }
|
// Returns the text this Word was constructed from.
string Word::get_word() const { return word; }
|
||||||
|
|
||||||
|
// Hands the caller a copy of the trigram list stored in this Word.
vector<std::string> Word::get_triagrams() const { return this->triagrams; }
|
||||||
|
|
||||||
unsigned int Word::get_matches(const vector<string> &t) const {
|
unsigned int Word::get_matches(const vector<string> &t) const {
|
||||||
unsigned int matches = 0;
|
unsigned int matches = 0;
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,9 @@ class Word {
|
||||||
/** Returns the word */
|
/** Returns the word */
|
||||||
std::string get_word() const;
|
std::string get_word() const;
|
||||||
|
|
||||||
|
/** Returns this word's trigrams */
|
||||||
|
std::vector<std::string> get_triagrams() const;
|
||||||
|
|
||||||
/** Returns how many of the trigrams in t that are present
|
/** Returns how many of the trigrams in t that are present
|
||||||
in this word's trigram vector */
|
in this word's trigram vector */
|
||||||
unsigned int get_matches(const std::vector<std::string> &t) const;
|
unsigned int get_matches(const std::vector<std::string> &t) const;
|
||||||
|
|
Loading…
Reference in a new issue