2024-11-20 17:46:21 +01:00
|
|
|
#include "dictionary.h"
|
2024-11-21 09:47:11 +01:00
|
|
|
#include "edit_distance.h"
|
2024-11-20 17:46:21 +01:00
|
|
|
#include "word.h"
|
|
|
|
#include <algorithm>
|
2021-10-27 15:15:47 +02:00
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
2024-11-21 08:50:09 +01:00
|
|
|
#include <set>
|
2024-11-20 17:46:21 +01:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2021-10-27 15:15:47 +02:00
|
|
|
|
|
|
|
using std::string;
|
|
|
|
using std::vector;
|
2024-11-20 17:46:21 +01:00
|
|
|
|
|
|
|
/// Default constructor: performs no work of its own; no word list is read
/// here (see slurp() for loading).
Dictionary::Dictionary() = default;
|
|
|
|
|
2024-11-21 07:48:49 +01:00
|
|
|
bool Dictionary::contains(const string &word) const {
|
2024-11-21 08:46:55 +01:00
|
|
|
auto l = word.length();
|
2024-11-21 07:48:49 +01:00
|
|
|
Word w = Word(word);
|
|
|
|
if (std::find(this->words[l].begin(), this->words[l].end(), w) !=
|
|
|
|
std::end(this->words[l])) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2021-10-27 15:15:47 +02:00
|
|
|
|
2024-11-21 08:50:09 +01:00
|
|
|
std::vector<string> Dictionary::get_suggestions(const string &word) const {
|
2024-11-20 17:46:21 +01:00
|
|
|
vector<string> suggestions;
|
2024-11-21 08:50:09 +01:00
|
|
|
add_trigram_suggestions(suggestions, word);
|
|
|
|
trim_suggestions(suggestions, word);
|
2024-11-21 09:18:48 +01:00
|
|
|
rank_suggestions(suggestions, word);
|
2024-11-20 17:46:21 +01:00
|
|
|
return suggestions;
|
2021-10-27 15:15:47 +02:00
|
|
|
}
|
|
|
|
|
2024-11-21 08:50:09 +01:00
|
|
|
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
|
|
|
|
const std::string &word) const {
|
|
|
|
// Get trigrams of the input word
|
|
|
|
Word input_word(word);
|
|
|
|
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
|
|
|
|
|
|
|
|
// Iterate through all words in the dictionary
|
|
|
|
for (int i = 0; i < MAXLEN; ++i) {
|
|
|
|
for (const Word &dict_word : words[i]) {
|
|
|
|
// Get the trigrams of the dictionary word
|
|
|
|
const std::vector<std::string> &dict_word_trigrams =
|
|
|
|
dict_word.get_triagrams();
|
|
|
|
|
|
|
|
// Count how many trigrams match
|
|
|
|
unsigned int match_count = dict_word.get_matches(input_trigrams);
|
|
|
|
|
|
|
|
// If there are any matches, add the word to suggestions
|
|
|
|
if (match_count > 0) {
|
|
|
|
suggestions.push_back(dict_word.get_word());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
|
|
|
|
const std::string &word) const {
|
2024-11-21 09:47:11 +01:00
|
|
|
// Sort suggestions based on the levenshtein distance
|
2024-11-21 08:50:09 +01:00
|
|
|
std::sort(suggestions.begin(), suggestions.end(),
|
|
|
|
[&](const std::string &a, const std::string &b) {
|
2024-11-21 09:47:11 +01:00
|
|
|
auto dist_a = edit_distance(a, word);
|
|
|
|
auto dist_b = edit_distance(b, word);
|
|
|
|
return dist_a < dist_b;
|
2024-11-21 08:50:09 +01:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
|
|
|
|
const std::string &word) const {
|
|
|
|
// Remove duplicates using a set
|
|
|
|
std::set<std::string> unique_suggestions(suggestions.begin(),
|
|
|
|
suggestions.end());
|
|
|
|
suggestions.assign(unique_suggestions.begin(), unique_suggestions.end());
|
|
|
|
|
|
|
|
// Remove the input word from the suggestions list (if present)
|
|
|
|
suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word),
|
|
|
|
suggestions.end());
|
|
|
|
|
2024-11-21 09:33:20 +01:00
|
|
|
auto l = word.length();
|
|
|
|
std::cout << "WTF" << l << std::endl;
|
|
|
|
|
|
|
|
// Example: Remove any suggestions that are not within 1 string length
|
|
|
|
suggestions.erase(std::remove_if(suggestions.begin(), suggestions.end(),
|
|
|
|
[l](const std::string &s) {
|
|
|
|
return s.length() > (l + 1) ||
|
|
|
|
s.length() < (l - 1);
|
|
|
|
}),
|
|
|
|
suggestions.end());
|
2024-11-21 08:50:09 +01:00
|
|
|
}
|
|
|
|
|
2024-11-20 17:46:21 +01:00
|
|
|
/// Writes every stored word to the file at `p`, one word per line, bucket by
/// bucket.
///
/// @param p path of the file to write
/// @return 0 on success, 1 if the file could not be opened or if writing or
///         closing it failed (a message is printed to std::cerr in both
///         failure cases)
int Dictionary::spit(path p) {
  std::ofstream file(p);

  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  for (int a = 0; a < MAXLEN; a++) {
    for (auto &word : words[a]) {
      file << word;
      // '\n' instead of std::endl: no need to flush the stream after every
      // single word; close() below flushes once.
      file << '\n';
    }
  }

  // close() flushes pending output; any earlier write error is still recorded
  // in the stream state, so one check afterwards covers both.
  file.close();
  if (!file) {
    std::cerr << "Error writing file! " << std::endl;
    return 1;
  }
  return 0;
}
|
|
|
|
|
2024-11-20 17:46:21 +01:00
|
|
|
/// Reads words from the file at `p`, one per line, and stores each in the
/// bucket selected by its length.
///
/// Empty lines are skipped. Lines of MAXLEN - 1 characters or more all go
/// into the last bucket. Words already stored are left in place; new ones
/// are appended.
///
/// @param p path of the file to read
/// @return 0 on success, 1 if the file could not be opened (a message is
///         printed to std::cerr)
int Dictionary::slurp(path p) {
  std::ifstream input(p.string());

  if (!input.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  // Index of the topmost bucket, which collects all over-long words.
  const size_t last_bucket = static_cast<size_t>(MAXLEN) - 1;

  for (std::string line; std::getline(input, line);) {
    if (!line.empty()) {
      words[std::min(line.size(), last_bucket)].push_back(Word(line));
    }
  }

  input.close();
  return 0;
}
|