labs-edaf30/lab2/dictionary.cc

134 lines
4.2 KiB
C++
Raw Normal View History

2024-11-20 17:46:21 +01:00
#include "dictionary.h"
#include "word.h"
#include <algorithm>
2021-10-27 15:15:47 +02:00
#include <fstream>
#include <iostream>
2024-11-21 08:50:09 +01:00
#include <set>
2024-11-20 17:46:21 +01:00
#include <string>
#include <vector>
2021-10-27 15:15:47 +02:00
using std::string;
using std::vector;
2024-11-20 17:46:21 +01:00
// Creates a dictionary whose members are all default-initialized.
// No word file is read here; call slurp() to load words.
Dictionary::Dictionary() = default;
2024-11-21 07:48:49 +01:00
bool Dictionary::contains(const string &word) const {
auto l = word.length();
2024-11-21 07:48:49 +01:00
Word w = Word(word);
if (std::find(this->words[l].begin(), this->words[l].end(), w) !=
std::end(this->words[l])) {
return true;
}
return false;
}
2021-10-27 15:15:47 +02:00
2024-11-21 08:50:09 +01:00
std::vector<string> Dictionary::get_suggestions(const string &word) const {
2024-11-20 17:46:21 +01:00
vector<string> suggestions;
2024-11-21 08:50:09 +01:00
add_trigram_suggestions(suggestions, word);
rank_suggestions(suggestions, word);
trim_suggestions(suggestions, word);
2024-11-20 17:46:21 +01:00
return suggestions;
2021-10-27 15:15:47 +02:00
}
2024-11-21 08:50:09 +01:00
// Appends to `suggestions` every dictionary word that shares at least one
// trigram with `word`.
//
// suggestions: output list; matching words are appended, existing entries are
//              left in place.
// word:        the input word whose trigrams are compared against.
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
                                         const std::string &word) const {
  // Trigrams of the input word, computed once and reused for every candidate.
  const Word input_word(word);
  const std::vector<std::string> &input_trigrams = input_word.get_triagrams();

  // Walk every length bucket and every word stored in it.
  for (int i = 0; i < MAXLEN; ++i) {
    for (const Word &dict_word : words[i]) {
      // get_matches() already does the comparison, so the dictionary word's
      // own trigram list does not need to be fetched here.
      if (dict_word.get_matches(input_trigrams) > 0) {
        suggestions.push_back(dict_word.get_word());
      }
    }
  }
}
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const {
// Get trigrams of the input word
Word input_word(word);
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
// Sort suggestions based on the number of matching trigrams
std::sort(suggestions.begin(), suggestions.end(),
[&](const std::string &a, const std::string &b) {
Word word_a(a);
Word word_b(b);
unsigned int match_a = word_a.get_matches(input_trigrams);
unsigned int match_b = word_b.get_matches(input_trigrams);
return match_a >
match_b; // Sort in descending order of match count
});
}
// Filters `suggestions` in place while preserving the incoming order.
//
// Removed entries:
//   - duplicates (only the first occurrence is kept),
//   - the input `word` itself,
//   - words shorter than 3 characters.
//
// The order must be preserved because get_suggestions() calls this right
// after rank_suggestions(); round-tripping the list through a std::set would
// re-sort it alphabetically and throw the ranking away.
//
// suggestions: list to filter in place.
// word:        the input word, which is never offered as its own suggestion.
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
                                  const std::string &word) const {
  std::set<std::string> seen; // words already kept, for duplicate detection
  std::vector<std::string> kept;
  kept.reserve(suggestions.size());

  for (const std::string &candidate : suggestions) {
    if (candidate == word || candidate.length() < 3) {
      continue;
    }
    // insert().second is true only the first time a word is encountered.
    if (seen.insert(candidate).second) {
      kept.push_back(candidate);
    }
  }

  suggestions.swap(kept);
}
2024-11-20 17:46:21 +01:00
// Writes every stored word to the file at `p`, one word per line, walking the
// length buckets in ascending index order.
//
// Returns 0 on success, 1 if the file could not be opened or if writing to it
// failed.
int Dictionary::spit(path p) {
  std::ofstream file(p);
  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  for (int a = 0; a < MAXLEN; a++) {
    for (auto &word : words[a]) {
      // '\n' instead of std::endl: no need to flush after every single word.
      file << word << '\n';
    }
  }

  // close() flushes the buffer; a failed write or flush leaves the stream in
  // a failed state, which is reported instead of silently returning success.
  file.close();
  if (file.fail()) {
    std::cerr << "Error writing file! " << std::endl;
    return 1;
  }
  return 0;
}
2024-11-20 17:46:21 +01:00
// Loads words from the file at `p`, treating each line as one word.
//
// Every line is wrapped in a Word and appended to the bucket indexed by the
// line's length; lines of length MAXLEN - 1 or more all share the last
// bucket. Existing bucket contents are kept, new words are appended.
//
// Returns 0 on success, 1 if the file could not be opened.
int Dictionary::slurp(path p) {
  std::ifstream input(p.string());
  if (!input.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  // Index of the topmost bucket, which collects all over-long words.
  const auto last_bucket = static_cast<size_t>(MAXLEN) - 1;

  for (std::string entry; std::getline(input, entry);) {
    const auto bucket = std::min(entry.size(), last_bucket);
    words[bucket].push_back(Word(entry));
  }

  input.close();
  return 0;
}