2024-11-20 17:46:21 +01:00
|
|
|
#include "dictionary.h"
|
|
|
|
#include "word.h"
|
|
|
|
#include <algorithm>
|
|
|
|
#include <filesystem>
|
2021-10-27 15:15:47 +02:00
|
|
|
#include <fstream>
|
|
|
|
#include <iostream>
|
2024-11-20 17:46:21 +01:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2021-10-27 15:15:47 +02:00
|
|
|
|
|
|
|
using std::string;
|
|
|
|
using std::vector;
|
2024-11-20 17:46:21 +01:00
|
|
|
// using std::filesystem::path;
|
|
|
|
|
|
|
|
// Creates a dictionary. The constructor itself does no work; word data is
// added separately (see slurp()).
Dictionary::Dictionary() = default;
|
|
|
|
|
|
|
|
// Reports whether `word` is in the dictionary.
//
// NOTE(review): this is currently a placeholder -- it does not inspect `word`
// or the stored words and answers true for every input.
bool Dictionary::contains(const string &word) const {
  return true;
}
|
2021-10-27 15:15:47 +02:00
|
|
|
|
2024-11-20 17:46:21 +01:00
|
|
|
// Produces spelling suggestions for `word`.
//
// The suggestion pipeline is disabled at the moment (its steps are commented
// out below), so the returned list is always empty.
vector<string> Dictionary::get_suggestions(const string &word) const {
  vector<string> suggestions;

  // Disabled pipeline steps, in the order they appear in the source:
  //   add_trigram_suggestions(suggestions, word);
  //   rank_suggestions(suggestions, word);
  //   trim_suggestions(suggestions);

  return suggestions;
}
|
|
|
|
|
2024-11-20 17:46:21 +01:00
|
|
|
// Splits `text` into its overlapping three-character substrings (trigrams),
// in order of appearance. Inputs shorter than three characters have no
// trigrams and yield an empty vector.
std::vector<std::string> get_trigrams(const std::string &text) {
  constexpr std::size_t kTrigramLength = 3;

  // A string of length n >= 3 has n - 2 windows of width 3. Computing the
  // count up front also keeps the unsigned subtraction from wrapping around
  // on short input.
  const std::size_t window_count =
      text.size() < kTrigramLength ? 0 : text.size() - kTrigramLength + 1;

  std::vector<std::string> result;
  result.reserve(window_count);
  for (std::size_t start = 0; start < window_count; ++start) {
    result.push_back(text.substr(start, kTrigramLength));
  }
  return result;
}
|
|
|
|
|
|
|
|
// Writes every stored word to the file at `p`, one entry per line.
//
// Each line consists of the word (as printed by Word's stream operator), the
// number of trigrams of the word text, and then the trigrams themselves, all
// separated by single spaces. The trigrams are recomputed with get_trigrams().
//
// Returns 0 on success, or 1 if the file could not be opened or if any write
// (including the final flush performed by close()) failed.
int Dictionary::spit(path p) {
  std::ofstream file(p);
  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  // Walk the length-indexed buckets in ascending order.
  for (int a = 0; a < MAXLEN; a++) {
    for (auto &word : words[a]) {
      const std::vector<std::string> trias = get_trigrams(word.get_word());
      file << word << " " << trias.size();
      for (const auto &tria : trias) {
        file << " " << tria;
      }
      // '\n' rather than std::endl: no need to flush after every entry.
      file << '\n';
    }
  }

  // close() flushes the buffer; a failed write or flush leaves the stream in
  // a failed state, which must be reported instead of claiming success.
  file.close();
  if (file.fail()) {
    std::cerr << "Error writing file! " << std::endl;
    return 1;
  }
  return 0;
}
|
|
|
|
|
2024-11-20 17:46:21 +01:00
|
|
|
// Loads words from the file at `p`, treating each line as one word.
//
// Every accepted line is stored, together with its trigrams from
// get_trigrams(), in the bucket `words[line.size()]`. Lines whose length is
// MAXLEN or more are skipped.
//
// Returns 0 on success, or 1 if the file could not be opened.
int Dictionary::slurp(path p) {
  std::ifstream file(p.string());
  if (!file.is_open()) {
    std::cerr << "Error opening file! " << std::endl;
    return 1;
  }

  std::string line;
  while (std::getline(file, line)) {
    // The length is used directly as the bucket index. spit() only visits
    // buckets 0 .. MAXLEN - 1, so MAXLEN itself is treated as out of range
    // here (the previous `>` comparison let index MAXLEN through).
    if (line.size() >= MAXLEN)
      continue;
    words[line.size()].push_back(Word(line, get_trigrams(line)));
  }

  // The stream is closed by its destructor when it goes out of scope.
  return 0;
}
|