This commit is contained in:
Imbus 2024-11-21 07:48:45 +01:00
parent fdae90ad9f
commit b843fc98e0
3 changed files with 35 additions and 24 deletions

View file

@ -9,7 +9,6 @@
using std::string;
using std::vector;
// using std::filesystem::path;
// Default constructor: performs no work beyond default-initializing members.
Dictionary::Dictionary() = default;
@ -23,21 +22,6 @@ vector<string> Dictionary::get_suggestions(const string &word) const {
return suggestions;
}
// Collects every contiguous 3-character window of `text`, left to right.
// Inputs shorter than three characters produce an empty vector.
std::vector<std::string> get_trigrams(const std::string &text) {
    std::vector<std::string> result;
    const std::string::size_type length = text.size();
    std::string::size_type start = 0;
    // `start + 3 <= length` is false immediately for short inputs, so no
    // separate length guard is needed.
    while (start + 3 <= length) {
        result.push_back(text.substr(start, 3));
        ++start;
    }
    return result;
}
int Dictionary::spit(path p) {
std::ofstream file(p);
@ -48,13 +32,7 @@ int Dictionary::spit(path p) {
for (int a = 0; a < MAXLEN; a++) {
for (auto &word : words[a]) {
std::vector<std::string> trias = get_trigrams(word.get_word());
file << word << " " << trias.size();
for (auto tria : trias) {
file << " " << tria;
}
file << word;
file << std::endl;
}
}
@ -76,7 +54,7 @@ int Dictionary::slurp(path p) {
while (std::getline(file, line)) {
if (line.size() > MAXLEN)
continue;
words[line.size()].push_back(Word(line, get_trigrams(line)));
words[line.size()].push_back(Word(line));
}
file.close();

View file

@ -1,4 +1,5 @@
#include "word.h"
#include "dictionary.h"
#include <algorithm>
#include <string>
#include <vector>
@ -10,6 +11,25 @@ Word::Word(const string &w, const vector<string> &t) : word(w), triagrams(t) {
std::sort(triagrams.begin(), triagrams.end());
}
/**
 * Splits text into its overlapping trigrams (substrings of length 3),
 * in the order they occur in text.
 *
 * @param text the string to split
 * @return text.size() - 2 trigrams, or an empty vector when text has fewer
 *         than three characters
 */
std::vector<std::string> get_trigrams(const std::string &text) {
    std::vector<std::string> trigrams;
    if (text.size() < 3) {
        // Guard first: text.size() - 2 below would wrap around (unsigned).
        return trigrams;
    }
    const std::string::size_type count = text.size() - 2;
    trigrams.reserve(count); // final size is known; avoid regrowth
    for (std::string::size_type i = 0; i < count; ++i) {
        trigrams.push_back(text.substr(i, 3));
    }
    return trigrams;
}
/** Builds a Word from w alone: its trigrams come from get_trigrams(w) and are
 *  stored in sorted order. */
Word::Word(const std::string &w) : word(w), triagrams(get_trigrams(w)) {
    std::sort(triagrams.begin(), triagrams.end());
}
/** Returns a copy of the stored word (previously this always returned an
 *  empty string, contradicting the documented contract). */
string Word::get_word() const { return word; }
unsigned int Word::get_matches(const vector<string> &t) const {
@ -27,8 +47,15 @@ std::ostream &operator<<(std::ostream &out, const Word &w) {
auto space = string(" ");
out << w.word;
out << space;
out << w.triagrams.size();
for (const auto &tria : w.triagrams) {
out << space << tria;
}
return out;
}
/**
 * Two Words compare equal when both the word text and the full trigram
 * lists match.
 *
 * The vectors are compared with operator==, which checks the sizes before
 * the elements. The three-iterator std::equal used previously read past the
 * end of rhs.triagrams when it was shorter than lhs.triagrams, and ignored
 * any extra trailing trigrams in rhs.
 */
bool operator==(const Word &lhs, const Word &rhs) {
    return lhs.word == rhs.word && lhs.triagrams == rhs.triagrams;
}

View file

@ -11,6 +11,9 @@ class Word {
/** Creates a word w with the sorted trigrams t */
Word(const std::string &w, const std::vector<std::string> &t);
/** Creates a word w and derives its sorted trigrams from w internally */
Word(const std::string &w);
/** Returns the word */
std::string get_word() const;
@ -22,4 +25,7 @@ class Word {
const std::string word;
std::vector<std::string> triagrams;
friend std::ostream &operator<<(std::ostream &out, const Word &o);
friend bool operator==(const Word &lhs, const Word &rhs);
};
bool operator==(const Word &lhs, const Word &rhs);