Initial implementation of matcher

This commit is contained in:
Imbus 2024-11-21 08:50:09 +01:00
parent 70170ea995
commit 7a62bebf76
2 changed files with 77 additions and 10 deletions

View file

@ -3,6 +3,7 @@
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <set>
#include <string> #include <string>
#include <vector> #include <vector>
@ -21,14 +22,77 @@ bool Dictionary::contains(const string &word) const {
return false; return false;
} }
vector<string> Dictionary::get_suggestions(const string &word) const { std::vector<string> Dictionary::get_suggestions(const string &word) const {
vector<string> suggestions; vector<string> suggestions;
// add_trigram_suggestions(suggestions, word); add_trigram_suggestions(suggestions, word);
// rank_suggestions(suggestions, word); rank_suggestions(suggestions, word);
// trim_suggestions(suggestions); trim_suggestions(suggestions, word);
return suggestions; return suggestions;
} }
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const {
// Get trigrams of the input word
Word input_word(word);
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
// Iterate through all words in the dictionary
for (int i = 0; i < MAXLEN; ++i) {
for (const Word &dict_word : words[i]) {
// Get the trigrams of the dictionary word
const std::vector<std::string> &dict_word_trigrams =
dict_word.get_triagrams();
// Count how many trigrams match
unsigned int match_count = dict_word.get_matches(input_trigrams);
// If there are any matches, add the word to suggestions
if (match_count > 0) {
suggestions.push_back(dict_word.get_word());
}
}
}
}
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const {
// Get trigrams of the input word
Word input_word(word);
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
// Sort suggestions based on the number of matching trigrams
std::sort(suggestions.begin(), suggestions.end(),
[&](const std::string &a, const std::string &b) {
Word word_a(a);
Word word_b(b);
unsigned int match_a = word_a.get_matches(input_trigrams);
unsigned int match_b = word_b.get_matches(input_trigrams);
return match_a >
match_b; // Sort in descending order of match count
});
}
/// Filters `suggestions` in place while preserving the order of the entries
/// that survive.
///
/// An entry is dropped when it
///   - equals the input `word`,
///   - is shorter than 3 characters, or
///   - has already appeared earlier in the list (only the first occurrence
///     is kept).
///
/// The list is filtered in a single pass instead of being round-tripped
/// through a `std::set`, because assigning back from a set re-sorts the list
/// alphabetically and loses whatever order the caller established.
///
/// @param suggestions List to filter in place.
/// @param word        The input word; removed from the list if present.
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
                                  const std::string &word) const {
  std::set<std::string> seen; // words already kept, for duplicate detection
  std::vector<std::string> kept;
  kept.reserve(suggestions.size());

  for (const std::string &candidate : suggestions) {
    // Drop the input word itself and anything shorter than 3 characters.
    if (candidate == word || candidate.length() < 3) {
      continue;
    }
    // insert().second is true only the first time a word is seen.
    if (seen.insert(candidate).second) {
      kept.push_back(candidate);
    }
  }

  suggestions.swap(kept);
}
int Dictionary::spit(path p) { int Dictionary::spit(path p) {
std::ofstream file(p); std::ofstream file(p);

View file

@ -1,5 +1,4 @@
#ifndef DICTIONARY_H #pragma once
#define DICTIONARY_H
#include "word.h" #include "word.h"
#include <filesystem> #include <filesystem>
@ -8,19 +7,23 @@
#define MAXLEN 30 #define MAXLEN 30
using std::vector; // using std::vector;
using std::filesystem::path; using std::filesystem::path;
class Dictionary { class Dictionary {
public: public:
Dictionary(); Dictionary();
void add_trigram_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const;
void rank_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const;
void trim_suggestions(std::vector<std::string> &suggestions,
const std::string &word) const;
bool contains(const std::string &word) const; bool contains(const std::string &word) const;
std::vector<std::string> get_suggestions(const std::string &word) const; std::vector<std::string> get_suggestions(const std::string &word) const;
int slurp(path p); int slurp(path p);
int spit(path p); int spit(path p);
private: private:
vector<Word> words[MAXLEN]; std::vector<Word> words[MAXLEN];
}; };
#endif