Initial implementation of matcher
This commit is contained in:
parent
70170ea995
commit
7a62bebf76
2 changed files with 77 additions and 10 deletions
|
@ -3,6 +3,7 @@
|
|||
#include <algorithm>
#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>
|
||||
|
||||
|
@ -21,14 +22,77 @@ bool Dictionary::contains(const string &word) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
vector<string> Dictionary::get_suggestions(const string &word) const {
|
||||
std::vector<string> Dictionary::get_suggestions(const string &word) const {
|
||||
vector<string> suggestions;
|
||||
// add_trigram_suggestions(suggestions, word);
|
||||
// rank_suggestions(suggestions, word);
|
||||
// trim_suggestions(suggestions);
|
||||
add_trigram_suggestions(suggestions, word);
|
||||
rank_suggestions(suggestions, word);
|
||||
trim_suggestions(suggestions, word);
|
||||
return suggestions;
|
||||
}
|
||||
|
||||
void Dictionary::add_trigram_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const {
|
||||
// Get trigrams of the input word
|
||||
Word input_word(word);
|
||||
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
|
||||
|
||||
// Iterate through all words in the dictionary
|
||||
for (int i = 0; i < MAXLEN; ++i) {
|
||||
for (const Word &dict_word : words[i]) {
|
||||
// Get the trigrams of the dictionary word
|
||||
const std::vector<std::string> &dict_word_trigrams =
|
||||
dict_word.get_triagrams();
|
||||
|
||||
// Count how many trigrams match
|
||||
unsigned int match_count = dict_word.get_matches(input_trigrams);
|
||||
|
||||
// If there are any matches, add the word to suggestions
|
||||
if (match_count > 0) {
|
||||
suggestions.push_back(dict_word.get_word());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Dictionary::rank_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const {
|
||||
// Get trigrams of the input word
|
||||
Word input_word(word);
|
||||
const std::vector<std::string> &input_trigrams = input_word.get_triagrams();
|
||||
|
||||
// Sort suggestions based on the number of matching trigrams
|
||||
std::sort(suggestions.begin(), suggestions.end(),
|
||||
[&](const std::string &a, const std::string &b) {
|
||||
Word word_a(a);
|
||||
Word word_b(b);
|
||||
unsigned int match_a = word_a.get_matches(input_trigrams);
|
||||
unsigned int match_b = word_b.get_matches(input_trigrams);
|
||||
return match_a >
|
||||
match_b; // Sort in descending order of match count
|
||||
});
|
||||
}
|
||||
|
||||
void Dictionary::trim_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const {
|
||||
// Remove duplicates using a set
|
||||
std::set<std::string> unique_suggestions(suggestions.begin(),
|
||||
suggestions.end());
|
||||
suggestions.assign(unique_suggestions.begin(), unique_suggestions.end());
|
||||
|
||||
// Remove the input word from the suggestions list (if present)
|
||||
suggestions.erase(std::remove(suggestions.begin(), suggestions.end(), word),
|
||||
suggestions.end());
|
||||
|
||||
// Example: Remove any suggestions that are too short
|
||||
suggestions.erase(
|
||||
std::remove_if(suggestions.begin(), suggestions.end(),
|
||||
[](const std::string &s) {
|
||||
return s.length() <
|
||||
3; // Remove words shorter than 3 characters
|
||||
}),
|
||||
suggestions.end());
|
||||
}
|
||||
|
||||
int Dictionary::spit(path p) {
|
||||
std::ofstream file(p);
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#ifndef DICTIONARY_H
|
||||
#define DICTIONARY_H
|
||||
#pragma once
|
||||
|
||||
#include "word.h"
|
||||
#include <filesystem>
|
||||
|
@ -8,19 +7,23 @@
|
|||
|
||||
#define MAXLEN 30
|
||||
|
||||
using std::vector;
|
||||
// using std::vector;
|
||||
using std::filesystem::path;
|
||||
|
||||
class Dictionary {
|
||||
public:
|
||||
Dictionary();
|
||||
void add_trigram_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const;
|
||||
void rank_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const;
|
||||
void trim_suggestions(std::vector<std::string> &suggestions,
|
||||
const std::string &word) const;
|
||||
bool contains(const std::string &word) const;
|
||||
std::vector<std::string> get_suggestions(const std::string &word) const;
|
||||
int slurp(path p);
|
||||
int spit(path p);
|
||||
|
||||
private:
|
||||
vector<Word> words[MAXLEN];
|
||||
std::vector<Word> words[MAXLEN];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue