From 7d93d5dbfaa4d4450883c7b484ecc9929e0bfea7 Mon Sep 17 00:00:00 2001 From: Imbus <> Date: Wed, 20 Nov 2024 17:46:21 +0100 Subject: [PATCH] Lab2 ish --- lab2/Makefile | 24 +++++++++++++ lab2/dictionary.cc | 82 +++++++++++++++++++++++++++++++++++++------ lab2/dictionary.h | 19 +++++++--- lab2/edit_distance.cc | 35 ++++++++++++++++++ lab2/edit_distance.h | 18 ++++++++++ lab2/spell.cc | 31 ++++++++-------- lab2/word.cc | 34 +++++++++++++----- lab2/word.h | 34 ++++++++++-------- 8 files changed, 224 insertions(+), 53 deletions(-) create mode 100644 lab2/Makefile create mode 100644 lab2/edit_distance.cc create mode 100644 lab2/edit_distance.h diff --git a/lab2/Makefile b/lab2/Makefile new file mode 100644 index 0000000..4e354d7 --- /dev/null +++ b/lab2/Makefile @@ -0,0 +1,24 @@ +CXX = g++ +CXXFLAGS = -g3 -Werror -Wall -Wpedantic -Wunused-variable -std=c++17 + +SRC = $(wildcard *.cc) +OBJ = $(SRC:.cc=.o) + +all: spell edit $(OBJ) + +edit: test_edit_distance.o edit_distance.o + @echo "Building & linking $@" + @$(CXX) $(CXXFLAGS) $^ -o $@ + +spell: spell.o word.o dictionary.o + @echo "Building & linking $@" + @$(CXX) $(CXXFLAGS) $^ -o $@ + +%.o:%.cc + @echo "Building $@" + @$(CXX) -c $(CXXFLAGS) $< -o $@ + +clean: + rm -f *.o spell edit + +.PHONY: clean diff --git a/lab2/dictionary.cc b/lab2/dictionary.cc index 79bb69e..d4658e8 100644 --- a/lab2/dictionary.cc +++ b/lab2/dictionary.cc @@ -1,22 +1,82 @@ -#include -#include +#include "dictionary.h" +#include "word.h" +#include +#include #include #include -#include -#include "word.h" -#include "dictionary.h" +#include +#include using std::string; using std::vector; +// using std::filesystem::path; -Dictionary::Dictionary() { +Dictionary::Dictionary() {} + +bool Dictionary::contains(const string &word) const { return true; } + +vector Dictionary::get_suggestions(const string &word) const { + vector suggestions; + // add_trigram_suggestions(suggestions, word); + // rank_suggestions(suggestions, word); + // 
trim_suggestions(suggestions); + return suggestions; } -bool Dictionary::contains(const string& word) const { - return true; +// Function to generate trigrams from a string +std::vector get_trigrams(const std::string &text) { + std::vector trigrams; + if (text.size() < 3) { + return trigrams; // Return an empty vector if the input is too short + } + + for (size_t i = 0; i <= text.size() - 3; ++i) { + trigrams.push_back( + text.substr(i, 3)); // Extract a substring of length 3 + } + + return trigrams; } -vector Dictionary::get_suggestions(const string& word) const { - vector suggestions; - return suggestions; +int Dictionary::spit(path p) { /* Dumps every stored Word to the file p, one per line: the Word itself, the trigram count of its text, then those trigrams. Returns 0 on success, 1 if p cannot be opened. */ + std::ofstream file(p); + + if (!file.is_open()) { + std::cerr << "Error opening file! " << std::endl; + return 1; + } + + for (const auto &bucket : words) { /* one bucket per word length */ + for (const auto &word : bucket) { + std::vector trias = get_trigrams(word.get_word()); + file << word << " " << trias.size(); + + for (const auto &tria : trias) { + file << " " << tria; + } + + file << '\n'; /* no per-line flush; the stream is flushed once below */ + } + } + + file.flush(); + file.close(); + return 0; +} + +int Dictionary::slurp(path p) { /* Loads one word per line from p into the bucket indexed by the word length. Returns 0 on success, 1 if p cannot be opened. */ + std::ifstream file(p.string()); + + if (!file.is_open()) { + std::cerr << "Error opening file! 
" << std::endl; + return 1; + } + + std::string line; + while (std::getline(file, line)) { + if (line.size() < std::size(words)) words[line.size()].push_back(Word(line, get_trigrams(line))); /* lines too long for the length-indexed buckets are skipped instead of writing past the end of the array */ + } + + file.close(); + return 0; } diff --git a/lab2/dictionary.h b/lab2/dictionary.h index b56f143..e543011 100644 --- a/lab2/dictionary.h +++ b/lab2/dictionary.h @@ -1,15 +1,24 @@ #ifndef DICTIONARY_H #define DICTIONARY_H +#include "word.h" +#include #include #include +using std::vector; +using std::filesystem::path; + class Dictionary { -public: - Dictionary(); - bool contains(const std::string& word) const; - std::vector get_suggestions(const std::string& word) const; -private: + public: + Dictionary(); + bool contains(const std::string &word) const; + std::vector get_suggestions(const std::string &word) const; + int slurp(path p); + int spit(path p); + + private: + vector words[25]; }; #endif diff --git a/lab2/edit_distance.cc b/lab2/edit_distance.cc new file mode 100644 index 0000000..905aeb4 --- /dev/null +++ b/lab2/edit_distance.cc @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +int edit_distance(const std::string& s1, const std::string& s2) { + size_t m = s1.size(); + size_t n = s2.size(); + + // Create a 2D DP table + std::vector> dp(m + 1, std::vector(n + 1)); + + // Fill the base cases + for (size_t i = 0; i <= m; ++i) + dp[i][0] = i; // Deletion cost + + for (size_t j = 0; j <= n; ++j) + dp[0][j] = j; // Insertion cost + + // Fill the DP table + for (size_t i = 1; i <= m; ++i) { + for (size_t j = 1; j <= n; ++j) { + if (s1[i - 1] == s2[j - 1]) { + dp[i][j] = dp[i - 1][j - 1]; // No operation needed + } else { + dp[i][j] = 1 + std::min({dp[i - 1][j], // Deletion + dp[i][j - 1], // Insertion + dp[i - 1][j - 1] // Substitution + }); + } + } + } + + return dp[m][n]; +} diff --git a/lab2/edit_distance.h b/lab2/edit_distance.h new file mode 100644 index 0000000..71765ad --- /dev/null +++ b/lab2/edit_distance.h @@ -0,0 +1,18 @@ +#include +#include +#include +#include + +/** + * @brief Computes 
the edit distance (Levenshtein distance) between two strings. + * + * The edit distance is defined as the minimum number of single-character edits + * (insertions, deletions, or substitutions) required to transform one string into the other. + * + * This implementation uses dynamic programming to compute the distance efficiently. + * + * @param s1 The first string. + * @param s2 The second string. + * @return The edit distance between the two strings. + */ +int edit_distance(const std::string& s1, const std::string& s2); diff --git a/lab2/spell.cc b/lab2/spell.cc index dfb820b..04c3d39 100644 --- a/lab2/spell.cc +++ b/lab2/spell.cc @@ -1,18 +1,17 @@ +#include "dictionary.h" +#include +#include #include #include -#include #include -#include -#include "dictionary.h" -using std::string; -using std::vector; using std::cin; using std::cout; using std::endl; +using std::string; +using std::vector; -void check_word(const string& word, const Dictionary& dict) -{ +void check_word(const string &word, const Dictionary &dict) { if (dict.contains(word)) { cout << "Correct." << endl; } else { @@ -21,18 +20,22 @@ void check_word(const string& word, const Dictionary& dict) cout << "Wrong, no suggestions." << endl; } else { cout << "Wrong. 
Suggestions:" << endl; - for (const auto& w : suggestions) { + for (const auto &w : suggestions) { cout << " " << w << endl; } } } } + int main() { - Dictionary dict; - string word; - while (cin >> word) { - transform(word.begin(), word.end(), word.begin(), ::tolower); - check_word(word, dict); - } + Dictionary dict; + string word; + if (dict.slurp(std::filesystem::path("/usr/share/dict/words")) != 0) return 1; /* slurp has already reported the failure on stderr */ + // dict.spit(std::filesystem::path("words.txt")); + + // while (cin >> word) { + // transform(word.begin(), word.end(), word.begin(), ::tolower); + // check_word(word, dict); + // } return 0; } diff --git a/lab2/word.cc b/lab2/word.cc index 20b9417..870d6a2 100644 --- a/lab2/word.cc +++ b/lab2/word.cc @@ -1,16 +1,34 @@ +#include "word.h" +#include #include #include -#include "word.h" -using std::vector; using std::string; +using std::vector; -Word::Word(const string& w, const vector& t) {} - -string Word::get_word() const { - return string(); +Word::Word(const string &w, const vector &t) : word(w), triagrams(t) { + std::sort(triagrams.begin(), triagrams.end()); /* keep the copy sorted so get_matches can use binary_search */ } -unsigned int Word::get_matches(const vector& t) const { - return 0; +string Word::get_word() const { return word; } + +unsigned int Word::get_matches(const vector &t) const { + unsigned int matches = 0; + + for (const auto &triagram : t) { + if (std::binary_search(triagrams.begin(), triagrams.end(), triagram)) { + ++matches; + } + } + return matches; +} + +std::ostream &operator<<(std::ostream &out, const Word &w) { + auto space = string(" "); + out << w.word; + out << space; + for (const auto &tria : w.triagrams) { + out << space << tria; + } + return out; } diff --git a/lab2/word.h b/lab2/word.h index 9fb0716..7d07c8b 100644 --- a/lab2/word.h +++ b/lab2/word.h @@ -1,21 +1,25 @@ -#ifndef WORD_H -#define WORD_H +#pragma once #include #include +/* + * Contains a word and its trigrams + */ class Word { -public: - /* Creates a word w with the sorted trigrams t */ - Word(const std::string& w, const std::vector& 
t); - - /* Returns the word */ - std::string get_word() const; - - /* Returns how many of the trigrams in t that are present - in this word's trigram vector */ - unsigned int get_matches(const std::vector& t) const; -private: -}; + public: + /** Creates a word w holding a sorted copy of the trigrams t */ + Word(const std::string &w, const std::vector &t); -#endif + /** Returns the word */ + std::string get_word() const; + + /** Returns how many of the trigrams in t that are present + in this word's trigram vector */ + unsigned int get_matches(const std::vector &t) const; + + private: + std::string word; /* not const: the class must stay assignable to live in a std::vector */ + std::vector triagrams; /* not const: the constructor sorts it in place */ + friend std::ostream &operator<<(std::ostream &out, const Word &o); +};