mirror of
https://github.com/florisboard/florisboard.git
synced 2024-09-19 19:42:20 +02:00
Set up base for Kotlin/C++ interoperability
This commit is contained in:
parent
64040f0407
commit
be1fc710ed
1
.gitignore
vendored
1
.gitignore
vendored
@ -43,3 +43,4 @@ crowdin.properties
|
||||
|
||||
# AndroidX Room schema JSONs
|
||||
/app/schemas/
|
||||
/app/.cxx/
|
||||
|
@ -38,12 +38,26 @@ android {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cppFlags("-std=c++17", "-fexceptions", "-frtti")
|
||||
arguments("-DANDROID_STL=c++_static")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buildFeatures {
|
||||
viewBinding = true
|
||||
}
|
||||
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path("src/main/cpp/CMakeLists.txt")
|
||||
version = "3.18.1"
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
named("debug").configure {
|
||||
applicationIdSuffix = ".debug"
|
||||
|
39
app/src/main/cpp/CMakeLists.txt
Normal file
39
app/src/main/cpp/CMakeLists.txt
Normal file
@ -0,0 +1,39 @@
|
||||
# For more information about using CMake with Android Studio, read the
|
||||
# documentation: https://d.android.com/studio/projects/add-native-code.html
|
||||
|
||||
cmake_minimum_required(VERSION 3.18.1)

project("florisboard")

# All native code in this project is compiled as C++17.
set(CMAKE_CXX_STANDARD 17)

# Pulls in ime/nlp/CMakeLists.txt, which declares the ime-nlp library target.
add_subdirectory(ime/nlp)

# Shared library built from the JNI entry points in this directory.
add_library(florisboard-native SHARED
    native-lib.cpp
    dev_patrickgold_florisboard_ime_nlp_SuggestionList.cpp
)

# Locate the "log" library and remember its path in ${log-lib}.
find_library(log-lib log)

# Link the JNI library against the located log library and the ime-nlp target.
target_link_libraries(florisboard-native
    ${log-lib}
    ime-nlp
)
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <jni.h>
|
||||
#include "ime/nlp/staged_suggestion_list.h"
|
||||
|
||||
using namespace ime::nlp;
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeInitialize(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jint max_size) {
|
||||
auto *stagedSuggestionList = new SuggestionList(max_size);
|
||||
return reinterpret_cast<jlong>(stagedSuggestionList);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeDispose(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
delete suggestionList;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeAdd(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jstring word,
|
||||
jint freq) {
|
||||
const char *cWord = env->GetStringUTFChars(word, nullptr);
|
||||
const word_t stdWord = word_t(cWord);
|
||||
env->ReleaseStringUTFChars(word, cWord);
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->add(stdWord, freq);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeClear(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
suggestionList->clear();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jboolean JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeContains(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jstring element) {
|
||||
const char *cWord = env->GetStringUTFChars(element, nullptr);
|
||||
const word_t stdWord = word_t(cWord);
|
||||
env->ReleaseStringUTFChars(element, cWord);
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return suggestionList->containsWord(stdWord);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeGetOrNull(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr,
|
||||
jint index) {
|
||||
auto *suggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
WeightedToken *weightedToken = suggestionList->get(index);
|
||||
if (weightedToken == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return env->NewStringUTF(weightedToken->data.c_str());
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jint JNICALL
|
||||
Java_dev_patrickgold_florisboard_ime_nlp_SuggestionList_00024Companion_nativeSize(
|
||||
JNIEnv *env,
|
||||
jobject thiz,
|
||||
jlong native_ptr) {
|
||||
auto *stagedSuggestionList = reinterpret_cast<SuggestionList *>(native_ptr);
|
||||
return stagedSuggestionList->size();
|
||||
}
|
0
app/src/main/cpp/ime/dummy
Normal file
0
app/src/main/cpp/ime/dummy
Normal file
13
app/src/main/cpp/ime/nlp/CMakeLists.txt
Normal file
13
app/src/main/cpp/ime/nlp/CMakeLists.txt
Normal file
@ -0,0 +1,13 @@
|
||||
# Library target holding the NLP data structures (headers listed alongside the sources).
add_library(ime-nlp
    nlp.h
    token.h
    staged_suggestion_list.h
    token.cpp
    staged_suggestion_list.cpp
)
|
33
app/src/main/cpp/ime/nlp/nlp.h
Normal file
33
app/src/main/cpp/ime/nlp/nlp.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_NLP_H
|
||||
#define FLORISBOARD_NLP_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace ime::nlp {

/** Type used to store the text of a word. */
using word_t = std::string;

/** Type used to store the frequency of a word. */
using freq_t = int16_t;

/** Lower bound constant for frequency values. */
constexpr freq_t FREQ_MIN = 0x00;

/** Upper bound constant for frequency values. */
constexpr freq_t FREQ_MAX = 0xFF;

/** NOTE(review): presumably the frequency marking possibly offensive words - confirm. */
constexpr freq_t FREQ_POSSIBLY_OFFENSIVE = 0x01;

} // namespace ime::nlp
|
||||
|
||||
#endif // FLORISBOARD_NLP_H
|
108
app/src/main/cpp/ime/nlp/staged_suggestion_list.cpp
Normal file
108
app/src/main/cpp/ime/nlp/staged_suggestion_list.cpp
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "staged_suggestion_list.h"

#include <algorithm>
#include <utility>
|
||||
|
||||
using namespace ime::nlp;
|
||||
|
||||
/**
 * Creates an empty list with room for _maxSize entries.
 *
 * @param _maxSize Number of slots to allocate.
 */
SuggestionList::SuggestionList(size_t _maxSize) :
        internalArray(new WeightedToken *[_maxSize]()),  // "()" sets every slot to nullptr
        internalSize(0),
        maxSize(_maxSize) {}
|
||||
|
||||
/**
 * Releases every token still stored in the list and then the slot array itself.
 */
SuggestionList::~SuggestionList() {
    // The slots hold tokens allocated with new in add(); deleting only the array would leak
    // them, so free them first.
    clear();
    delete[] internalArray;
}
|
||||
|
||||
/**
 * Adds a word with the given frequency, keeping the entries ordered by descending frequency.
 *
 * - If the word is already present, its frequency is raised to freq when freq is higher.
 * - If there is a free slot, the word is appended.
 * - If the list is full, the word replaces the last (lowest frequency) entry when freq is
 *   higher than that entry's frequency.
 *
 * @param word The word to add.
 * @param freq The frequency of the word.
 * @return True if the list was modified, false otherwise.
 */
bool SuggestionList::add(word_t word, freq_t freq) {
    // A list without capacity can never accept anything. Returning early also keeps the
    // "internalSize - 1" below from wrapping around when both sizes are zero.
    if (maxSize == 0) {
        return false;
    }

    const std::optional<size_t> entryIndex = indexOfWord(word);
    if (entryIndex.has_value()) {
        // Word exists already, only a higher frequency is worth recording
        WeightedToken *entry = internalArray[entryIndex.value()];
        if (entry->freq >= freq) {
            return false;
        }
        entry->freq = freq;
    } else if (internalSize < maxSize) {
        internalArray[internalSize++] = new WeightedToken(std::move(word), freq);
    } else {
        WeightedToken *&last = internalArray[internalSize - 1];
        if (last->freq >= freq) {
            return false;
        }
        auto *replacement = new WeightedToken(std::move(word), freq);
        // The evicted token is owned by this list and must be freed, else it leaks.
        delete last;
        last = replacement;
    }

    // The array stores pointers, so the comparison has to look at the pointed-to tokens;
    // comparing the elements directly would order them by memory address.
    std::sort(
        internalArray,
        internalArray + internalSize,
        [](const WeightedToken *lhs, const WeightedToken *rhs) { return lhs->freq > rhs->freq; }
    );
    return true;
}
|
||||
|
||||
void SuggestionList::clear() {
|
||||
for (size_t n = 0; n < internalSize; n++) {
|
||||
delete internalArray[n];
|
||||
internalArray[n] = nullptr;
|
||||
}
|
||||
internalSize = 0;
|
||||
}
|
||||
|
||||
/**
 * Checks whether a token equal to element is stored in this list.
 *
 * @param element The token to search for.
 * @return True if indexOf finds the token, false otherwise.
 */
bool SuggestionList::contains(WeightedToken &element) {
    const std::optional<size_t> position = indexOf(element);
    return position.has_value();
}
|
||||
|
||||
bool SuggestionList::containsWord(const word_t &word) {
|
||||
return indexOfWord(word).has_value();
|
||||
}
|
||||
|
||||
/**
 * Returns the content of the slot at the given index.
 *
 * @param index The slot to read. As an unsigned value it cannot be negative, so only the
 *  upper bound is checked.
 * @return The slot content (nullptr for an unused slot), or nullptr if index is not below the
 *  capacity of the list.
 */
WeightedToken *SuggestionList::get(size_t index) {
    return index < maxSize ? internalArray[index] : nullptr;
}
|
||||
|
||||
/**
 * Searches the stored tokens, front to back, for one that compares equal to element.
 *
 * @param element The token to search for.
 * @return The index of the first match, or std::nullopt if there is none.
 */
std::optional<size_t> SuggestionList::indexOf(WeightedToken &element) {
    size_t position = 0;
    while (position < internalSize) {
        if (element == *internalArray[position]) {
            return position;
        }
        ++position;
    }
    return std::nullopt;
}
|
||||
|
||||
std::optional<size_t> SuggestionList::indexOfWord(const word_t &word) {
|
||||
for (size_t n = 0; n < internalSize; n++) {
|
||||
if (word == internalArray[n]->data) {
|
||||
return n;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
bool SuggestionList::isEmpty() const {
|
||||
return internalSize == 0;
|
||||
}
|
||||
|
||||
size_t SuggestionList::size() const {
|
||||
return internalSize;
|
||||
}
|
48
app/src/main/cpp/ime/nlp/staged_suggestion_list.h
Normal file
48
app/src/main/cpp/ime/nlp/staged_suggestion_list.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_STAGED_SUGGESTION_LIST_H
|
||||
#define FLORISBOARD_STAGED_SUGGESTION_LIST_H
|
||||
|
||||
#include <optional>
|
||||
#include "token.h"
|
||||
|
||||
namespace ime::nlp {
|
||||
|
||||
class SuggestionList {
|
||||
public:
|
||||
SuggestionList(size_t _maxSize);
|
||||
~SuggestionList();
|
||||
|
||||
bool add(word_t word, freq_t freq);
|
||||
void clear();
|
||||
bool contains(WeightedToken &element);
|
||||
bool containsWord(const word_t &word);
|
||||
WeightedToken* get(size_t index);
|
||||
std::optional<size_t> indexOf(WeightedToken &element);
|
||||
std::optional<size_t> indexOfWord(const word_t &word);
|
||||
bool isEmpty() const;
|
||||
size_t size() const;
|
||||
|
||||
private:
|
||||
WeightedToken** internalArray;
|
||||
size_t internalSize;
|
||||
size_t maxSize;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif //FLORISBOARD_STAGED_SUGGESTION_LIST_H
|
57
app/src/main/cpp/ime/nlp/token.cpp
Normal file
57
app/src/main/cpp/ime/nlp/token.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "token.h"
|
||||
|
||||
#include <utility>
|
||||
|
||||
using namespace ime::nlp;
|
||||
|
||||
Token::Token(word_t _data) : data(std::move(_data)) {}
|
||||
|
||||
bool operator==(const Token &t1, const Token &t2) {
|
||||
return t1.data == t2.data;
|
||||
}
|
||||
|
||||
bool operator!=(const Token &t1, const Token &t2) {
|
||||
return t1.data != t2.data;
|
||||
}
|
||||
|
||||
WeightedToken::WeightedToken(word_t _data, freq_t _freq) : Token(std::move(_data)), freq(_freq) {}
|
||||
|
||||
bool operator==(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.data == t2.data && t1.freq == t2.freq;
|
||||
}
|
||||
|
||||
bool operator!=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.data != t2.data || t1.freq != t2.freq;
|
||||
}
|
||||
|
||||
bool operator<(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq < t2.freq;
|
||||
}
|
||||
|
||||
bool operator<=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq <= t2.freq;
|
||||
}
|
||||
|
||||
bool operator>(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq > t2.freq;
|
||||
}
|
||||
|
||||
bool operator>=(const WeightedToken &t1, const WeightedToken &t2) {
|
||||
return t1.freq >= t2.freq;
|
||||
}
|
49
app/src/main/cpp/ime/nlp/token.h
Normal file
49
app/src/main/cpp/ime/nlp/token.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef FLORISBOARD_TOKEN_H
|
||||
#define FLORISBOARD_TOKEN_H
|
||||
|
||||
#include "nlp.h"
|
||||
#include <string>
|
||||
|
||||
namespace ime::nlp {
|
||||
|
||||
class Token {
|
||||
public:
|
||||
word_t data;
|
||||
Token(word_t _data);
|
||||
|
||||
friend bool operator==(const Token &t1, const Token &t2);
|
||||
friend bool operator!=(const Token &t1, const Token &t2);
|
||||
};
|
||||
|
||||
class WeightedToken : public Token {
|
||||
public:
|
||||
freq_t freq;
|
||||
WeightedToken(word_t _data, freq_t _freq);
|
||||
|
||||
friend bool operator==(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator!=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator<(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator<=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator>(const WeightedToken &t1, const WeightedToken &t2);
|
||||
friend bool operator>=(const WeightedToken &t1, const WeightedToken &t2);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // FLORISBOARD_TOKEN_H
|
10
app/src/main/cpp/native-lib.cpp
Normal file
10
app/src/main/cpp/native-lib.cpp
Normal file
@ -0,0 +1,10 @@
|
||||
#include <jni.h>
|
||||
#include <string>
|
||||
|
||||
/**
 * Returns a fixed greeting as a new Java string.
 *
 * @param env JNI environment of the calling thread.
 * @return A new Java string containing "Hello from C++".
 */
extern "C" JNIEXPORT jstring JNICALL
Java_dev_patrickgold_florisboard_ime_core_FlorisBoard_jniHelloWorld(
        JNIEnv* env,
        jobject /* this */) {
    const char *greeting = "Hello from C++";
    return env->NewStringUTF(greeting);
}
|
@ -140,10 +140,14 @@ class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardManager
|
||||
textInputManager = TextInputManager.getInstance()
|
||||
mediaInputManager = MediaInputManager.getInstance()
|
||||
clipInputManager = ClipboardInputManager.getInstance()
|
||||
|
||||
System.loadLibrary("florisboard-native")
|
||||
}
|
||||
|
||||
lateinit var asyncExecutor: ExecutorService
|
||||
|
||||
external fun jniHelloWorld(): String
|
||||
|
||||
companion object {
|
||||
@Synchronized
|
||||
fun getInstance(): FlorisBoard {
|
||||
@ -190,7 +194,7 @@ class FlorisBoard : InputMethodService(), LifecycleOwner, FlorisClipboardManager
|
||||
.build()
|
||||
)
|
||||
}*/
|
||||
flogInfo(LogTopic.IMS_EVENTS)
|
||||
flogInfo(LogTopic.IMS_EVENTS) { jniHelloWorld() }
|
||||
serviceLifecycleDispatcher.onServicePreSuperOnCreate()
|
||||
|
||||
imeManager = getSystemService(Context.INPUT_METHOD_SERVICE) as? InputMethodManager
|
||||
|
@ -83,7 +83,7 @@ open class NgramNode(
|
||||
*/
|
||||
fun listSimilarWords(
|
||||
input: String,
|
||||
list: StagedSuggestionList<String, Int>,
|
||||
list: SuggestionList,
|
||||
word: StringBuilder,
|
||||
allowPossiblyOffensive: Boolean,
|
||||
maxEditDistance: Int,
|
||||
@ -100,9 +100,7 @@ open class NgramNode(
|
||||
|| !isPossiblyOffensive)) {
|
||||
// Using shift right instead of divide by 2^(costSum) as it is mathematically the
|
||||
// same but faster.
|
||||
if (list.canAdd(freq shr costSum)) {
|
||||
list.add(word.toString(), freq shr costSum)
|
||||
}
|
||||
list.add(word.toString(), freq shr costSum)
|
||||
}
|
||||
if (pos <= -1) {
|
||||
for (childNode in higherOrderChildren) {
|
||||
@ -152,7 +150,7 @@ open class NgramNode(
|
||||
}
|
||||
}
|
||||
|
||||
fun listAllSameOrderWords(list: StagedSuggestionList<String, Int>, word: StringBuilder, allowPossiblyOffensive: Boolean) {
|
||||
fun listAllSameOrderWords(list: SuggestionList, word: StringBuilder, allowPossiblyOffensive: Boolean) {
|
||||
word.append(char)
|
||||
if (isWord && ((isPossiblyOffensive && allowPossiblyOffensive) || !isPossiblyOffensive)) {
|
||||
if (list.canAdd(freq)) {
|
||||
@ -248,13 +246,13 @@ open class FlorisLanguageModel(
|
||||
}
|
||||
if (splitNode != null) {
|
||||
// Input thus far is valid
|
||||
val wordNodes = StagedSuggestionList<String, Int>(maxTokenCount)
|
||||
val wordNodes = SuggestionList.new(maxTokenCount)
|
||||
val strBuilder = StringBuilder().append(word.substring(0, word.length - 1))
|
||||
splitNode.listAllSameOrderWords(wordNodes, strBuilder, allowPossiblyOffensive)
|
||||
ngramList.addAll(wordNodes)
|
||||
}
|
||||
if (ngramList.size < maxTokenCount) {
|
||||
val wordNodes = StagedSuggestionList<String, Int>(maxTokenCount)
|
||||
val wordNodes = SuggestionList.new(maxTokenCount)
|
||||
val strBuilder = StringBuilder()
|
||||
currentNode.listSimilarWords(word, wordNodes, strBuilder, allowPossiblyOffensive, maxEditDistance)
|
||||
ngramList.addAll(wordNodes)
|
||||
|
@ -1,129 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.nlp
|
||||
|
||||
/**
|
||||
* Abstract interface representing a n-gram of tokens. Each n-gram instance can be assigned a
|
||||
* unique frequency [freq].
|
||||
*/
|
||||
open class Ngram<T : Any, F : Comparable<F>>(_tokens: List<Token<T>>, _freq: F) {
|
||||
companion object {
|
||||
/** Constant order value for unigrams. */
|
||||
const val ORDER_UNIGRAM: Int = 1
|
||||
|
||||
/** Constant order value for bigrams. */
|
||||
const val ORDER_BIGRAM: Int = 2
|
||||
|
||||
/** Constant order value for trigrams. */
|
||||
const val ORDER_TRIGRAM: Int = 3
|
||||
}
|
||||
|
||||
init {
|
||||
if (_tokens.size < ORDER_UNIGRAM) {
|
||||
throw Exception("A n-gram must contain at least 1 token!")
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A list of tokens for this n-gram. The length of this list is guaranteed to be matching
|
||||
* [order].
|
||||
*/
|
||||
val tokens: List<Token<T>> = _tokens
|
||||
|
||||
/**
|
||||
* The frequency value of this n-gram.
|
||||
*/
|
||||
val freq: F = _freq
|
||||
|
||||
/**
|
||||
* The order of this n-gram (1, 2, 3, ...).
|
||||
*/
|
||||
val order: Int
|
||||
get() = tokens.size
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract interface representing a token used in [Ngram].
|
||||
*/
|
||||
open class Token<T : Any>(_data: T) {
|
||||
/**
|
||||
* The data of this token.
|
||||
*/
|
||||
val data: T = _data
|
||||
|
||||
override fun toString(): String {
|
||||
return "Token(\"$data\")"
|
||||
}
|
||||
|
||||
override fun hashCode(): Int {
|
||||
return data.hashCode()
|
||||
}
|
||||
|
||||
override fun equals(other: Any?): Boolean {
|
||||
if (this === other) return true
|
||||
if (javaClass != other?.javaClass) return false
|
||||
|
||||
other as Token<*>
|
||||
|
||||
if (data != other.data) return false
|
||||
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as [Token] but allows to add a frequency value [freq].
|
||||
*/
|
||||
open class WeightedToken<T : Any, F : Comparable<F>>(_data: T, _freq: F) : Token<T>(_data) {
|
||||
/**
|
||||
* The frequency of this weighed token.
|
||||
*/
|
||||
val freq: F = _freq
|
||||
|
||||
override fun toString(): String {
|
||||
return "WeightedToken(\"$data\", $freq)"
|
||||
}
|
||||
|
||||
override fun hashCode(): Int {
|
||||
return data.hashCode() + 31 * freq.hashCode()
|
||||
}
|
||||
|
||||
override fun equals(other: Any?): Boolean {
|
||||
if (this === other) return true
|
||||
if (javaClass != other?.javaClass) return false
|
||||
|
||||
other as WeightedToken<*, *>
|
||||
|
||||
if (data != other.data || freq != other.freq) return false
|
||||
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a list of tokens carrying [CharSequence] data to a list of [CharSequence].
|
||||
*/
|
||||
fun List<Token<CharSequence>>.toCharSequenceList(): List<CharSequence> {
|
||||
return this.map { it.data }
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a list of tokens carrying [String] data to a list of [String].
|
||||
*/
|
||||
fun List<Token<String>>.toStringList(): List<String> {
|
||||
return this.map { it.data }
|
||||
}
|
@ -1,90 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.nlp
|
||||
|
||||
class StagedSuggestionList<T : Any, F : Comparable<F>>(
|
||||
private val maxSize: Int
|
||||
) : Collection<WeightedToken<T, F>> {
|
||||
private val internalArray: Array<WeightedToken<T, F>?> = Array(maxSize) { null }
|
||||
private var internalSize: Int = 0
|
||||
|
||||
override val size: Int
|
||||
get() = internalSize
|
||||
|
||||
fun add(token: T, freq: F): Boolean {
|
||||
if (internalSize < maxSize) {
|
||||
internalArray[internalSize++] = WeightedToken(token, freq)
|
||||
internalArray.sortByDescending { it?.freq }
|
||||
return true
|
||||
} else {
|
||||
if (internalArray.last()!!.freq < freq) {
|
||||
internalArray[internalArray.lastIndex] = WeightedToken(token, freq)
|
||||
internalArray.sortByDescending { it?.freq }
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
fun canAdd(freq: F): Boolean {
|
||||
return internalSize < maxSize || internalArray.last()!!.freq < freq
|
||||
}
|
||||
|
||||
fun clear() {
|
||||
for (n in internalArray.indices) {
|
||||
internalArray[n] = null
|
||||
}
|
||||
internalSize = 0
|
||||
}
|
||||
|
||||
override fun contains(element: WeightedToken<T, F>): Boolean = internalArray.contains(element)
|
||||
|
||||
override fun containsAll(elements: Collection<WeightedToken<T, F>>): Boolean {
|
||||
elements.forEach { if (!contains(it)) return false }
|
||||
return true
|
||||
}
|
||||
|
||||
@Throws(IndexOutOfBoundsException::class)
|
||||
operator fun get(index: Int): WeightedToken<T, F> {
|
||||
val element = getOrNull(index)
|
||||
if (element == null) {
|
||||
throw IndexOutOfBoundsException("The specified index $index is not within the bounds of this list!")
|
||||
} else {
|
||||
return element
|
||||
}
|
||||
}
|
||||
|
||||
fun getOrNull(index: Int): WeightedToken<T, F>? {
|
||||
return internalArray.getOrNull(index)
|
||||
}
|
||||
|
||||
override fun isEmpty(): Boolean = internalSize <= 0
|
||||
|
||||
override fun iterator(): Iterator<WeightedToken<T, F>> {
|
||||
return StagedIterator(this)
|
||||
}
|
||||
|
||||
class StagedIterator<T : Any, F : Comparable<F>> internal constructor (
|
||||
private val stagedSuggestionList: StagedSuggestionList<T, F>
|
||||
) : Iterator<WeightedToken<T, F>> {
|
||||
var index = 0
|
||||
|
||||
override fun next(): WeightedToken<T, F> = stagedSuggestionList[index++]
|
||||
|
||||
override fun hasNext(): Boolean = stagedSuggestionList.getOrNull(index) != null
|
||||
}
|
||||
}
|
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.nlp
|
||||
|
||||
/**
 * Kotlin-side handle for a suggestion list which lives in native code. All operations are
 * forwarded to the native functions declared in the companion object, passing [nativePtr] as
 * the first argument.
 *
 * Instances are created with [new]. The native object is only released when [dispose] is
 * called; the handle must not be used afterwards.
 */
@JvmInline
value class SuggestionList private constructor(
    private val nativePtr: NativePtr
) : Collection<String> {
    companion object {
        /**
         * Creates a new native suggestion list which can hold up to [maxSize] entries.
         */
        fun new(maxSize: Int): SuggestionList {
            val nativePtr = nativeInitialize(maxSize)
            return SuggestionList(nativePtr)
        }

        external fun nativeInitialize(maxSize: Int): NativePtr
        external fun nativeDispose(nativePtr: NativePtr)

        external fun nativeAdd(nativePtr: NativePtr, word: Word, freq: Freq): Boolean
        external fun nativeClear(nativePtr: NativePtr)
        external fun nativeContains(nativePtr: NativePtr, element: Word): Boolean
        external fun nativeGetOrNull(nativePtr: NativePtr, index: Int): Word?
        external fun nativeSize(nativePtr: NativePtr): Int
    }

    /** The number of entries currently stored in the native list. */
    override val size: Int
        get() = nativeSize(nativePtr)

    /**
     * Adds [word] with the frequency [freq] to the native list.
     *
     * @return The result reported by the native list.
     */
    fun add(word: Word, freq: Freq): Boolean {
        return nativeAdd(nativePtr, word, freq)
    }

    /** Removes all entries from the native list. */
    fun clear() {
        nativeClear(nativePtr)
    }

    override fun contains(element: Word): Boolean {
        return nativeContains(nativePtr, element)
    }

    override fun containsAll(elements: Collection<Word>): Boolean {
        return elements.all { contains(it) }
    }

    /**
     * Returns the word at [index].
     *
     * @throws IndexOutOfBoundsException if there is no entry at [index].
     */
    @Throws(IndexOutOfBoundsException::class)
    operator fun get(index: Int): Word {
        return getOrNull(index)
            ?: throw IndexOutOfBoundsException("The specified index $index is not within the bounds of this list!")
    }

    /** Returns the word at [index], or null if there is no entry at [index]. */
    fun getOrNull(index: Int): Word? {
        return nativeGetOrNull(nativePtr, index)
    }

    override fun isEmpty(): Boolean = size <= 0

    override fun iterator(): Iterator<Word> {
        return SuggestionListIterator(this)
    }

    /** Releases the native list. This handle must not be used after this call. */
    fun dispose() {
        nativeDispose(nativePtr)
    }

    /**
     * Iterates over the words of [suggestionList] from the first to the last entry.
     */
    class SuggestionListIterator internal constructor (
        private val suggestionList: SuggestionList
    ) : Iterator<Word> {
        var index = 0

        // The Iterator contract asks for NoSuchElementException when the iteration is
        // exhausted, so the lookup does not go through the throwing get operator.
        override fun next(): Word {
            val element = suggestionList.getOrNull(index)
                ?: throw NoSuchElementException("The suggestion list has no element at index $index!")
            index++
            return element
        }

        // Comparing against the size avoids creating a string only to test for its presence.
        override fun hasNext(): Boolean = index >= 0 && index < suggestionList.size
    }
}
|
23
app/src/main/java/dev/patrickgold/florisboard/ime/nlp/nlp.kt
Normal file
23
app/src/main/java/dev/patrickgold/florisboard/ime/nlp/nlp.kt
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Patrick Goldinger
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package dev.patrickgold.florisboard.ime.nlp
|
||||
|
||||
/** Address of an object living in native code, passed around as an opaque handle. */
typealias NativePtr = Long

/** A word, as exchanged with the native code. */
typealias Word = String

/** The frequency of a word, as exchanged with the native code. */
typealias Freq = Int

/**
 * The null value for [NativePtr]. Declared as a [NativePtr] (Long) so that it can be compared
 * with and assigned to native pointers; an Int constant cannot be compared with a Long.
 */
const val NATIVE_NULLPTR: NativePtr = 0L
|
@ -2,8 +2,7 @@ package dev.patrickgold.florisboard.ime.text.nlp
|
||||
|
||||
import dev.patrickgold.florisboard.ime.nlp.NgramNode
|
||||
import dev.patrickgold.florisboard.ime.nlp.NgramTree
|
||||
import dev.patrickgold.florisboard.ime.nlp.StagedSuggestionList
|
||||
import dev.patrickgold.florisboard.ime.nlp.WeightedToken
|
||||
import dev.patrickgold.florisboard.ime.nlp.SuggestionList
|
||||
import org.hamcrest.CoreMatchers.`is`
|
||||
import org.hamcrest.CoreMatchers.nullValue
|
||||
import org.hamcrest.MatcherAssert.assertThat
|
||||
@ -49,11 +48,11 @@ class NgramNodeTest {
|
||||
|
||||
@Test
|
||||
fun listAllSameOrderWords_returnsCorrectList_forGivenPrefix() {
|
||||
val words = StagedSuggestionList<String, Int>(4)
|
||||
val words = SuggestionList<String, Int>(4)
|
||||
ngramTreeToBeTested.higherOrderChildren['t'].listAllSameOrderWords(words, true)
|
||||
assertThat(
|
||||
words,
|
||||
`is`(StagedSuggestionList<String, Int>(4).apply {
|
||||
`is`(SuggestionList<String, Int>(4).apply {
|
||||
add("the", 255)
|
||||
add("them", 230)
|
||||
add("to", 220)
|
||||
|
Loading…
Reference in New Issue
Block a user