mirror of
https://github.com/ankidroid/Anki-Android.git
synced 2024-09-20 03:52:15 +02:00
Migrate CsvSniffer.java to kotlin
This commit is contained in:
parent
640ef9fec8
commit
6857229658
@ -43,7 +43,7 @@ permission notice:
|
||||
// Example of class name: "/com/ichi2/anki/UIUtils.kt"
|
||||
// Ensure that it starts with '/' (slash)
|
||||
def source = Source.MAIN
|
||||
def className = "/com/ichi2/libanki/importer/python/CsvSniffer.kt"
|
||||
def className = ""
|
||||
|
||||
enum Source {
|
||||
MAIN("/src/main/java"),
|
||||
|
@ -19,433 +19,378 @@
|
||||
Ported from https://github.com/python/cpython/blob/a74eea238f5baba15797e2e8b570d153bc8690a7/Lib/csv.py#L159
|
||||
|
||||
*/
|
||||
package com.ichi2.libanki.importer.python
|
||||
|
||||
package com.ichi2.libanki.importer.python;
|
||||
|
||||
import android.annotation.SuppressLint;
|
||||
import android.os.Build;
|
||||
|
||||
import com.ichi2.libanki.importer.CsvException;
|
||||
import com.ichi2.utils.HashUtil;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import androidx.annotation.Nullable;
|
||||
import androidx.annotation.RequiresApi;
|
||||
import android.annotation.SuppressLint
|
||||
import android.os.Build
|
||||
import androidx.annotation.RequiresApi
|
||||
import com.ichi2.libanki.importer.CsvException
|
||||
import com.ichi2.utils.HashUtil.HashMapInit
|
||||
import com.ichi2.utils.KotlinCleanup
|
||||
import java.util.*
|
||||
import java.util.regex.Matcher
|
||||
import java.util.regex.Pattern
|
||||
|
||||
@SuppressLint("NonPublicNonStaticFieldName")
|
||||
@RequiresApi(Build.VERSION_CODES.O) // Regex group(str)
|
||||
public class CsvSniffer {
|
||||
@KotlinCleanup("fix IDE lint issues")
|
||||
class CsvSniffer {
|
||||
private val preferred: CharArray
|
||||
|
||||
|
||||
private final char[] preferred;
|
||||
|
||||
|
||||
public CsvSniffer() {
|
||||
init {
|
||||
// in case there is more than one possible delimiter
|
||||
preferred = new char[] {',', '\t', ';', ' ', ':'};
|
||||
preferred = charArrayOf(',', '\t', ';', ' ', ':')
|
||||
}
|
||||
|
||||
|
||||
|
||||
public CsvDialect sniff(String sample, char[] delimiters) {
|
||||
|
||||
List<Character> delimiterList = toList(delimiters);
|
||||
GuessQuoteAndDelimiter result = _guess_quote_and_delimiter(sample, delimiterList);
|
||||
char quotechar = result.quotechar;
|
||||
boolean doublequote = result.doublequote;
|
||||
char delimiter = result.delimiter;
|
||||
boolean skipinitialspace = result.skipinitialspace;
|
||||
|
||||
if (delimiter == '\0') {
|
||||
Guess g = _guess_delimiter(sample, delimiterList);
|
||||
delimiter = g.delimiter;
|
||||
skipinitialspace = g.skipinitialspace;
|
||||
fun sniff(sample: String, delimiters: CharArray?): CsvDialect {
|
||||
val delimiterList = toList(delimiters)
|
||||
val result = _guess_quote_and_delimiter(sample, delimiterList)
|
||||
val quotechar = result.quotechar
|
||||
val doublequote = result.doublequote
|
||||
var delimiter = result.delimiter
|
||||
var skipinitialspace = result.skipinitialspace
|
||||
if (delimiter == '\u0000') {
|
||||
val g = _guess_delimiter(sample, delimiterList)
|
||||
delimiter = g.delimiter
|
||||
skipinitialspace = g.skipinitialspace
|
||||
}
|
||||
|
||||
if (delimiter == '\0') {
|
||||
throw new CsvException("Could not determine delimiter");
|
||||
if (delimiter == '\u0000') {
|
||||
throw CsvException("Could not determine delimiter")
|
||||
}
|
||||
|
||||
CsvDialect dialect = new CsvDialect("sniffed");
|
||||
|
||||
dialect.mDoublequote = doublequote;
|
||||
dialect.mDelimiter = delimiter;
|
||||
@KotlinCleanup("use a scope function")
|
||||
val dialect = CsvDialect("sniffed")
|
||||
dialect.mDoublequote = doublequote
|
||||
dialect.mDelimiter = delimiter
|
||||
// _csv.reader won't accept a quotechar of ''
|
||||
dialect.mQuotechar = quotechar == '\0' ? '"' : quotechar;
|
||||
dialect.mSkipInitialSpace = skipinitialspace;
|
||||
|
||||
return dialect;
|
||||
dialect.mQuotechar = if (quotechar == '\u0000') '"' else quotechar
|
||||
dialect.mSkipInitialSpace = skipinitialspace
|
||||
return dialect
|
||||
}
|
||||
|
||||
|
||||
private List<Character> toList(@Nullable char[] delimiters) {
|
||||
@KotlinCleanup("could be further simplified: return if/else, use delimiters.toList()")
|
||||
private fun toList(delimiters: CharArray?): List<Char> {
|
||||
if (delimiters == null) {
|
||||
return new ArrayList<>(0);
|
||||
return ArrayList(0)
|
||||
}
|
||||
ArrayList<Character> ret = new ArrayList<>(delimiters.length);
|
||||
for (char delimiter : delimiters) {
|
||||
ret.add(delimiter);
|
||||
val ret = ArrayList<Char>(delimiters.size)
|
||||
for (delimiter in delimiters) {
|
||||
ret.add(delimiter)
|
||||
}
|
||||
return ret;
|
||||
return ret
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Looks for text enclosed between two identical quotes
|
||||
* (the probable quotechar) which are preceded and followed
|
||||
* by the same character (the probable delimiter).
|
||||
* For example:
|
||||
* ,'some text',
|
||||
* The quote with the most wins, same with the delimiter.
|
||||
* If there is no quotechar the delimiter can't be determined
|
||||
* this way.
|
||||
* Looks for text enclosed between two identical quotes
|
||||
* (the probable quotechar) which are preceded and followed
|
||||
* by the same character (the probable delimiter).
|
||||
* For example:
|
||||
* ,'some text',
|
||||
* The quote with the most wins, same with the delimiter.
|
||||
* If there is no quotechar the delimiter can't be determined
|
||||
* this way.
|
||||
*/
|
||||
private GuessQuoteAndDelimiter _guess_quote_and_delimiter(String data, List<Character> delimiters) {
|
||||
ArrayList<String> regexes = new ArrayList<>(4);
|
||||
regexes.add("(?<delim>[^\\w\\n\"'])(?<space> ?)(?<quote>[\"']).*?\\k<quote>\\k<delim>"); // ,".*?",
|
||||
regexes.add("(?:^|\\n)(?<quote>[\"']).*?\\k<quote>(?<delim>[^\\w\\n\"'])(?<space> ?)"); // ".*?",
|
||||
regexes.add("(?<delim>[^\\w\\n\"'])(?<space> ?)(?<quote>[\"']).*?\\k<quote>(?:$|\\n)"); // ,".*?"
|
||||
regexes.add("(?:^|\\n)(?<quote>[\"']).*?\\k<quote>(?:$|\\n)"); // ".*?" (no delim, no space)
|
||||
|
||||
|
||||
List<Group> matches = new ArrayList<>();
|
||||
|
||||
for(String regex : regexes) {
|
||||
Pattern p = Pattern.compile(regex, Pattern.MULTILINE | Pattern.DOTALL);
|
||||
Matcher m = p.matcher(data);
|
||||
private fun _guess_quote_and_delimiter(data: String, delimiters: List<Char>?): GuessQuoteAndDelimiter {
|
||||
val regexes = ArrayList<String>(4)
|
||||
regexes.add("(?<delim>[^\\w\\n\"'])(?<space> ?)(?<quote>[\"']).*?\\k<quote>\\k<delim>") // ,".*?",
|
||||
regexes.add("(?:^|\\n)(?<quote>[\"']).*?\\k<quote>(?<delim>[^\\w\\n\"'])(?<space> ?)") // ".*?",
|
||||
regexes.add("(?<delim>[^\\w\\n\"'])(?<space> ?)(?<quote>[\"']).*?\\k<quote>(?:$|\\n)") // ,".*?"
|
||||
regexes.add("(?:^|\\n)(?<quote>[\"']).*?\\k<quote>(?:$|\\n)") // ".*?" (no delim, no space)
|
||||
val matches: MutableList<Group> = ArrayList()
|
||||
for (regex in regexes) {
|
||||
val p = Pattern.compile(regex, Pattern.MULTILINE or Pattern.DOTALL)
|
||||
val m = p.matcher(data)
|
||||
while (m.find()) {
|
||||
Group g = new Group();
|
||||
g.delim = getCharOrNull(m, "delim");
|
||||
g.quote = getCharOrNull(m, "quote");
|
||||
g.space = m.group("space");
|
||||
matches.add(g);
|
||||
val g = Group()
|
||||
g.delim = getCharOrNull(m, "delim")
|
||||
g.quote = getCharOrNull(m, "quote")
|
||||
g.space = m.group("space")
|
||||
matches.add(g)
|
||||
}
|
||||
if (!matches.isEmpty()) {
|
||||
break;
|
||||
break
|
||||
}
|
||||
}
|
||||
if (matches.isEmpty()) {
|
||||
return new GuessQuoteAndDelimiter('\0', false, '\0', false);
|
||||
return GuessQuoteAndDelimiter('\u0000', false, '\u0000', false)
|
||||
}
|
||||
|
||||
|
||||
Map<Character, Integer> quotes = HashUtil.HashMapInit(matches.size());
|
||||
Map<Character, Integer> delims = new HashMap<>();
|
||||
int spaces = 0;
|
||||
for (Group m : matches) {
|
||||
char key = m.quote;
|
||||
if (key != '\0') {
|
||||
quotes.put(key, quotes.getOrDefault(key, 0) + 1);
|
||||
val quotes: MutableMap<Char, Int> = HashMapInit(matches.size)
|
||||
val delims: MutableMap<Char, Int> = HashMap()
|
||||
var spaces = 0
|
||||
for (m in matches) {
|
||||
var key = m.quote
|
||||
if (key != '\u0000') {
|
||||
quotes[key] = quotes.getOrDefault(key, 0) + 1
|
||||
}
|
||||
|
||||
key = m.delim;
|
||||
|
||||
if (key != '\0' && (delimiters == null || delimiters.isEmpty() || delimiters.contains(key))) {
|
||||
delims.put(key, delims.getOrDefault(key, 0) + 1);
|
||||
key = m.delim
|
||||
if (key != '\u0000' && (delimiters == null || delimiters.isEmpty() || delimiters.contains(key))) {
|
||||
delims[key] = delims.getOrDefault(key, 0) + 1
|
||||
}
|
||||
|
||||
if (m.space != null && m.space.length() > 0) {
|
||||
spaces += 1;
|
||||
if (m.space != null && m.space!!.length > 0) {
|
||||
spaces += 1
|
||||
}
|
||||
}
|
||||
|
||||
Character quotechar = max(quotes);
|
||||
|
||||
Character delim;
|
||||
boolean skipinitialspace;
|
||||
val quotechar = max(quotes)!!
|
||||
var delim: Char
|
||||
val skipinitialspace: Boolean
|
||||
if (!delims.isEmpty()) {
|
||||
delim = max(delims);
|
||||
skipinitialspace = delims.get(delim) == spaces;
|
||||
delim = max(delims)!!
|
||||
skipinitialspace = delims[delim] == spaces
|
||||
if (delim == '\n') { // most likely a file with a single column
|
||||
delim = '\0';
|
||||
delim = '\u0000'
|
||||
}
|
||||
} else {
|
||||
// there is *no* delimiter, it's a single column of quoted data
|
||||
delim = '\0';
|
||||
skipinitialspace = false;
|
||||
delim = '\u0000'
|
||||
skipinitialspace = false
|
||||
}
|
||||
|
||||
|
||||
// if we see an extra quote between delimiters, we've got a
|
||||
// double quoted format
|
||||
String regex = String.format("((%s)|^)\\W*%s[^%s\\n]*%s[^%s\\n]*%s\\W*((%s)|$)", delim, quotechar, delim, quotechar, delim, quotechar, delim);
|
||||
Pattern dq_regexp = Pattern.compile(regex, Pattern.MULTILINE);
|
||||
|
||||
|
||||
boolean doublequote = dq_regexp.matcher(data).find();
|
||||
|
||||
return new GuessQuoteAndDelimiter(quotechar, doublequote, delim, skipinitialspace);
|
||||
val regex = String.format(
|
||||
"((%s)|^)\\W*%s[^%s\\n]*%s[^%s\\n]*%s\\W*((%s)|$)",
|
||||
delim,
|
||||
quotechar,
|
||||
delim,
|
||||
quotechar,
|
||||
delim,
|
||||
quotechar,
|
||||
delim
|
||||
)
|
||||
val dq_regexp = Pattern.compile(regex, Pattern.MULTILINE)
|
||||
val doublequote = dq_regexp.matcher(data).find()
|
||||
return GuessQuoteAndDelimiter(quotechar, doublequote, delim, skipinitialspace)
|
||||
}
|
||||
|
||||
|
||||
private char getCharOrNull(Matcher m, String delim) {
|
||||
String group = m.group(delim);
|
||||
if (group == null || group.length() == 0) {
|
||||
return '\0';
|
||||
}
|
||||
return group.charAt(0);
|
||||
@KotlinCleanup("method name?! the method can't return null")
|
||||
private fun getCharOrNull(m: Matcher, delim: String): Char {
|
||||
val group = m.group(delim)
|
||||
return if (group == null || group.length == 0) {
|
||||
'\u0000'
|
||||
} else group[0]
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The delimiter /should/ occur the same number of times on
|
||||
* each row. However, due to malformed data, it may not. We don't want
|
||||
* an all or nothing approach, so we allow for small variations in this
|
||||
* number.
|
||||
* 1) build a table of the frequency of each character on every line.
|
||||
* 2) build a table of frequencies of this frequency (meta-frequency?),
|
||||
* e.g. 'x occurred 5 times in 10 rows, 6 times in 1000 rows,
|
||||
* 7 times in 2 rows'
|
||||
* 3) use the mode of the meta-frequency to determine the /expected/
|
||||
* frequency for that character
|
||||
* 4) find out how often the character actually meets that goal
|
||||
* 5) the character that best meets its goal is the delimiter
|
||||
* 1) build a table of the frequency of each character on every line.
|
||||
* 2) build a table of frequencies of this frequency (meta-frequency?),
|
||||
* e.g. 'x occurred 5 times in 10 rows, 6 times in 1000 rows,
|
||||
* 7 times in 2 rows'
|
||||
* 3) use the mode of the meta-frequency to determine the /expected/
|
||||
* frequency for that character
|
||||
* 4) find out how often the character actually meets that goal
|
||||
* 5) the character that best meets its goal is the delimiter
|
||||
* For performance reasons, the data is evaluated in chunks, so it can
|
||||
* try and evaluate the smallest portion of the data possible, evaluating
|
||||
* additional chunks as necessary.
|
||||
*/
|
||||
private Guess _guess_delimiter(String input, List<Character> delimiters) {
|
||||
private fun _guess_delimiter(input: String, delimiters: List<Char>?): Guess {
|
||||
|
||||
// remove falsey values
|
||||
String[] samples = input.split("\n");
|
||||
List<String> data = new ArrayList<>(samples.length);
|
||||
for (String s : samples) {
|
||||
if (s == null || s.length() == 0) {
|
||||
continue;
|
||||
val samples = input.split("\n").toTypedArray()
|
||||
val data: MutableList<String> = ArrayList(samples.size)
|
||||
for (s in samples) {
|
||||
if (s.length == 0) {
|
||||
continue
|
||||
}
|
||||
data.add(s);
|
||||
data.add(s)
|
||||
}
|
||||
|
||||
char[] ascii = new char[128]; // 7-bit ASCII
|
||||
for(char i = 0; i < 128; i++) {
|
||||
ascii[i] = i;
|
||||
val ascii = CharArray(128) // 7-bit ASCII
|
||||
for (i in 0..127) {
|
||||
ascii[i] = i.toChar()
|
||||
}
|
||||
|
||||
// build frequency tables
|
||||
int chunkLength = Math.min(10, data.size());
|
||||
int iteration = 0;
|
||||
Map<Character, Map<Integer, Integer>> charFrequency = new HashMap<>();
|
||||
Map<Character, Tuple> modes = new HashMap<>();
|
||||
Map<Character, Tuple> delims = new HashMap<>();
|
||||
int start = 0;
|
||||
int end = chunkLength;
|
||||
|
||||
while (start < data.size()) {
|
||||
iteration++;
|
||||
for (String line : data.subList(start, end)) {
|
||||
for (char c : ascii) {
|
||||
Map<Integer, Integer> metaFrequency = charFrequency.getOrDefault(c, new HashMap<>());
|
||||
val chunkLength = Math.min(10, data.size)
|
||||
var iteration = 0
|
||||
val charFrequency: MutableMap<Char, MutableMap<Int, Int>> = HashMap()
|
||||
val modes: MutableMap<Char, Tuple> = HashMap()
|
||||
val delims: MutableMap<Char, Tuple> = HashMap()
|
||||
var start = 0
|
||||
var end = chunkLength
|
||||
while (start < data.size) {
|
||||
iteration++
|
||||
for (line in data.subList(start, end)) {
|
||||
for (c in ascii) {
|
||||
val metaFrequency = charFrequency.getOrDefault(c, HashMap())
|
||||
// must count even if frequency is 0
|
||||
int freq = countInString(line, c);
|
||||
val freq = countInString(line, c)
|
||||
// value is the mode
|
||||
metaFrequency.put(freq, metaFrequency.getOrDefault(freq, 0) + 1);
|
||||
charFrequency.put(c, metaFrequency);
|
||||
metaFrequency[freq] = metaFrequency.getOrDefault(freq, 0) + 1
|
||||
charFrequency[c] = metaFrequency
|
||||
}
|
||||
}
|
||||
for (Map.Entry<Character, Map<Integer, Integer>> e : charFrequency.entrySet()) {
|
||||
char c = e.getKey();
|
||||
Set<Map.Entry<Integer, Integer>> bareList = e.getValue().entrySet();
|
||||
|
||||
List<Tuple> items = new ArrayList<>(bareList.size());
|
||||
|
||||
for (Map.Entry<Integer, Integer> entry : bareList) {
|
||||
items.add(new Tuple(entry));
|
||||
for ((c, value) in charFrequency) {
|
||||
val bareList = value.entries
|
||||
val items: MutableList<Tuple> = ArrayList(bareList.size)
|
||||
for (entry in bareList) {
|
||||
items.add(Tuple(entry))
|
||||
}
|
||||
|
||||
if (items.size() == 1 && items.get(0).second == 0) {
|
||||
continue;
|
||||
if (items.size == 1 && items[0].second == 0) {
|
||||
continue
|
||||
}
|
||||
|
||||
// get the mode of the frequencies
|
||||
if (items.size() > 1) {
|
||||
modes.put(c, maxSecond(items));
|
||||
if (items.size > 1) {
|
||||
val toRemove = maxSecond(items)
|
||||
// adjust the mode - subtract the sum of all
|
||||
// other frequencies
|
||||
Tuple toRemove = modes.get(c);
|
||||
items.remove(toRemove);
|
||||
modes.put(c, new Tuple(toRemove.first, toRemove.second - sumSecond(items)));
|
||||
items.remove(toRemove)
|
||||
modes[c] = Tuple(toRemove!!.first, toRemove.second - sumSecond(items))
|
||||
} else {
|
||||
modes.put(c, items.get(0));
|
||||
modes[c] = items[0]
|
||||
}
|
||||
}
|
||||
|
||||
// build a list of possible delimiters
|
||||
Set<Map.Entry<Character, Tuple>> modeList = modes.entrySet();
|
||||
float total = Math.min(chunkLength * iteration, data.size());
|
||||
val modeList: Set<Map.Entry<Char, Tuple>> = modes.entries
|
||||
val total = Math.min(chunkLength * iteration, data.size).toFloat()
|
||||
// (rows of consistent data) / (number of rows) = 100%
|
||||
double consistency = 1.0;
|
||||
var consistency = 1.0
|
||||
// minimum consistency threshold
|
||||
double threshold = 0.9;
|
||||
val threshold = 0.9
|
||||
while (delims.isEmpty() && consistency >= threshold) {
|
||||
for (Map.Entry<Character, Tuple> entry : modeList) {
|
||||
Tuple value = entry.getValue();
|
||||
for ((key, value) in modeList) {
|
||||
if (value.first > 0 && value.second > 0) {
|
||||
if (((double) value.second / total) >= consistency && (delimiters == null || delimiters.contains(entry.getKey()))) {
|
||||
delims.put(entry.getKey(), value);
|
||||
if (value.second.toDouble() / total >= consistency && (delimiters == null || delimiters.contains(key))) {
|
||||
delims[key] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
consistency -= 0.01;
|
||||
consistency -= 0.01
|
||||
}
|
||||
|
||||
if (delims.size() == 1) {
|
||||
Character delim = new ArrayList<>(delims.keySet()).get(0);
|
||||
boolean skipinitialspace = countInString(data.get(0), delim) == countInString(data.get(0), delim + " ");
|
||||
return new Guess(delim, skipinitialspace);
|
||||
if (delims.size == 1) {
|
||||
val delim = ArrayList(delims.keys)[0]
|
||||
val skipinitialspace = countInString(data[0], delim) == countInString(
|
||||
data[0], "$delim "
|
||||
)
|
||||
return Guess(delim, skipinitialspace)
|
||||
}
|
||||
// analyze another chunkLength lines
|
||||
start = end;
|
||||
end += chunkLength;
|
||||
start = end
|
||||
end += chunkLength
|
||||
}
|
||||
|
||||
if (delims.isEmpty()) {
|
||||
return new Guess('\0', false);
|
||||
return Guess('\u0000', false)
|
||||
}
|
||||
|
||||
// if there's more than one, fall back to a 'preferred' list
|
||||
if (delims.size() > 1) {
|
||||
for (char d : preferred) {
|
||||
if (delims.size > 1) {
|
||||
for (d in preferred) {
|
||||
if (delims.containsKey(d)) {
|
||||
boolean skipinitialspace = countInString(data.get(0), d) == countInString(data.get(0), d + " ");
|
||||
return new Guess(d, skipinitialspace);
|
||||
val skipinitialspace = countInString(data[0], d) == countInString(
|
||||
data[0], "$d "
|
||||
)
|
||||
return Guess(d, skipinitialspace)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nothing else indicates a preference, pick the character that
|
||||
// dominates(?)
|
||||
ArrayList<Map.Entry<Tuple, Character>> items = new ArrayList<>(delims.size());
|
||||
for(Map.Entry<Character, Tuple> i : delims.entrySet()) {
|
||||
items.add(new AbstractMap.SimpleEntry<>(i.getValue(), i.getKey()));
|
||||
val items = ArrayList<Map.Entry<Tuple, Char>>(delims.size)
|
||||
for ((key, value) in delims) {
|
||||
items.add(AbstractMap.SimpleEntry(value, key))
|
||||
}
|
||||
items.sort((o1, o2) -> {
|
||||
int compare = Integer.compare(o1.getKey().first, o2.getKey().first);
|
||||
if (compare != 0) {
|
||||
return compare;
|
||||
items.sortWith(
|
||||
kotlin.Comparator { o1: Map.Entry<Tuple, Char>, o2: Map.Entry<Tuple, Char> ->
|
||||
val compare = o1.key.first.compareTo(o2.key.first)
|
||||
if (compare != 0) {
|
||||
compare
|
||||
} else {
|
||||
o1.key.second.compareTo(o2.key.second)
|
||||
}
|
||||
}
|
||||
return Integer.compare(o1.getKey().second, o2.getKey().second);
|
||||
});
|
||||
char delim = items.get(items.size() - 1).getValue();
|
||||
|
||||
boolean skipinitialspace = countInString(data.get(0), delim) == countInString(data.get(0), delim + " ");
|
||||
return new Guess(delim, skipinitialspace);
|
||||
|
||||
)
|
||||
val delim = items[items.size - 1].value
|
||||
val skipinitialspace = countInString(data[0], delim) == countInString(
|
||||
data[0], "$delim "
|
||||
)
|
||||
return Guess(delim, skipinitialspace)
|
||||
}
|
||||
|
||||
|
||||
private int sumSecond(List<Tuple> items) {
|
||||
int total = 0;
|
||||
for (Tuple item : items) {
|
||||
total += item.second;
|
||||
private fun sumSecond(items: List<Tuple?>): Int {
|
||||
var total = 0
|
||||
for (item in items) {
|
||||
total += item!!.second
|
||||
}
|
||||
return total;
|
||||
return total
|
||||
}
|
||||
|
||||
|
||||
private <T> T max(Map<T, Integer> histogram) {
|
||||
T max = null;
|
||||
int maximum = 0;
|
||||
for (Map.Entry<T, Integer> entry : histogram.entrySet()) {
|
||||
if (entry.getValue() > maximum) {
|
||||
maximum = entry.getValue();
|
||||
max = entry.getKey();
|
||||
private fun <T> max(histogram: Map<T, Int>): T? {
|
||||
var max: T? = null
|
||||
var maximum = 0
|
||||
for ((key, value) in histogram) {
|
||||
if (value > maximum) {
|
||||
maximum = value
|
||||
max = key
|
||||
}
|
||||
}
|
||||
return max;
|
||||
return max
|
||||
}
|
||||
|
||||
|
||||
/** max(items, key = lambda x:x[1]) */
|
||||
private Tuple maxSecond(List<Tuple> items) {
|
||||
/** max(items, key = lambda x:x[1]) */
|
||||
private fun maxSecond(items: List<Tuple?>): Tuple? {
|
||||
// items = [(1,1), (2,1)]
|
||||
// pp(max(items, key = lambda x:x[1]))
|
||||
// (1,1) - the first is picked, so use > max
|
||||
int max = 0;
|
||||
Tuple bestMax = null;
|
||||
for (Tuple item : items) {
|
||||
if (item.second > max) {
|
||||
bestMax = item;
|
||||
max = item.second;
|
||||
var max = 0
|
||||
var bestMax: Tuple? = null
|
||||
for (item in items) {
|
||||
if (item!!.second > max) {
|
||||
bestMax = item
|
||||
max = item.second
|
||||
}
|
||||
}
|
||||
return bestMax;
|
||||
return bestMax
|
||||
}
|
||||
|
||||
|
||||
private static class Tuple {
|
||||
public final int first;
|
||||
public final int second;
|
||||
|
||||
|
||||
public Tuple(Integer key, Integer value) {
|
||||
first = key;
|
||||
second = value;
|
||||
}
|
||||
|
||||
|
||||
public Tuple(Map.Entry<Integer, Integer> entry) {
|
||||
this(entry.getKey(), entry.getValue());
|
||||
}
|
||||
private class Tuple(val first: Int, val second: Int) {
|
||||
constructor(entry: Map.Entry<Int, Int>) : this(entry.key, entry.value) {}
|
||||
}
|
||||
|
||||
private static int countInString(String s, char c) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
if (s.charAt(i) == c) {
|
||||
count++;
|
||||
protected class GuessQuoteAndDelimiter(
|
||||
val quotechar: Char,
|
||||
val doublequote: Boolean,
|
||||
delimiter: Char,
|
||||
skipinitialspace: Boolean
|
||||
) : Guess(delimiter, skipinitialspace)
|
||||
|
||||
@KotlinCleanup("check: values were assigned by the migration tool, seems ok from where class it's used")
|
||||
protected class Group {
|
||||
var quote = 0.toChar()
|
||||
var delim = 0.toChar()
|
||||
var space: String? = null
|
||||
}
|
||||
|
||||
protected open class Guess(val delimiter: Char, val skipinitialspace: Boolean)
|
||||
|
||||
companion object {
|
||||
@JvmStatic
|
||||
private fun countInString(s: String, c: Char): Int {
|
||||
var count = 0
|
||||
for (i in 0 until s.length) {
|
||||
if (s[i] == c) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private static int countInString(String haystack, String needle) {
|
||||
int idx = 0;
|
||||
int count = 0;
|
||||
|
||||
while (idx != -1) {
|
||||
idx = haystack.indexOf(needle, idx);
|
||||
if (idx != -1) {
|
||||
count++;
|
||||
idx += needle.length();
|
||||
@JvmStatic
|
||||
private fun countInString(haystack: String, needle: String): Int {
|
||||
var idx = 0
|
||||
var count = 0
|
||||
while (idx != -1) {
|
||||
idx = haystack.indexOf(needle, idx)
|
||||
if (idx != -1) {
|
||||
count++
|
||||
idx += needle.length
|
||||
}
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
protected static class GuessQuoteAndDelimiter extends Guess {
|
||||
public final char quotechar;
|
||||
public final boolean doublequote;
|
||||
|
||||
|
||||
public GuessQuoteAndDelimiter(char quotechar, boolean doublequote, char delimiter, boolean skipinitialspace) {
|
||||
super(delimiter, skipinitialspace);
|
||||
this.quotechar = quotechar;
|
||||
this.doublequote = doublequote;
|
||||
}
|
||||
}
|
||||
|
||||
protected static class Group {
|
||||
public char quote;
|
||||
public char delim;
|
||||
public String space;
|
||||
}
|
||||
|
||||
protected static class Guess {
|
||||
public final char delimiter;
|
||||
public final boolean skipinitialspace;
|
||||
|
||||
|
||||
public Guess(char delimiter, boolean skipinitialspace) {
|
||||
this.delimiter = delimiter;
|
||||
this.skipinitialspace = skipinitialspace;
|
||||
return count
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user