| Chunk |
|---|
| Conflicting content |
|---|
return tokens.toArray(tokensArr);
}
<<<<<<< HEAD
=======
public static void main(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException{
if(args.length < 2){
System.err.println("usage: [input] [output-file]");
System.exit(-1);
}
Tokenizer tokenizer = new BigramChineseTokenizer();
BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "UTF8"));
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF8"));
String line = null;
while((line = in.readLine()) != null){
String[] tokens = tokenizer.processContent(line);
String s = "";
for (String token : tokens) {
s += token+" ";
}
out.write(s+"\n");
}
out.close();
in.close();
}
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
@Override
public String removeBorderStopWords(String tokenizedText) {
return tokenizedText; |
| Solution content |
|---|
return tokens.toArray(tokensArr);
}
@Override
public String removeBorderStopWords(String tokenizedText) {
return tokenizedText; |
| File |
|---|
| BigramChineseTokenizer.java |
| Developer's decision |
|---|
| Version 1 |
| Kind of conflict |
|---|
| Method declaration |
| Chunk |
|---|
| Conflicting content |
|---|
package ivory.core.tokenize; import ivory.core.Constants; <<<<<<< HEAD ======= >>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9 import java.io.IOException; import java.util.Set; |
| Solution content |
|---|
package ivory.core.tokenize; import ivory.core.Constants; import java.io.IOException; import java.util.Set; |
| File |
|---|
| OpenNLPTokenizer.java |
| Developer's decision |
|---|
| Version 1 |
| Kind of conflict |
|---|
| Blank |
| Chunk |
|---|
| Conflicting content |
|---|
public String[] processContent(String text) {
String[] tokens = null;
try {
<<<<<<< HEAD
text = postNormalize(preNormalize(text).toLowerCase()); // normalization for non-Chinese characters
=======
text = text.toLowerCase(); // for non-Chinese characters
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
tokens = classifier.classifyStringAndReturnAnswers(text, readerWriter);
} catch (IOException e) {
LOG.info("Problem in tokenizing Chinese"); |
| Solution content |
|---|
public String[] processContent(String text) {
String[] tokens = null;
try {
text = postNormalize(preNormalize(text).toLowerCase()); // normalization for non-Chinese characters
tokens = classifier.classifyStringAndReturnAnswers(text, readerWriter);
} catch (IOException e) {
LOG.info("Problem in tokenizing Chinese"); |
| File |
|---|
| StanfordChineseTokenizer.java |
| Developer's decision |
|---|
| Version 1 |
| Kind of conflict |
|---|
| Comment |
| Method invocation |
| Variable |
| Chunk |
|---|
| Conflicting content |
|---|
} else {
StringBuilder finalTokenized = new StringBuilder();
for (String token : tokens) {
<<<<<<< HEAD
if ( vocab.get(token) <= 0) { continue; }
finalTokenized.append( token + " " );
=======
if (vocab.get(token) <= 0) {
continue;
}
tokenized += (token + " ");
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
}
return finalTokenized.toString().trim().split("\\s+");
} |
| Solution content |
|---|
} else {
StringBuilder finalTokenized = new StringBuilder();
for (String token : tokens) {
if ( vocab.get(token) <= 0) { continue; }
finalTokenized.append( token + " " );
}
return finalTokenized.toString().trim().split("\\s+");
} |
| File |
|---|
| StanfordChineseTokenizer.java |
| Developer's decision |
|---|
| Version 1 |
| Kind of conflict |
|---|
| If statement |
| Method invocation |
| Variable |
| Chunk |
|---|
| Conflicting content |
|---|
}
@Override
<<<<<<< HEAD
=======
public int getNumberTokens(String text) {
return processContent(text).length;
}
@Override
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
public String removeBorderStopWords(String tokenizedText) {
return tokenizedText;
} |
| Solution content |
|---|
}
@Override
public String removeBorderStopWords(String tokenizedText) {
return tokenizedText;
} |
| File |
|---|
| StanfordChineseTokenizer.java |
| Developer's decision |
|---|
| Version 1 |
| Kind of conflict |
|---|
| Annotation |
| Method declaration |
| Chunk |
|---|
| Conflicting content |
|---|
for (String token : tokens) {
s += token+" ";
}
<<<<<<< HEAD
out.write(s.trim() + "\n");
=======
out.write(s+"\n");
in.close();
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
}
out.close();
|
| Solution content |
|---|
for (String token : tokens) {
s += token+" ";
}
out.write(s.trim() + "\n");
}
in.close();
out.close();
|
| File |
|---|
| Tokenizer.java |
| Developer's decision |
|---|
| Manual |
| Kind of conflict |
|---|
| Method invocation |
| Chunk |
|---|
| Conflicting content |
|---|
}
}
<<<<<<< HEAD
@SuppressWarnings("unchecked")
public static Class getTokenizerClass(String lang) {
=======
public static Class<? extends Tokenizer> getTokenizerClass(String lang) {
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
if (lang.equals("zh")) {
return StanfordChineseTokenizer.class;
}else if(lang.equals("de") || lang.equals("en") || lang.equals("fr")) { |
| Solution content |
|---|
}
}
public static Class<? extends Tokenizer> getTokenizerClass(String lang) {
if (lang.equals("zh")) {
return StanfordChineseTokenizer.class;
}else if(lang.equals("de") || lang.equals("en") || lang.equals("fr")) { |
| File |
|---|
| TokenizerFactory.java |
| Developer's decision |
|---|
| Version 2 |
| Kind of conflict |
|---|
| Annotation |
| Method signature |