Projects >> Ivory >>a547b5aed740b80037764f0a27138f568fe8c994

Chunk
Conflicting content
    return tokens.toArray(tokensArr); 
  }

<<<<<<< HEAD
=======
  public static void main(String[] args) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException{
    if(args.length < 2){
      System.err.println("usage: [input] [output-file]");
      System.exit(-1);
    }
    Tokenizer tokenizer = new BigramChineseTokenizer();
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "UTF8"));
    BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF8"));

    String line = null;
    while((line = in.readLine()) != null){
      String[] tokens = tokenizer.processContent(line);
      String s = "";
      for (String token : tokens) {
        s += token+" ";
      }
      out.write(s+"\n");
    }
    out.close();
    in.close();
  }

>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
  @Override
  public String removeBorderStopWords(String tokenizedText) {
    return tokenizedText;
Solution content
    return tokens.toArray(tokensArr); 
  }

  @Override
  public String removeBorderStopWords(String tokenizedText) {
    return tokenizedText;
File
BigramChineseTokenizer.java
Developer's decision
Version 1
Kind of conflict
Method declaration
Chunk
Conflicting content
package ivory.core.tokenize;

import ivory.core.Constants;
<<<<<<< HEAD
=======

>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
import java.io.IOException;
import java.util.Set;
Solution content
package ivory.core.tokenize;

import ivory.core.Constants;
import java.io.IOException;
import java.util.Set;
File
OpenNLPTokenizer.java
Developer's decision
Version 1
Kind of conflict
Blank
Chunk
Conflicting content
  public String[] processContent(String text) {
    String[] tokens = null;
    try {
<<<<<<< HEAD
      text = postNormalize(preNormalize(text).toLowerCase());      // normalization for non-Chinese characters
=======
      text = text.toLowerCase(); // for non-Chinese characters
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
      tokens = classifier.classifyStringAndReturnAnswers(text, readerWriter);
    } catch (IOException e) {
      LOG.info("Problem in tokenizing Chinese");
Solution content
  public String[] processContent(String text) {
    String[] tokens = null;
    try {
      text = postNormalize(preNormalize(text).toLowerCase());      // normalization for non-Chinese characters
      tokens = classifier.classifyStringAndReturnAnswers(text, readerWriter);
    } catch (IOException e) {
      LOG.info("Problem in tokenizing Chinese");
File
StanfordChineseTokenizer.java
Developer's decision
Version 1
Kind of conflict
Comment
Method invocation
Variable
Chunk
Conflicting content
    } else {
      StringBuilder finalTokenized = new StringBuilder();
      for (String token : tokens) {
<<<<<<< HEAD
        if ( vocab.get(token) <= 0) { continue; }
        finalTokenized.append( token + " " );
=======
        if (vocab.get(token) <= 0) {
          continue;
        }
        tokenized += (token + " ");
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
      }
      return finalTokenized.toString().trim().split("\\s+");
    }
Solution content
    } else {
      StringBuilder finalTokenized = new StringBuilder();
      for (String token : tokens) {
        if ( vocab.get(token) <= 0) { continue; }
        finalTokenized.append( token + " " );
      }
      return finalTokenized.toString().trim().split("\\s+");
    }
File
StanfordChineseTokenizer.java
Developer's decision
Version 1
Kind of conflict
If statement
Method invocation
Variable
Chunk
Conflicting content
  }

  @Override
<<<<<<< HEAD
=======
  public int getNumberTokens(String text) {
    return processContent(text).length;
  }

  @Override
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
  public String removeBorderStopWords(String tokenizedText) {
    return tokenizedText;
  }
Solution content
  }

  @Override
  public String removeBorderStopWords(String tokenizedText) {
    return tokenizedText;
  }
File
StanfordChineseTokenizer.java
Developer's decision
Version 1
Kind of conflict
Annotation
Method declaration
Chunk
Conflicting content
        for (String token : tokens) {
          s += token+" ";
        }
<<<<<<< HEAD
        out.write(s.trim() + "\n");
=======
        out.write(s+"\n");
        in.close();
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
      }
      out.close();
Solution content
        for (String token : tokens) {
          s += token+" ";
        }
        out.write(s.trim() + "\n");
      }
      in.close();
      out.close();
File
Tokenizer.java
Developer's decision
Manual
Kind of conflict
Method invocation
Chunk
Conflicting content
    }
  }

<<<<<<< HEAD
  @SuppressWarnings("unchecked")
  public static Class getTokenizerClass(String lang) {
=======
  public static Class getTokenizerClass(String lang) {
>>>>>>> b597e2d86cb71b1217a08f91a6e63f2f42a804b9
    if (lang.equals("zh")) {
      return StanfordChineseTokenizer.class;
    }else if(lang.equals("de") || lang.equals("en") || lang.equals("fr")) {
Solution content
    }
  }

  public static Class getTokenizerClass(String lang) {
    if (lang.equals("zh")) {
      return StanfordChineseTokenizer.class;
    }else if(lang.equals("de") || lang.equals("en") || lang.equals("fr")) {
File
TokenizerFactory.java
Developer's decision
Version 2
Kind of conflict
Annotation
Method signature