Projects >> thrax >>5b0839503d875434b97e04efe8affa1124f8c110

Chunk
Conflicting content
package edu.jhu.thrax.util;

<<<<<<< HEAD
import java.io.IOException;
=======
import java.util.Scanner;
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
Solution content
package edu.jhu.thrax.util;

import java.util.Scanner;
import java.util.HashSet;
import java.util.Map;
File
CreateGlueGrammar.java
Developer's decision
None
Kind of conflict
Import
Chunk
Conflicting content
import org.apache.hadoop.io.Text;

<<<<<<< HEAD
public class CreateGlueGrammar {
  private static HashSet nts;

  // private static TreeMap unaryFeatures;
  // private static TreeMap binaryFeatures;

  // [GOAL] |||  |||  ||| 0
  // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
  // [GOAL] ||| [GOAL,1]  ||| [GOAL,1]  ||| 0
  private static final String RULE_START = "[%1$s] |||  |||  ||| 0";
  private static final String RULE_TWO =
      "[%1$s] ||| [%1$s,1] [%2$s,2] ||| [%1$s,1] [%2$s,2] ||| -1";
  private static final String RULE_END = "[%1$s] ||| [%1$s,1]  ||| [%1$s,1]  ||| 0";
  private static String GOAL = "GOAL";

  // private static boolean LABEL = false;
  // private static String [] FEATURES;
=======
public class CreateGlueGrammar
{
    private static HashSet nts;

    // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
    // [GOAL] ||| [GOAL,1]  ||| [GOAL,1]  ||| 0
    // [GOAL] |||  |||  ||| 0
    private static final String RULE_START = "[%1$s] |||  |||  ||| 0";
    private static final String RULE_TWO = "[%1$s] ||| [%1$s,1] [%2$s,2] ||| [%1$s,1] [%2$s,2] ||| -1";
    private static final String RULE_END = "[%1$s] ||| [%1$s,1]  ||| [%1$s,1]  ||| 0";

    // [GOAL] |||  [X,1]  |||  [X,1]  ||| 0
    private static final String RULE_TOP = "[%1$s] |||  [%2$s,1]  |||  [%2$s,1]  ||| 0";

    private static String GOAL = "GOAL";
    private static boolean LABEL = false;
    private static String [] FEATURES;
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

  public static void main(String[] argv) throws IOException {
    if (argv.length > 0) {
Solution content
import java.io.IOException;

public class CreateGlueGrammar
{
    private static HashSet nts;

    // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
    // [GOAL] ||| [GOAL,1]  ||| [GOAL,1]  ||| 0
    // [GOAL] |||  |||  ||| 0
    private static final String RULE_START = "[%1$s] |||  |||  ||| 0";
    private static final String RULE_TWO = "[%1$s] ||| [%1$s,1] [%2$s,2] ||| [%1$s,1] [%2$s,2] ||| -1";
    private static final String RULE_END = "[%1$s] ||| [%1$s,1]  ||| [%1$s,1]  ||| 0";

    // [GOAL] |||  [X,1]  |||  [X,1]  ||| 0
    private static final String RULE_TOP = "[%1$s] |||  [%2$s,1]  |||  [%2$s,1]  ||| 0";

    private static String GOAL = "GOAL";
    private static boolean LABEL = false;
    private static String [] FEATURES;
File
CreateGlueGrammar.java
Developer's decision
Version 2
Kind of conflict
Attribute
Class signature
Comment
Chunk
Conflicting content
      if (opts.containsKey("goal-symbol")) GOAL = opts.get("goal-symbol");
    }

<<<<<<< HEAD
    Scanner scanner = new Scanner(System.in, "UTF-8");
    nts = new HashSet();
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      int lhsStart = line.indexOf("[") + 1;
      int lhsEnd = line.indexOf("]");
      if (lhsStart < 1 || lhsEnd < 0) {
        System.err.printf("malformed rule: %s\n", line);
        continue;
      }
      String lhs = line.substring(lhsStart, lhsEnd);
      nts.add(lhs);
    }
    scanner.close();
    System.out.println(String.format(RULE_START, GOAL));
    for (String nt : nts) {
      Text n = new Text(nt);
      System.out.println(String.format(RULE_TWO, GOAL, n));
    }
    System.out.println(String.format(RULE_END, GOAL));
  }

=======
        Scanner scanner = new Scanner(System.in, "UTF-8");
        nts = new HashSet();
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            int lhsStart = line.indexOf("[") + 1;
            int lhsEnd = line.indexOf("]");
            if (lhsStart < 1 || lhsEnd < 0) {
                System.err.printf("malformed rule: %s\n", line);
                continue;
            }
            String lhs = line.substring(lhsStart, lhsEnd);
            nts.add(lhs);
        }
        System.out.println(String.format(RULE_START, GOAL));
        for (String nt : nts)
          System.out.println(String.format(RULE_TWO, GOAL, nt));
        System.out.println(String.format(RULE_END, GOAL));
        for (String nt: nts) 
          System.out.println(String.format(RULE_TOP, GOAL, nt));
    }
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
}
Solution content
        Scanner scanner = new Scanner(System.in, "UTF-8");
        nts = new HashSet();
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            int lhsStart = line.indexOf("[") + 1;
            int lhsEnd = line.indexOf("]");
            if (lhsStart < 1 || lhsEnd < 0) {
                System.err.printf("malformed rule: %s\n", line);
                continue;
            }
            String lhs = line.substring(lhsStart, lhsEnd);
            nts.add(lhs);
        }
        System.out.println(String.format(RULE_START, GOAL));
        for (String nt : nts)
          System.out.println(String.format(RULE_TWO, GOAL, nt));
        System.out.println(String.format(RULE_END, GOAL));
        for (String nt: nts) 
          System.out.println(String.format(RULE_TOP, GOAL, nt));
    }
}
File
CreateGlueGrammar.java
Developer's decision
Version 2
Kind of conflict
Attribute
For statement
Method invocation
Variable
While statement
Chunk
Conflicting content
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

<<<<<<< HEAD
public class TestSetFilter {
  private List testSentences;
  private Map> sentencesByWord;
=======
import java.io.PrintWriter;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.File;

import edu.jhu.thrax.ThraxConfig;

import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;

public class TestSetFilter
{
  private List testSentences;
  private Map> sentencesByWord;
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
  private Set ngrams;

  // for caching of accepted rules
Solution content
import java.util.zip.GZIPOutputStream;
import java.util.zip.GZIPInputStream;

import java.io.PrintWriter;
import java.io.FileOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.File;

import edu.jhu.thrax.ThraxConfig;

import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;

public class TestSetFilter
{
  private List testSentences;
  private Map> sentencesByWord;
  private Set ngrams;

  // for caching of accepted rules
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Attribute
Class signature
Import
Chunk
Conflicting content
  public TestSetFilter() {
    testSentences = new ArrayList();
<<<<<<< HEAD
    sentencesByWord = new HashMap>();
    acceptedLastSourceSide = false;
    lastSourceSide = null;
  }
=======
    sentencesByWord = new HashMap>();
    acceptedLastSourceSide = false;
    lastSourceSide = null;
  }    
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

  public void setVerbose(boolean value) {
    verbose = value;
Solution content
  public TestSetFilter() {
    testSentences = new ArrayList();
    sentencesByWord = new HashMap>();
    acceptedLastSourceSide = false;
    lastSourceSide = null;
  }    

  public void setVerbose(boolean value) {
    verbose = value;
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
Attribute
Method invocation
Chunk
Conflicting content
  public void setParallel(boolean value) {
    parallel = value;
<<<<<<< HEAD
  }
=======
  } 
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

  public void setFast(boolean value) {
    fast = value;
Solution content
  public void setParallel(boolean value) {
    parallel = value;
  } 

  public void setFast(boolean value) {
    fast = value;
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
Other
Chunk
Conflicting content
    RULE_LENGTH = value;
  }

<<<<<<< HEAD
  private void getTestSentences(String filename) {
=======
  private void getTestSentences(String filename)
  {
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    try {
      Scanner scanner = new Scanner(new File(filename), "UTF-8");
      while (scanner.hasNextLine()) {
Solution content
    RULE_LENGTH = value;
  }

  private void getTestSentences(String filename)
  {
    try {
      Scanner scanner = new Scanner(new File(filename), "UTF-8");
      while (scanner.hasNextLine()) {
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
Method signature
Chunk
Conflicting content
    }
        addSentenceToWordHash(sentencesByWord, line, testSentences.size());
        testSentences.add(line);
      }
<<<<<<< HEAD
      scanner.close();
    } catch (FileNotFoundException e) {
      System.err.printf("Could not open %s\n", e.getMessage());
    }

    if (verbose) System.err.println("Added " + testSentences.size() + " sentences.\n");
=======
    }
    catch (FileNotFoundException e) {
      System.err.printf("Could not open %s\n", e.getMessage());

    if (verbose) 
      System.err.println("Added " + testSentences.size() + " sentences.\n");
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

    ngrams = getTestNGrams(testSentences);
  }
Solution content
        addSentenceToWordHash(sentencesByWord, line, testSentences.size());
        testSentences.add(line);
      }
    }
    catch (FileNotFoundException e) {
      System.err.printf("Could not open %s\n", e.getMessage());
    }

    if (verbose) 
      System.err.println("Added " + testSentences.size() + " sentences.\n");

    ngrams = getTestNGrams(testSentences);
  }
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Catch clause
If statement
Method invocation
Chunk
Conflicting content
    ngrams = getTestNGrams(testSentences);
  }

<<<<<<< HEAD
  /**
   * setSentence()
   * 
   * Sets a single sentence against which the grammar is filtered. Used in filtering the grammar on
   * the fly at runtime.
   */
  public void setSentence(String sentence) {
    if (testSentences == null) testSentences = new ArrayList();

    if (sentencesByWord == null) sentencesByWord = new HashMap>();
=======
  /** setSentence()
   *
   * Sets a single sentence against which the grammar is filtered.
   * Used in filtering the grammar on the fly at runtime.
   */
  public void setSentence(String sentence) {
    if (testSentences == null)
      testSentences = new ArrayList();

    if (sentencesByWord == null)
      sentencesByWord = new HashMap>();
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

    // reset the list of sentences and the hash mapping words to
    // sets of sentences they appear in
Solution content
    ngrams = getTestNGrams(testSentences);
  }

  /** setSentence()
   *
   * Sets a single sentence against which the grammar is filtered.
   * Used in filtering the grammar on the fly at runtime.
   */
  public void setSentence(String sentence) {
    if (testSentences == null)
      testSentences = new ArrayList();

    if (sentencesByWord == null)
      sentencesByWord = new HashMap>();

    // reset the list of sentences and the hash mapping words to
    // sets of sentences they appear in
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Comment
If statement
Method signature
Chunk
Conflicting content
    ngrams = getTestNGrams(testSentences);
  }

<<<<<<< HEAD
  /**
   * filterGrammarToFile
   * 
   * Filters a large grammar against a single sentence, and writes the resulting grammar to a file.
   * The input grammar is assumed to be compressed, and the output file is also compressed.
   */
  public void filterGrammarToFile(String fullGrammarFile, String sentence,
      String filteredGrammarFile, boolean fast) {

    System.err.println(String.format("filterGrammarToFile(%s,%s,%s,%s)\n", fullGrammarFile,
        sentence, filteredGrammarFile, (fast ? "fast" : "exact")));

    this.fast = fast;
    setSentence(sentence);

    try {
      Scanner scanner =
          new Scanner(new GZIPInputStream(new FileInputStream(fullGrammarFile)), "UTF-8");
      int rulesIn = 0;
      int rulesOut = 0;
      boolean verbose = false;
      if (verbose) System.err.println("Processing rules...");

      PrintWriter out =
          new PrintWriter(new GZIPOutputStream(new FileOutputStream(filteredGrammarFile)));
      // byte newline[] = "\n".getBytes("UTF-8");
=======
  /** filterGrammarToFile
   *
   * Filters a large grammar against a single sentence, and writes
   * the resulting grammar to a file.  The input grammar is assumed
   * to be compressed, and the output file is also compressed.
   */
  public void filterGrammarToFile(String fullGrammarFile,
    String sentence,
    String filteredGrammarFile,
    boolean fast) {
    
    System.err.println(String.format("filterGrammarToFile(%s,%s,%s,%s)\n",
        fullGrammarFile,
        sentence,
        filteredGrammarFile,
        (fast ? "fast" : "exact")));

    fast = fast;
    setSentence(sentence);

    try {
      Scanner scanner = new Scanner(new GZIPInputStream(new FileInputStream(fullGrammarFile)), "UTF-8");
      int rulesIn = 0;
      int rulesOut = 0;
      boolean verbose = false;
      if (verbose)
        System.err.println("Processing rules...");

      PrintWriter out = new PrintWriter(new GZIPOutputStream(new FileOutputStream(filteredGrammarFile)));
      byte newline[] = "\n".getBytes("UTF-8");
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a

      // iterate over all lines in the grammar
      while (scanner.hasNextLine()) {
Solution content
    ngrams = getTestNGrams(testSentences);
  }

  /** filterGrammarToFile
   *
   * Filters a large grammar against a single sentence, and writes
   * the resulting grammar to a file.  The input grammar is assumed
   * to be compressed, and the output file is also compressed.
   */
  public void filterGrammarToFile(String fullGrammarFile,
    String sentence,
    String filteredGrammarFile,
    boolean fast) {
    
    System.err.println(String.format("filterGrammarToFile(%s,%s,%s,%s)\n",
        fullGrammarFile,
        sentence,
        filteredGrammarFile,
        (fast ? "fast" : "exact")));

    fast = fast;
    setSentence(sentence);

    try {
      Scanner scanner = new Scanner(new GZIPInputStream(new FileInputStream(fullGrammarFile)), "UTF-8");
      int rulesIn = 0;
      int rulesOut = 0;
      boolean verbose = false;
      if (verbose)
        System.err.println("Processing rules...");

      PrintWriter out = new PrintWriter(new GZIPOutputStream(new FileOutputStream(filteredGrammarFile)));
      byte newline[] = "\n".getBytes("UTF-8");

      // iterate over all lines in the grammar
      while (scanner.hasNextLine()) {
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Attribute
Comment
If statement
Method invocation
Method signature
Try statement
Variable
Chunk
Conflicting content
      // iterate over all lines in the grammar
      while (scanner.hasNextLine()) {
        if (verbose) {
<<<<<<< HEAD
          if ((rulesIn + 1) % 2000 == 0) {
            System.err.print(".");
            System.err.flush();
          }
          if ((rulesIn + 1) % 100000 == 0) {
            System.err.println(" [" + (rulesIn + 1) + "]");
=======
          if ((rulesIn+1) % 2000 == 0) {
            System.err.print(".");
            System.err.flush();
          }
          if ((rulesIn+1) % 100000 == 0) {
            System.err.println(" [" + (rulesIn+1) + "]");
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
            System.err.flush();
          }
        }
Solution content
      // iterate over all lines in the grammar
      while (scanner.hasNextLine()) {
        if (verbose) {
          if ((rulesIn+1) % 2000 == 0) {
            System.err.print(".");
            System.err.flush();
          }
          if ((rulesIn+1) % 100000 == 0) {
            System.err.println(" [" + (rulesIn+1) + "]");
            System.err.flush();
          }
        }
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
If statement
Method invocation
Chunk
Conflicting content
          rulesOut++;
        }
      }
<<<<<<< HEAD
      scanner.close();
=======

>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
      out.close();

      if (verbose) {
Solution content
          rulesOut++;
        }
      }

      out.close();

      if (verbose) {
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Chunk
Conflicting content
    }
  }

<<<<<<< HEAD
  public Pattern getPattern(String rule) {
    String[] parts = FormatUtils.P_DELIM.split(rule);
=======
  public Pattern getPattern(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    if (parts.length != 4) {
      return null;
    }
Solution content
    }
  }

  public Pattern getPattern(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4) {
      return null;
    }
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Method invocation
Method signature
Variable
Chunk
Conflicting content
  /**
   * Top-level filter, responsible for calling the fast or exact version.
   */
<<<<<<< HEAD
  public boolean inTestSet(String rule) {
    String[] parts = FormatUtils.P_DELIM.split(rule);
    if (parts.length != 4) return false;

    String sourceSide = parts[1].trim();
    if (!sourceSide.equals(lastSourceSide)) {
      lastSourceSide = sourceSide;
      acceptedLastSourceSide = fast ? inTestSetFast(rule) : inTestSetExact(rule);
=======
  public boolean inTestSet(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return false;

    String sourceSide = parts[1].trim();
    if (! sourceSide.equals(lastSourceSide)) {
      lastSourceSide = sourceSide;
      acceptedLastSourceSide = fast 
        ? inTestSetFast(rule) 
        : inTestSetExact(rule);
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    } else {
      cached++;
    }
Solution content
  /**
   * Top-level filter, responsible for calling the fast or exact version.
   */
  public boolean inTestSet(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return false;

    String sourceSide = parts[1].trim();
    if (! sourceSide.equals(lastSourceSide)) {
      lastSourceSide = sourceSide;
      acceptedLastSourceSide = fast 
        ? inTestSetFast(rule) 
        : inTestSetExact(rule);
    } else {
      cached++;
    }
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Attribute
If statement
Method invocation
Method signature
Variable
Chunk
Conflicting content
    return acceptedLastSourceSide;
  }

<<<<<<< HEAD
    return acceptedLastSourceSide;
  }



  private boolean inTestSetFast(String rule) {

    String[] parts = FormatUtils.P_DELIM.split(rule);
=======


  private boolean inTestSetFast(String rule) {

    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    String source = parts[1];

    for (String chunk : source.split(NT_REGEX)) {
Solution content
    return acceptedLastSourceSide;
  }



  private boolean inTestSetFast(String rule) {

    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    String source = parts[1];

    for (String chunk : source.split(NT_REGEX)) {
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Attribute
Method invocation
Method signature
Return statement
Variable
Chunk
Conflicting content
    for (String chunk : source.split(NT_REGEX)) {
      chunk = chunk.trim();
      /* Important: you need to make sure the string isn't empty. */
<<<<<<< HEAD
      if (!chunk.equals("") && !ngrams.contains(chunk)) return false;
=======
      if (! chunk.equals("") && ! ngrams.contains(chunk))
        return false;
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    }
    return true;
  }
Solution content
    for (String chunk : source.split(NT_REGEX)) {
      chunk = chunk.trim();
      /* Important: you need to make sure the string isn't empty. */
      if (! chunk.equals("") && ! ngrams.contains(chunk))
        return false;
    }
    return true;
  }
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
If statement
Chunk
Conflicting content
  }

  private boolean inTestSetExact(String rule) {
<<<<<<< HEAD
    Pattern pattern = getPattern(rule);
    for (int i : getSentencesForRule(sentencesByWord, rule)) {
      if (pattern.matcher(testSentences.get(i)).find()) {
        return true;
      }
    }
    return hasAbstractSource(rule) > 1;
  }

  private static void addSentenceToWordHash(Map> sentencesByWord,
      String sentence, int index) {
    String[] tokens = FormatUtils.P_SPACE.split(sentence);
    for (String t : tokens) {
      if (sentencesByWord.containsKey(t))
        sentencesByWord.get(t).add(index);
      else {
        Set set = new HashSet();
        set.add(index);
        sentencesByWord.put(t, set);
      }
    }
  }

  private Set getSentencesForRule(Map> sentencesByWord, String rule) {
    String[] parts = FormatUtils.P_DELIM.split(rule);
    if (parts.length != 4) return Collections.emptySet();
    String source = parts[1].trim();
    List> list = new ArrayList>();
    for (String t : FormatUtils.P_SPACE.split(source)) {
      if (t.matches(NT_REGEX)) continue;
      if (sentencesByWord.containsKey(t))
        list.add(sentencesByWord.get(t));
      else
        return Collections.emptySet();
    }
    return intersect(list);
  }

  /**
   * Determines whether a rule is an abstract rule. An abstract rule is one that has no terminals on
   * its source side.
   * 
   * If the rule is abstract, the rule's arity is returned. Otherwise, 0 is returned.
   */
  private int hasAbstractSource(String rule) {
    String[] parts = FormatUtils.P_DELIM.split(rule);
    if (parts.length != 4) return 0;
    String source = parts[1].trim();
    int nonterminalCount = 0;
    for (String t : FormatUtils.P_SPACE.split(source)) {
      if (!t.matches(NT_REGEX)) return 0;
      nonterminalCount++;
    }
    return nonterminalCount;
  }

  private static  Set intersect(List> list) {
    if (list.isEmpty()) return Collections.emptySet();
    Set result = new HashSet(list.get(0));
    for (int i = 1; i < list.size(); i++) {
      result.retainAll(list.get(i));
      if (result.isEmpty()) return Collections.emptySet();
    }
    if (result.isEmpty()) return Collections.emptySet();
    return result;
  }

  private Set getTestNGrams(List sentences) {
    if (sentences.isEmpty()) return Collections.emptySet();
=======
    if (inTestSetFast(rule)) {
      Pattern pattern = getPattern(rule);
      for (int i : getSentencesForRule(sentencesByWord, rule)) {
        if (pattern.matcher(testSentences.get(i)).find()) {
          return true;
        }
      }
      return hasAbstractSource(rule) > 1;
    } 
    return false;
  }

  private void addSentenceToWordHash(Map> sentencesByWord, String sentence, int index)
  {
    String [] tokens = sentence.split("\\s+");
    for (String t : tokens) {
      if (sentencesByWord.containsKey(t))
        sentencesByWord.get(t).add(index);
      else {
        Set set = new HashSet();
        set.add(index);
        sentencesByWord.put(t, set);
      }
    }
  }

  private Set getSentencesForRule(Map> sentencesByWord, String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return Collections.emptySet();
    String source = parts[1].trim();
    List> list = new ArrayList>();
    for (String t : source.split("\\s+")) {
      if (t.matches(NT_REGEX))
        continue;
      if (sentencesByWord.containsKey(t))
        list.add(sentencesByWord.get(t));
      else
        return Collections.emptySet();
    }
    return intersect(list);
  }

  /**
   * Determines whether a rule is an abstract rule.  An abstract
   * rule is one that has no terminals on its source side.
   *
   * If the rule is abstract, the rule's arity is returned.
   * Otherwise, 0 is returned.
   */
  private int hasAbstractSource(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return 0;
    String source = parts[1].trim();
    int nonterminalCount = 0;
    for (String t : source.split("\\s+")) {
      if (!t.matches(NT_REGEX))
        return 0;
      nonterminalCount++;
    }
    return nonterminalCount;
  }

  private  Set intersect(List> list)
  {
    if (list.isEmpty())
      return Collections.emptySet();
    Set result = new HashSet(list.get(0));
    for (int i = 1; i < list.size(); i++) {
      result.retainAll(list.get(i));
      if (result.isEmpty())
        return Collections.emptySet();
    }
    if (result.isEmpty())
      return Collections.emptySet();
    return result;
  }

  private Set getTestNGrams(List sentences)
  {
    if (sentences.isEmpty())
      return Collections.emptySet();
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    Set result = new HashSet();
    for (String s : sentences)
      result.addAll(getNGramsUpToLength(RULE_LENGTH, s));
Solution content
      Pattern pattern = getPattern(rule);
  }

  private boolean inTestSetExact(String rule) {
    if (inTestSetFast(rule)) {
      for (int i : getSentencesForRule(sentencesByWord, rule)) {
        if (pattern.matcher(testSentences.get(i)).find()) {
          return true;
        }
      }
      return hasAbstractSource(rule) > 1;
    } 
    return false;
  }

  private void addSentenceToWordHash(Map> sentencesByWord, String sentence, int index)
  {
    String [] tokens = sentence.split("\\s+");
    for (String t : tokens) {
      if (sentencesByWord.containsKey(t))
        sentencesByWord.get(t).add(index);
      else {
        Set set = new HashSet();
        set.add(index);
        sentencesByWord.put(t, set);
      }
    }
  }

  private Set getSentencesForRule(Map> sentencesByWord, String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return Collections.emptySet();
    String source = parts[1].trim();
    List> list = new ArrayList>();
    for (String t : source.split("\\s+")) {
      if (t.matches(NT_REGEX))
        continue;
      if (sentencesByWord.containsKey(t))
        list.add(sentencesByWord.get(t));
      else
        return Collections.emptySet();
    }
    return intersect(list);
  }

  /**
   * Determines whether a rule is an abstract rule.  An abstract
   * rule is one that has no terminals on its source side.
   *
   * If the rule is abstract, the rule's arity is returned.
   * Otherwise, 0 is returned.
   */
  private int hasAbstractSource(String rule)
  {
    String [] parts = rule.split(ThraxConfig.DELIMITER_REGEX);
    if (parts.length != 4)
      return 0;
    String source = parts[1].trim();
    int nonterminalCount = 0;
    for (String t : source.split("\\s+")) {
      if (!t.matches(NT_REGEX))
        return 0;
      nonterminalCount++;
    }
    return nonterminalCount;
  }

  private  Set intersect(List> list)
  {
    if (list.isEmpty())
      return Collections.emptySet();
    Set result = new HashSet(list.get(0));
    for (int i = 1; i < list.size(); i++) {
      result.retainAll(list.get(i));
      if (result.isEmpty())
        return Collections.emptySet();
    }
    if (result.isEmpty())
      return Collections.emptySet();
    return result;
  }

  private Set getTestNGrams(List sentences)
  {
    if (sentences.isEmpty())
      return Collections.emptySet();
    Set result = new HashSet();
    for (String s : sentences)
      result.addAll(getNGramsUpToLength(RULE_LENGTH, s));
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Comment
For statement
If statement
Method declaration
Method invocation
Method signature
Return statement
Variable
Chunk
Conflicting content
    return result;
  }

<<<<<<< HEAD
  private static Set getNGramsUpToLength(int length, String sentence) {
    if (length < 1) return Collections.emptySet();
    String[] tokens = FormatUtils.P_SPACE.split(sentence.trim());
=======
  private Set getNGramsUpToLength(int length, String sentence)
  {
    if (length < 1)
      return Collections.emptySet();
    String [] tokens = sentence.trim().split("\\s+");
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    int maxOrder = length < tokens.length ? length : tokens.length;
    Set result = new HashSet();
    for (int order = 1; order <= maxOrder; order++) {
Solution content
    return result;
  }

  private Set getNGramsUpToLength(int length, String sentence)
  {
    if (length < 1)
      return Collections.emptySet();
    String [] tokens = sentence.trim().split("\\s+");
    int maxOrder = length < tokens.length ? length : tokens.length;
    Set result = new HashSet();
    for (int order = 1; order <= maxOrder; order++) {
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
If statement
Method invocation
Method signature
Variable
Chunk
Conflicting content
    for (int order = 1; order <= maxOrder; order++) {
      for (int start = 0; start < tokens.length - order + 1; start++)
        result.add(createNGram(tokens, start, order));
<<<<<<< HEAD
    }
    return result;
  }

  private static String createNGram(String[] tokens, int start, int order) {
    if (order < 1 || start + order > tokens.length) {
      return "";
    }
    String result = tokens[start];
    for (int i = 1; i < order; i++)
      result += " " + tokens[start + i];
    return result;
  }

  public static void main(String[] argv) {
    // do some setup
    if (argv.length < 1) {
      System.err.println("usage: TestSetFilter [-v|-p|-f|-n N]  [test set2 ...]");
      System.err.println("    -v    verbose output");
      System.err.println("    -p    parallel compatibility");
      System.err.println("    -f    fast mode");
      System.err.println("    -n    max n-gram to compare to (default 12)");
      return;
=======
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    }
    return result;
  }
Solution content
    for (int order = 1; order <= maxOrder; order++) {
      for (int start = 0; start < tokens.length - order + 1; start++)
        result.add(createNGram(tokens, start, order));
    }
    return result;
  }
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Comment
If statement
Method declaration
Method invocation
Method signature
Return statement
Variable
Chunk
Conflicting content
    return result;
  }

<<<<<<< HEAD
    // int sentenceNumber = -1;
    TestSetFilter filter = new TestSetFilter();

    for (int i = 0; i < argv.length; i++) {
      if (argv[i].equals("-v")) {
        filter.setVerbose(true);
        continue;
      } else if (argv[i].equals("-p")) {
        filter.setParallel(true);
        continue;
      } else if (argv[i].equals("-f")) {
        filter.setFast(true);
        continue;
      } else if (argv[i].equals("-n")) {
        filter.setRuleLength(Integer.parseInt(argv[i + 1]));
        i++;
        continue;
      }
      filter.getTestSentences(argv[i]);
    }

=======
  /**
   * Builds the n-gram of {@code order} consecutive tokens beginning at index {@code start},
   * joined by single spaces.
   *
   * @param tokens the token array to read from
   * @param start index of the first token of the n-gram; not checked for being negative here
   * @param order number of tokens in the n-gram
   * @return the space-joined n-gram, or the empty string when {@code order < 1} or the window
   *         {@code [start, start + order)} would extend past the end of {@code tokens}
   */
  private String createNGram(String [] tokens, int start, int order)
  {
    // Reject an empty window or one that runs past the end of the array.
    if (order < 1 || start + order > tokens.length) {
      return "";
    }
    String result = tokens[start];
    // Append the remaining order - 1 tokens, each preceded by a single space.
    for (int i = 1; i < order; i++)
      result += " " + tokens[start + i];
    return result;
  }

  public static void main(String [] argv)
  {
    // do some setup
    if (argv.length < 1) {
      System.err.println("usage: TestSetFilter [-v|-p|-f|-n N]  [test set2 ...]");
      System.err.println("    -v    verbose output");
      System.err.println("    -p    parallel compatibility");
      System.err.println("    -f    fast mode");
      System.err.println("    -n    max n-gram to compare to (default 12)");
      return;
    }

    int sentenceNumber = -1;
    TestSetFilter filter = new TestSetFilter();

    for (int i = 0; i < argv.length; i++) {
      if (argv[i].equals("-v")) {
        filter.setVerbose(true);
        continue;
      }
      else if (argv[i].equals("-p")) {
        filter.setParallel(true);
        continue;
      }
      else if (argv[i].equals("-f")) {
        filter.setFast(true);
        continue;
      }
      else if (argv[i].equals("-n")) {
        filter.setRuleLength(Integer.parseInt(argv[i+1]));
        i++;
        continue;
      }
      filter.getTestSentences(argv[i]);
    }

>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    Scanner scanner = new Scanner(System.in, "UTF-8");
    int rulesIn = 0;
    int rulesOut = 0;
Solution content
    return result;
  }
  /**
   * Builds the n-gram of {@code order} consecutive tokens beginning at index {@code start},
   * joined by single spaces.
   *
   * @param tokens the token array to read from
   * @param start index of the first token of the n-gram; not checked for being negative here
   * @param order number of tokens in the n-gram
   * @return the space-joined n-gram, or the empty string when {@code order < 1} or the window
   *         {@code [start, start + order)} would extend past the end of {@code tokens}
   */
  private String createNGram(String [] tokens, int start, int order)
  {
    // Reject an empty window or one that runs past the end of the array.
    if (order < 1 || start + order > tokens.length) {
      return "";
    }
    String result = tokens[start];
    // Append the remaining order - 1 tokens, each preceded by a single space.
    for (int i = 1; i < order; i++)
      result += " " + tokens[start + i];
    return result;
  }

  public static void main(String [] argv)
  {
    // do some setup
    if (argv.length < 1) {
      System.err.println("usage: TestSetFilter [-v|-p|-f|-n N]  [test set2 ...]");
      System.err.println("    -v    verbose output");
      System.err.println("    -p    parallel compatibility");
      System.err.println("    -f    fast mode");
      System.err.println("    -n    max n-gram to compare to (default 12)");
      return;
    }

    int sentenceNumber = -1;
    TestSetFilter filter = new TestSetFilter();

    for (int i = 0; i < argv.length; i++) {
      if (argv[i].equals("-v")) {
        filter.setVerbose(true);
        continue;
      }
      else if (argv[i].equals("-p")) {
        filter.setParallel(true);
        continue;
      }
      else if (argv[i].equals("-f")) {
        filter.setFast(true);
        continue;
      }
      else if (argv[i].equals("-n")) {
        filter.setRuleLength(Integer.parseInt(argv[i+1]));
        i++;
        continue;
      }
      filter.getTestSentences(argv[i]);
    }

    Scanner scanner = new Scanner(System.in, "UTF-8");
    int rulesIn = 0;
    int rulesOut = 0;
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Comment
For statement
If statement
Method declaration
Method invocation
Method signature
Variable
Chunk
Conflicting content
    int rulesOut = 0;
    if (filter.verbose) {
      System.err.println("Processing rules...");
<<<<<<< HEAD
      if (filter.fast) System.err.println("Using fast version...");
=======
      if (filter.fast)
        System.err.println("Using fast version...");
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
      System.err.println("Using at max " + filter.RULE_LENGTH + " n-grams...");
    }
    while (scanner.hasNextLine()) {
Solution content
    int rulesOut = 0;
    if (filter.verbose) {
      System.err.println("Processing rules...");
      if (filter.fast)
        System.err.println("Using fast version...");
      System.err.println("Using at max " + filter.RULE_LENGTH + " n-grams...");
    }
    while (scanner.hasNextLine()) {
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
If statement
Chunk
Conflicting content
        if ((rulesIn + 1) % 2000 == 0) {
    }
    while (scanner.hasNextLine()) {
      if (filter.verbose) {
<<<<<<< HEAD
          System.err.print(".");
          System.err.flush();
        }
        if ((rulesIn + 1) % 100000 == 0) {
          System.err.println(" [" + (rulesIn + 1) + "]");
          System.err.flush();
        }
=======
        if ((rulesIn+1) % 2000 == 0) {
          System.err.print(".");
          System.err.flush();
        }
        if ((rulesIn+1) % 100000 == 0) {
          System.err.println(" [" + (rulesIn+1) + "]");
          System.err.flush();
        }
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
      }
      rulesIn++;
      String rule = scanner.nextLine();
Solution content
    }
    while (scanner.hasNextLine()) {
      if (filter.verbose) {
        if ((rulesIn+1) % 2000 == 0) {
          System.err.print(".");
          System.err.flush();
        }
        if ((rulesIn+1) % 100000 == 0) {
          System.err.println(" [" + (rulesIn+1) + "]");
          System.err.flush();
        }
      }
      rulesIn++;
      String rule = scanner.nextLine();
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
If statement
Chunk
Conflicting content
      if (filter.inTestSet(rule)) {
        System.out.println(rule);
<<<<<<< HEAD
        if (filter.parallel) System.out.flush();
        rulesOut++;
      } else if (filter.parallel) {
=======
        if (filter.parallel)
          System.out.flush();
        rulesOut++;
      }
      else if (filter.parallel) {
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
        System.out.println("");
        System.out.flush();
      }
Solution content
      if (filter.inTestSet(rule)) {
        System.out.println(rule);
        if (filter.parallel)
          System.out.flush();
        rulesOut++;
      }
      else if (filter.parallel) {
        System.out.println("");
        System.out.flush();
      }
File
TestSetFilter.java
Developer's decision
Version 1
Kind of conflict
If statement
Variable
Chunk
Conflicting content
        System.out.flush();
      }
    }
<<<<<<< HEAD
    scanner.close();

=======
>>>>>>> 3d2266edffffdfa382240637c3c5b56e011d3b6a
    if (filter.verbose) {
      System.err.println("[INFO] Total rules read: " + rulesIn);
      System.err.println("[INFO] Rules kept: " + rulesOut);
Solution content
        System.out.flush();
      }
    }
    if (filter.verbose) {
      System.err.println("[INFO] Total rules read: " + rulesIn);
      System.err.println("[INFO] Rules kept: " + rulesOut);
File
TestSetFilter.java
Developer's decision
Version 2
Kind of conflict
Method invocation