Projects >> thrax >>65c2d620f58c195a9bdf217589ae570ea9e09aac

Chunk
Conflicting content
import java.io.IOException;

<<<<<<< HEAD
import edu.jhu.thrax.datatypes.*;
import edu.jhu.thrax.util.exceptions.*;
import edu.jhu.thrax.util.Vocabulary;
import edu.jhu.thrax.util.ConfFileParser;
import edu.jhu.thrax.util.io.InputUtilities;
import edu.jhu.thrax.ThraxConfig;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * This class extracts Hiero-style SCFG rules. The inputs that are needed
 * are "source" "target" and "alignment", which are the source and target
 * sides of a parallel corpus, and an alignment between each of the sentences.
 */
public class HierarchicalRuleExtractor implements RuleExtractor {

    public int INIT_LENGTH_LIMIT = 10;
    public int NONLEX_SOURCE_LENGTH_LIMIT = 5;
    public int NONLEX_SOURCE_WORD_LIMIT = 5;
    public int NONLEX_TARGET_LENGTH_LIMIT = 5;
    public int NONLEX_TARGET_WORD_LIMIT = 5;
    public int NT_LIMIT = 2;
    public int LEXICAL_MINIMUM = 1;
    public boolean ALLOW_ADJACENT_NTS = false;
    public boolean ALLOW_LOOSE_BOUNDS = false;
    public boolean ALLOW_FULL_SENTENCE_RULES = true;
    public boolean ALLOW_ABSTRACT = false;
    public boolean ALLOW_X_NONLEX = false;
    public int RULE_SPAN_LIMIT = 12;
    public int LEX_TARGET_LENGTH_LIMIT = 12;
    public int LEX_SOURCE_LENGTH_LIMIT = 12;

    public boolean SOURCE_IS_PARSED = false;
    public boolean TARGET_IS_PARSED = false;
    public boolean REVERSE = false;

    private SpanLabeler labeler;
    private Collection defaultLabel;

	private Mapper.Context context;

    /**
     * Builds an extractor whose limits and switches are read from the
     * "thrax.*" keys of the given configuration. Each lookup below carries
     * the default that is used when its key is not set.
     *
     * @param mapContext the mapper context this extractor keeps a reference to
     * @param conf the configuration the "thrax.*" settings are read from
     * @param labeler the span labeler this extractor keeps a reference to
     */
    public HierarchicalRuleExtractor(Mapper.Context mapContext, Configuration conf, SpanLabeler labeler)
    {
        this.context = mapContext;
        this.labeler = labeler;

        this.INIT_LENGTH_LIMIT = conf.getInt("thrax.initial-phrase-length", 10);
        this.NONLEX_SOURCE_LENGTH_LIMIT = conf.getInt("thrax.nonlex-source-length", 5);
        this.NONLEX_SOURCE_WORD_LIMIT = conf.getInt("thrax.nonlex-source-words", 5);
        this.NONLEX_TARGET_LENGTH_LIMIT = conf.getInt("thrax.nonlex-target-length", 5);
        this.NONLEX_TARGET_WORD_LIMIT = conf.getInt("thrax.nonlex-target-words", 5);
        this.NT_LIMIT = conf.getInt("thrax.arity", 2);
        this.LEXICAL_MINIMUM = conf.getInt("thrax.lexicality", 1);

        this.ALLOW_ADJACENT_NTS = conf.getBoolean("thrax.adjacent-nts", false);
        this.ALLOW_LOOSE_BOUNDS = conf.getBoolean("thrax.loose", false);
        this.ALLOW_FULL_SENTENCE_RULES = conf.getBoolean("thrax.allow-full-sentence-rules", true);
        this.ALLOW_ABSTRACT = conf.getBoolean("thrax.allow-abstract-rules", false);
        this.ALLOW_X_NONLEX = conf.getBoolean("thrax.allow-nonlexical-x", false);

        this.RULE_SPAN_LIMIT = conf.getInt("thrax.rule-span-limit", 12);
        this.LEX_TARGET_LENGTH_LIMIT = conf.getInt("thrax.lex-target-words", 12);
        this.LEX_SOURCE_LENGTH_LIMIT = conf.getInt("thrax.lex-source-words", 12);

        this.SOURCE_IS_PARSED = conf.getBoolean("thrax.source-is-parsed", false);
        this.TARGET_IS_PARSED = conf.getBoolean("thrax.target-is-parsed", false);
        // Backwards compatibility: when the older "thrax.english-is-parsed"
        // key is present at all, its value replaces the one read just above.
        final boolean legacyKeyPresent = conf.get("thrax.english-is-parsed") != null;
        if (legacyKeyPresent) {
            this.TARGET_IS_PARSED = conf.getBoolean("thrax.english-is-parsed", false);
        }

        // The default label set holds exactly one id: that of the configured
        // default nonterminal symbol ("X" unless "thrax.default-nt" is set).
        final int defaultNonterminalId = Vocabulary.getId(conf.get("thrax.default-nt", "X"));
        this.REVERSE = conf.getBoolean("thrax.reverse", false);
        this.defaultLabel = new HashSet();
        this.defaultLabel.add(defaultNonterminalId);
    }

    public List extract(String inp) throws MalformedInputException
    {
        String [] inputs = inp.split(ThraxConfig.DELIMITER_REGEX);
        if (inputs.length < 3) {
            throw new NotEnoughFieldsException();
        }
        String [] sourceWords = InputUtilities.getWords(inputs[0], SOURCE_IS_PARSED);
        String [] targetWords = InputUtilities.getWords(inputs[1], TARGET_IS_PARSED);
        if (sourceWords.length == 0 || targetWords.length == 0)
            throw new EmptySentenceException();

        int [] source = Vocabulary.getIds(sourceWords);
        int [] target = Vocabulary.getIds(targetWords);
        if (REVERSE) {
            int [] tmp = source;
            source = target;
            target = tmp;
        }

        Alignment alignment = new Alignment(inputs[2], REVERSE);
        if (alignment.isEmpty())
            throw new EmptyAlignmentException();
        if (!alignment.consistent(source.length, target.length)) {
            StringBuilder sb = new StringBuilder();
            sb.append(String.format("source: %s (length %d)\n", inputs[0], source.length));
            sb.append(String.format("target: %s (length %d)\n", inputs[1], target.length));
            sb.append("alignment: " + inputs[2]);
            throw new InconsistentAlignmentException(sb.toString());
        }

        PhrasePair [][] phrasesByStart = initialPhrasePairs(source, target, alignment);
        labeler.setInput(inp);

        Queue q = new LinkedList();
        for (int i = 0; i < source.length; i++)
            q.offer(new Rule(source, target, alignment, i, NT_LIMIT));
=======
public class HierarchicalRuleExtractor
{
	private int arityLimit = 2;
	private int initialPhraseSourceLimit = 10;
	private int initialPhraseTargetLimit = 10;
	private boolean requireMinimalPhrases = true;
	private int minimumInitialAlignmentPoints = 1;
	private boolean allowAdjacent = false;
	private int sourceSymbolLimit = 5;
	private int targetSymbolLimit = 1000;
	private int minimumRuleAlignmentPoints = 1;
	private boolean allowAbstract = false;

	/**
	 * Creates an extractor that keeps the value each limit and option
	 * field is initialized with in its declaration.
	 */
	public HierarchicalRuleExtractor()
	{
		// just use the defaults!
	}
>>>>>>> e00e5499ed868ec9ad4c03b7c320143811e1eda2

	public HierarchicalRuleExtractor(int arity,
									 int initialPhraseSource,
Solution content
import java.io.IOException;

public class HierarchicalRuleExtractor
{
	private int arityLimit = 2;
	private int initialPhraseSourceLimit = 10;
	private int initialPhraseTargetLimit = 10;
	private boolean requireMinimalPhrases = true;
	private int minimumInitialAlignmentPoints = 1;
	private boolean allowAdjacent = false;
	private int sourceSymbolLimit = 5;
	private int targetSymbolLimit = 1000;
	private int minimumRuleAlignmentPoints = 1;
	private boolean allowAbstract = false;

	/**
	 * Creates an extractor that keeps the value each limit and option
	 * field is initialized with in its declaration.
	 */
	public HierarchicalRuleExtractor()
	{
		// just use the defaults!
	}

	public HierarchicalRuleExtractor(int arity,
									 int initialPhraseSource,
File
HierarchicalRuleExtractor.java
Developer's decision
Version 2
Kind of conflict
Attribute
Class signature
Comment
For statement
If statement
Import
Method declaration
Method invocation
Method signature
Variable
Chunk
Conflicting content
<<<<<<< HEAD
		allowAbstract = allow_abstract;
	}
    protected List processQueue(Queue q, PhrasePair [][] phrasesByStart)
    {
        List rules = new ArrayList();
        while (q.peek() != null) {
            Rule r = q.poll();
			context.progress();

	    for (Rule t : getAlignmentVariants(r)) {
                if (isWellFormed(t)) {
			for (Rule s : getLabelVariants(t)) {
				context.progress();
			    rules.add(s);
=======
	/**
	 * Extracts hierarchical rules from one alignment.
	 *
	 * The initial phrase pairs are collected first; each of them becomes one
	 * arity-0 rule. Rules of every higher arity, up to arityLimit, are then
	 * derived from the rules one arity below by adding nonterminals taken
	 * from the initial phrase pairs. All arity levels are finally handed to
	 * removeIfNotValid, whose result is returned.
	 *
	 * @param sourceLength source-side length passed to the initial phrase pair search
	 * @param targetLength target-side length passed to the initial phrase pair search
	 * @param alignment the alignment the phrase pairs and rules are checked against
	 * @return the list produced by removeIfNotValid for all arity levels
	 */
	public List extract(int sourceLength, int targetLength, Alignment alignment)
	{
		final List phrasePairs = initialPhrasePairs(sourceLength, targetLength, alignment);

		// one slot per arity, from 0 up to and including arityLimit
		final HierarchicalRule [][] rulesByArity = new HierarchicalRule[arityLimit+1][];

		// arity 0: every initial phrase pair is turned into a rule of its own
		final HierarchicalRule [] seedRules = new HierarchicalRule[phrasePairs.size()];
		rulesByArity[0] = seedRules;
		int next = 0;
		while (next < seedRules.length) {
			seedRules[next] = new HierarchicalRule(phrasePairs.get(next));
			next++;
		}

		// arity k is built from arity k - 1 plus the initial phrase pairs
		for (int arity = 1; arity <= arityLimit; arity++) {
			final HierarchicalRule [] lowerLevel = rulesByArity[arity - 1];
			rulesByArity[arity] = addNonterminalsTo(lowerLevel, phrasePairs);
		}

		return removeIfNotValid(rulesByArity, alignment);
	}

	private List initialPhrasePairs(int sourceLength, int targetLength, Alignment a)
	{
		List result = new ArrayList();
		for (int i = 0; i < sourceLength; i++) {
			for (int x = 1; x <= initialPhraseSourceLimit; x++) {
				if (i + x > sourceLength)
					break;
				for (int j = 0; j < targetLength; j++) {
					for (int y = 1; y <= initialPhraseTargetLimit; y++) {
						if (j + y > targetLength)
							break;
						PhrasePair pp = new PhrasePair(i, i + x, j, j + y);
						if (pp.isInitialPhrasePair(a, !requireMinimalPhrases, minimumInitialAlignmentPoints)) {
							result.add(pp);
						}
					}
				}
>>>>>>> e00e5499ed868ec9ad4c03b7c320143811e1eda2
			}
		}
		return result;
Solution content
		allowAbstract = allow_abstract;
	}

	/**
	 * Extracts hierarchical rules from one alignment.
	 *
	 * The initial phrase pairs are collected first; each of them becomes one
	 * arity-0 rule. Rules of every higher arity, up to arityLimit, are then
	 * derived from the rules one arity below by adding nonterminals taken
	 * from the initial phrase pairs. All arity levels are finally handed to
	 * removeIfNotValid, whose result is returned.
	 *
	 * @param sourceLength source-side length passed to the initial phrase pair search
	 * @param targetLength target-side length passed to the initial phrase pair search
	 * @param alignment the alignment the phrase pairs and rules are checked against
	 * @return the list produced by removeIfNotValid for all arity levels
	 */
	public List extract(int sourceLength, int targetLength, Alignment alignment)
	{
		final List phrasePairs = initialPhrasePairs(sourceLength, targetLength, alignment);

		// one slot per arity, from 0 up to and including arityLimit
		final HierarchicalRule [][] rulesByArity = new HierarchicalRule[arityLimit+1][];

		// arity 0: every initial phrase pair is turned into a rule of its own
		final HierarchicalRule [] seedRules = new HierarchicalRule[phrasePairs.size()];
		rulesByArity[0] = seedRules;
		int next = 0;
		while (next < seedRules.length) {
			seedRules[next] = new HierarchicalRule(phrasePairs.get(next));
			next++;
		}

		// arity k is built from arity k - 1 plus the initial phrase pairs
		for (int arity = 1; arity <= arityLimit; arity++) {
			final HierarchicalRule [] lowerLevel = rulesByArity[arity - 1];
			rulesByArity[arity] = addNonterminalsTo(lowerLevel, phrasePairs);
		}

		return removeIfNotValid(rulesByArity, alignment);
	}

	private List initialPhrasePairs(int sourceLength, int targetLength, Alignment a)
	{
		List result = new ArrayList();
		for (int i = 0; i < sourceLength; i++) {
			for (int x = 1; x <= initialPhraseSourceLimit; x++) {
				if (i + x > sourceLength)
					break;
				for (int j = 0; j < targetLength; j++) {
					for (int y = 1; y <= initialPhraseTargetLimit; y++) {
						if (j + y > targetLength)
							break;
						PhrasePair pp = new PhrasePair(i, i + x, j, j + y);
						if (pp.isInitialPhrasePair(a, !requireMinimalPhrases, minimumInitialAlignmentPoints)) {
							result.add(pp);
						}
					}
				}
			}
		}
		return result;
File
HierarchicalRuleExtractor.java
Developer's decision
Version 2
Kind of conflict
For statement
If statement
Method declaration
Method invocation
Method signature
Variable
While statement
Chunk
Conflicting content
		return;
	}

<<<<<<< HEAD
    /**
     * Command-line driver: reads a configuration file, then extracts rules
     * from every line of standard input and prints each rule on its own
     * line of standard output.
     *
     * Every option parsed from the configuration file is copied into a
     * Hadoop Configuration under the same name prefixed with "thrax.".
     *
     * @param argv argv[0] is the path of the configuration file to parse;
     *             when it is missing a usage message is printed and the
     *             method returns without doing anything else
     * @throws IOException declared for the configuration and input handling
     * @throws MalformedInputException declared for extraction of an input line
     * @throws ConfigurationException declared for creation of the extractor
     */
    public static void main(String [] argv) throws IOException,MalformedInputException,ConfigurationException
    {
        if (argv.length < 1) {
            System.err.println("usage: HierarchicalRuleExtractor <conf file>");
            return;
        }
        Configuration conf = new Configuration();
        // The map must be typed String -> String: its keys are concatenated
        // into option names and its values are passed to conf.set, which
        // only accepts Strings.
        Map<String,String> options = ConfFileParser.parse(argv[0]);
        for (Map.Entry<String,String> option : options.entrySet())
            conf.set("thrax." + option.getKey(), option.getValue());
        Scanner scanner = new Scanner(System.in);
        // NOTE(review): no mapper context exists on the command line, so null
        // is passed here. Confirm that the extractor the factory creates
        // tolerates a null context.
        RuleExtractor extractor = RuleExtractorFactory.create(null, conf);
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            for (Rule r : extractor.extract(line))
                System.out.println(r);
        }
    }
=======
>>>>>>> e00e5499ed868ec9ad4c03b7c320143811e1eda2
}
Solution content
		return;
	}

}
File
HierarchicalRuleExtractor.java
Developer's decision
Version 2
Kind of conflict
Method declaration
Chunk
Conflicting content
    protected void setup(Context context) throws IOException, InterruptedException
    {
<<<<<<< HEAD
        Configuration conf = context.getConfiguration();
        try {
            extractor = RuleExtractorFactory.create(context, conf);
        }
        catch (ConfigurationException ex) {
            System.err.println(ex.getMessage());
        }
=======
		extractor = RuleWritableExtractorFactory.create(context);
		if (extractor == null) {
			System.err.println("WARNING: could not create rule extractor as configured!");
		}
>>>>>>> e00e5499ed868ec9ad4c03b7c320143811e1eda2
    }

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
Solution content
    /**
     * Asks RuleWritableExtractorFactory for a rule extractor built from the
     * given context and stores it in extractor. When the factory returns
     * null, a warning is printed to standard error and extractor stays null.
     *
     * @param context the context handed to the factory
     */
    protected void setup(Context context) throws IOException, InterruptedException
    {
        extractor = RuleWritableExtractorFactory.create(context);
        if (extractor != null)
            return;
        System.err.println("WARNING: could not create rule extractor as configured!");
    }

    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
File
ExtractionMapper.java
Developer's decision
Version 2
Kind of conflict
Attribute
If statement
Method invocation
Try statement
Variable